""" Image Classification Demo — 2015 vs 2025 Implementation Comparison 画像分類デモアプリ — 2015 vs 2025 実装比較 ==================================================== Compares the same feature (image → category prediction) across two generations. 同じ機能(画像 → カテゴリ予測)を 2 世代の実装で並べて表示する。 Inference is handled by the 2025 implementation (HuggingFace ViT). 推論は 2025 実装(HuggingFace ViT)が担い、 The 2015 implementation (Theano CNN) is shown as reference code. 2015 実装(Theano CNN)は実装コードを参照表示する。 Usage / 起動方法: python app.py """ import textwrap import gradio as gr from model_2025 import classify as classify_2025 # ── Code snippets for display / 表示用コードスニペット ───────────────────────── CODE_2015 = textwrap.dedent("""\ # 2015 Implementation — Theano + NumPy (excerpt, ~130 lines) # 2015 実装 — Theano + NumPy(抜粋・約 130 行) # ❶ Manually Initialize the Weights # 重みを手動で初期化 W0 = theano.shared(np.random.normal(0, 0.01, (32,3,5,5)), 'W0') W1 = theano.shared(np.random.normal(0, 0.01, (64,32,5,5)), 'W1') W2 = theano.shared(np.random.normal(0, 0.01, (1600,512)), 'W2') W3 = theano.shared(np.random.normal(0, 0.01, (512,10)), 'W3') # ... b0, b1, b2, b3 defined in the same way / 同様に定義 ... # ❷ Hand-write the Symbolic Computation Graph # シンボルグラフを手書き x = T.tensor4('x') conv0 = T.tanh(pool.pool_2d( conv2d(x, W0, filter_shape=(32,3,5,5)) + b0.dimshuffle('x',0,'x','x'), ws=(2,2), ignore_border=True)) conv1 = T.tanh(pool.pool_2d( conv2d(conv0, W1, filter_shape=(64,32,5,5)) + b1.dimshuffle('x',0,'x','x'), ws=(2,2), ignore_border=True)) flat = conv1.flatten(2) fc = T.tanh(T.dot(flat, W2) + b2) out = T.nnet.softmax(T.dot(fc, W3) + b3) # ❸ Manually Define Loss, Gradients, and SGD Update Rules # 損失・勾配・SGD 更新則を手動定義 loss = -T.mean(T.log(out)[T.arange(y.shape[0]), y]) grads = T.grad(loss, [W0,b0,W1,b1,W2,b2,W3,b3]) updates = [(p, p - 0.01*g) for p, g in zip(params, grads)] # ❹ Compile Theano Functions (takes tens of seconds) # Theano 関数をコンパイル(数十秒かかる) train_fn = theano.function([x, y], loss, updates=updates) pred_fn = theano.function([x], T.argmax(out, axis=1)) # ❺ Manually Implement Preprocessing # 前処理を手動実装 def preprocess(path): img = Image.open(path).convert('RGB').resize((32,32)) arr = (np.array(img)/255.0 - MEAN) / STD return arr.transpose(2,0,1)[np.newaxis] # ❻ Manually Implement the Training Loop # 学習ループを手動実装 for epoch in range(200): for batch in range(n // 50): train_fn(X[batch], y[batch]) # ❼ Run Inference / 推論 idx = pred_fn(preprocess('cat.jpg'))[0] return LABELS[idx] """) CODE_2025 = textwrap.dedent("""\ # 2025 Implementation — HuggingFace Transformers (just 5 lines) # 2025 実装 — HuggingFace Transformers(実質 5 行) from transformers import pipeline # ❶ Load a Pre-trained Model # 事前学習済みモデルをロード classifier = pipeline( "image-classification", model="google/vit-base-patch16-224", ) # ❷ Run Inference (preprocessing & postprocessing are automatic) # 推論(前処理・後処理すべて自動) result = classifier("cat.jpg", top_k=5) # → [{'label': 'tabby cat', 'score': 0.923}, ...] """) # ── Comparison table / 比較表 ──────────────────────────────────────────────── COMPARISON_MD = """\ | Item
項目 | 2015 (Theano) | 2025 (HuggingFace) | |---|---|---| | **Lines of code**
実装行数 | ~130 lines | 5 lines | | **Model**
モデル | Hand-written CNN
手書き CNN | ViT-Base (pre-trained)
ViT-Base(事前学習済) | | **Preprocessing**
前処理 | Manual
手動実装 | Automatic
自動 | | **Training**
学習 | SGD written by hand
SGD 手動記述 | Not required (fine-tuning is separate)
不要(Fine-tuning は別途) | | **Accuracy (approx.)**
精度目安 | ~70 % (CIFAR-10) | ~81 % (ImageNet) | | **Theano compile step**
コンパイル | Tens of seconds
数十秒 | Not required
不要 | """ # ── Inference function / 推論関数 ──────────────────────────────────────────── def run_inference(image): """Classify the uploaded image with ViT and return top-5 scores. アップロード画像を ViT で分類し、スコア上位 5 件を返す。""" if image is None: return {}, CODE_2015, CODE_2025 results = classify_2025(image) label_scores = {r["label"]: float(r["score"]) for r in results} return label_scores, CODE_2015, CODE_2025 # ── UI / UI 定義 ───────────────────────────────────────────────────────────── CSS = """ .code-2015 textarea { border-left: 3px solid #888780 !important; } .code-2025 textarea { border-left: 3px solid #1D9E75 !important; } .bilingual-label .label-wrap span { display: block; } """ def _bi(en, ja): """Return bilingual Markdown: English normal, Japanese small gray below.""" return f"{en}
{ja}" with gr.Blocks( title="Image Classification: 2015 vs 2025", css=CSS, theme=gr.themes.Default( font=["BIZ UDPGothic", "Noto Sans JP", "sans-serif"], primary_hue=gr.themes.colors.emerald, ), ) as demo: gr.Markdown( """ # Image Classification Demo — 2015 vs 2025 画像分類デモ — 2015 vs 2025 実装比較 **The same feature (image → category prediction) compared across two generations of implementation.**
同じ機能(画像 → カテゴリ予測)を 2 世代の実装で比較する。 Inference is handled by the 2025 implementation (ViT).
推論は 2025 実装(ViT)が担います。 """ ) with gr.Row(): # ── Left column: upload + result ────────────────────────────────── with gr.Column(scale=1): img_input = gr.Image( type="pil", label="Upload an Image / 画像をアップロード", height=280, ) run_btn = gr.Button( "▶ Run Classification / 分類を実行", variant="primary", ) results_output = gr.Label( num_top_classes=5, label="Prediction Results (2025 implementation) / 予測結果(2025 実装)", ) # ── Right column: code comparison ───────────────────────────────── with gr.Column(scale=2): gr.Markdown( """ ### Code Implementation Comparison 実装コードの比較 > Difference in lines of code required to implement the same inference feature. > 同じ推論機能を実装するのに必要なコード量の差 """ ) with gr.Row(): with gr.Column(): gr.Markdown( "**🕰️ 2015 Implementation — Theano + NumPy (~130 lines)**" "
2015 実装 — Theano + NumPy(約 130 行)" ) code_2015_box = gr.Code( value=CODE_2015, language="python", label="", lines=30, interactive=False, elem_classes=["code-2015"], ) with gr.Column(): gr.Markdown( "**✅ 2025 Implementation — HuggingFace Transformers (5 lines)**" "
2025 実装 — HuggingFace Transformers(5 行)" ) code_2025_box = gr.Code( value=CODE_2025, language="python", label="", lines=30, interactive=False, elem_classes=["code-2025"], ) gr.Markdown("---") gr.Markdown( "### Implementation Comparison Summary\n" "実装比較サマリー" ) gr.Markdown(COMPARISON_MD) # Event binding / イベントバインド run_btn.click( fn=run_inference, inputs=[img_input], outputs=[results_output, code_2015_box, code_2025_box], ) img_input.change( fn=run_inference, inputs=[img_input], outputs=[results_output, code_2015_box, code_2025_box], ) if __name__ == "__main__": demo.launch()