"""
Image Classification Demo — 2015 vs 2025 Implementation Comparison
画像分類デモアプリ — 2015 vs 2025 実装比較
====================================================
Compares the same feature (image → category prediction) across two generations.
同じ機能(画像 → カテゴリ予測)を 2 世代の実装で並べて表示する。
Inference is handled by the 2025 implementation (HuggingFace ViT).
推論は 2025 実装(HuggingFace ViT)が担い、
The 2015 implementation (Theano CNN) is shown as reference code.
2015 実装(Theano CNN)は実装コードを参照表示する。
Usage / 起動方法:
python app.py
"""
import textwrap
import gradio as gr
from model_2025 import classify as classify_2025
# ── Code snippets for display / 表示用コードスニペット ─────────────────────────
# Reference-only excerpt of the 2015 Theano implementation. The text is shown
# in a read-only code box and is never executed by this app. Because it is
# displayed as Python, the indentation inside the literal is significant: the
# ``def`` and ``for`` bodies must be indented and no statement may sit outside
# the scope it needs, so the snippet stays syntactically valid.
CODE_2015 = textwrap.dedent("""\
# 2015 Implementation — Theano + NumPy (excerpt, ~130 lines)
# 2015 実装 — Theano + NumPy(抜粋・約 130 行)
# ❶ Manually Initialize the Weights
# 重みを手動で初期化
W0 = theano.shared(np.random.normal(0, 0.01, (32,3,5,5)), 'W0')
W1 = theano.shared(np.random.normal(0, 0.01, (64,32,5,5)), 'W1')
W2 = theano.shared(np.random.normal(0, 0.01, (1600,512)), 'W2')
W3 = theano.shared(np.random.normal(0, 0.01, (512,10)), 'W3')
# ... b0, b1, b2, b3 defined in the same way / 同様に定義 ...
# ❷ Hand-write the Symbolic Computation Graph
# シンボルグラフを手書き
x = T.tensor4('x')
conv0 = T.tanh(pool.pool_2d(
    conv2d(x, W0, filter_shape=(32,3,5,5))
    + b0.dimshuffle('x',0,'x','x'),
    ws=(2,2), ignore_border=True))
conv1 = T.tanh(pool.pool_2d(
    conv2d(conv0, W1, filter_shape=(64,32,5,5))
    + b1.dimshuffle('x',0,'x','x'),
    ws=(2,2), ignore_border=True))
flat = conv1.flatten(2)
fc = T.tanh(T.dot(flat, W2) + b2)
out = T.nnet.softmax(T.dot(fc, W3) + b3)
# ❸ Manually Define Loss, Gradients, and SGD Update Rules
# 損失・勾配・SGD 更新則を手動定義
loss = -T.mean(T.log(out)[T.arange(y.shape[0]), y])
grads = T.grad(loss, [W0,b0,W1,b1,W2,b2,W3,b3])
updates = [(p, p - 0.01*g) for p, g in zip(params, grads)]
# ❹ Compile Theano Functions (takes tens of seconds)
# Theano 関数をコンパイル(数十秒かかる)
train_fn = theano.function([x, y], loss, updates=updates)
pred_fn = theano.function([x], T.argmax(out, axis=1))
# ❺ Manually Implement Preprocessing
# 前処理を手動実装
def preprocess(path):
    img = Image.open(path).convert('RGB').resize((32,32))
    arr = (np.array(img)/255.0 - MEAN) / STD
    return arr.transpose(2,0,1)[np.newaxis]
# ❻ Manually Implement the Training Loop
# 学習ループを手動実装
for epoch in range(200):
    for batch in range(n // 50):
        train_fn(X[batch], y[batch])
# ❼ Run Inference / 推論
idx = pred_fn(preprocess('cat.jpg'))[0]
print(LABELS[idx])
""")
# Display-only snippet of the 2025 HuggingFace implementation, shown next to
# the 2015 excerpt. It is not executed here; actual inference goes through
# ``classify_2025``. The call arguments carry a hanging indent so the sample
# reads as conventionally formatted Python.
CODE_2025 = textwrap.dedent("""\
# 2025 Implementation — HuggingFace Transformers (just 5 lines)
# 2025 実装 — HuggingFace Transformers(実質 5 行)
from transformers import pipeline
# ❶ Load a Pre-trained Model
# 事前学習済みモデルをロード
classifier = pipeline(
    "image-classification",
    model="google/vit-base-patch16-224",
)
# ❷ Run Inference (preprocessing & postprocessing are automatic)
# 推論(前処理・後処理すべて自動)
result = classifier("cat.jpg", top_k=5)
# → [{'label': 'tabby cat', 'score': 0.923}, ...]
""")
# ── Comparison table / 比較表 ────────────────────────────────────────────────
# Markdown table summarising the two implementations. A Markdown table row
# must sit on a single line, so the English/Japanese pair inside a cell is
# separated with an inline ``<br>`` rather than a raw newline (a raw newline
# would end the row and break the table).
COMPARISON_MD = """\
| Item<br>項目 | 2015 (Theano) | 2025 (HuggingFace) |
|---|---|---|
| **Lines of code**<br>実装行数 | ~130 lines | 5 lines |
| **Model**<br>モデル | Hand-written CNN<br>手書き CNN | ViT-Base (pre-trained)<br>ViT-Base(事前学習済) |
| **Preprocessing**<br>前処理 | Manual<br>手動実装 | Automatic<br>自動 |
| **Training**<br>学習 | SGD written by hand<br>SGD 手動記述 | Not required (fine-tuning is separate)<br>不要(Fine-tuning は別途) |
| **Accuracy (approx.)**<br>精度目安 | ~70 % (CIFAR-10) | ~81 % (ImageNet) |
| **Theano compile step**<br>コンパイル | Tens of seconds<br>数十秒 | Not required<br>不要 |
"""
# ── Inference function / 推論関数 ────────────────────────────────────────────
def run_inference(image):
    """Classify an image and return its label scores plus both code snippets.

    Args:
        image: Value delivered by the image input component, or ``None``
            when no image is present.

    Returns:
        A 3-tuple ``(scores, CODE_2015, CODE_2025)``. ``scores`` maps each
        predicted label to its score as a ``float`` and is an empty dict
        when ``image`` is ``None``. The two snippet constants are passed
        through unchanged so they can feed the code boxes listed as event
        outputs.
    """
    scores = {}
    if image is not None:
        # One entry per prediction returned by the 2025 classifier; how many
        # predictions come back is decided by ``classify_2025`` itself.
        for prediction in classify_2025(image):
            scores[prediction["label"]] = float(prediction["score"])
    return scores, CODE_2015, CODE_2025
# ── UI / UI 定義 ─────────────────────────────────────────────────────────────
# Custom stylesheet handed to ``gr.Blocks(css=...)``.
# - ``.code-2015`` / ``.code-2025`` are the ``elem_classes`` given to the two
#   code boxes; each rule draws a coloured left border on a nested textarea.
# - ``.bilingual-label`` is not referenced elsewhere in the visible file.
CSS = "\n".join(
    (
        "",
        ".code-2015 textarea { border-left: 3px solid #888780 !important; }",
        ".code-2025 textarea { border-left: 3px solid #1D9E75 !important; }",
        ".bilingual-label .label-wrap span {",
        "display: block;",
        "}",
        "",
    )
)
def _bi(en, ja):
    """Return the English text and its Japanese counterpart on separate lines.

    Args:
        en: English text, placed first.
        ja: Japanese text, placed on the following line.

    Returns:
        ``en`` and ``ja`` joined by a single newline character.

    NOTE(review): an earlier docstring described the Japanese line as
    "small gray"; this function emits no styling markup, so any such styling
    has to come from the caller or from CSS. Confirm the intended output.
    """
    # The newline is written as an escape sequence: a single-quoted f-string
    # cannot contain a raw line break.
    return f"{en}\n{ja}"
# Build the Gradio UI. All components and event bindings are created inside
# the ``gr.Blocks`` context so that they belong to ``demo``.
with gr.Blocks(
    title="Image Classification: 2015 vs 2025",
    css=CSS,
    theme=gr.themes.Default(
        font=["BIZ UDPGothic", "Noto Sans JP", "sans-serif"],
        primary_hue=gr.themes.colors.emerald,
    ),
) as demo:
    # Page header: each English line is followed by its Japanese counterpart.
    gr.Markdown(
        "\n"
        "# Image Classification Demo — 2015 vs 2025\n"
        "画像分類デモ — 2015 vs 2025 実装比較\n"
        "**The same feature (image → category prediction) compared across two generations of implementation.**\n"
        "同じ機能(画像 → カテゴリ予測)を 2 世代の実装で比較する。\n"
        "Inference is handled by the 2025 implementation (ViT).\n"
        "推論は 2025 実装(ViT)が担います。\n"
    )

    with gr.Row():
        # Left column: image upload, run button and prediction result.
        with gr.Column(scale=1):
            img_input = gr.Image(
                type="pil",
                label="Upload an Image / 画像をアップロード",
                height=280,
            )
            run_btn = gr.Button(
                "▶ Run Classification / 分類を実行",
                variant="primary",
            )
            results_output = gr.Label(
                num_top_classes=5,
                label="Prediction Results (2025 implementation) / 予測結果(2025 実装)",
            )

        # Right column: the two implementations side by side.
        with gr.Column(scale=2):
            gr.Markdown(
                "\n"
                "### Code Implementation Comparison\n"
                "実装コードの比較\n"
                "> Difference in lines of code required to implement the same inference feature.\n"
                "> 同じ推論機能を実装するのに必要なコード量の差\n"
            )
            with gr.Row():
                with gr.Column():
                    # The line break between the English and Japanese caption
                    # is an escaped newline; a raw line break inside a
                    # single-quoted literal is a syntax error.
                    gr.Markdown(
                        "**🕰️ 2015 Implementation — Theano + NumPy (~130 lines)**"
                        "\n2015 実装 — Theano + NumPy(約 130 行)"
                    )
                    code_2015_box = gr.Code(
                        value=CODE_2015,
                        language="python",
                        label="",
                        lines=30,
                        interactive=False,
                        elem_classes=["code-2015"],
                    )
                with gr.Column():
                    gr.Markdown(
                        "**✅ 2025 Implementation — HuggingFace Transformers (5 lines)**"
                        "\n2025 実装 — HuggingFace Transformers(5 行)"
                    )
                    code_2025_box = gr.Code(
                        value=CODE_2025,
                        language="python",
                        label="",
                        lines=30,
                        interactive=False,
                        elem_classes=["code-2025"],
                    )

    # Summary section below the two-column area.
    # NOTE(review): the original nesting was not recoverable from the source;
    # full-width placement under the row is assumed. Confirm the layout.
    gr.Markdown("---")
    gr.Markdown(
        "### Implementation Comparison Summary\n"
        "実装比較サマリー"
    )
    gr.Markdown(COMPARISON_MD)

    # Event binding: run inference on button click and whenever the image
    # input changes. Both triggers share the same handler, input and outputs.
    for bind_event in (run_btn.click, img_input.change):
        bind_event(
            fn=run_inference,
            inputs=[img_input],
            outputs=[results_output, code_2015_box, code_2025_box],
        )

# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()