# Spaces: Msk7000 / Image_Clf_App_Implementation_Comparison
# Status: Running
# File size: 10,343 Bytes
# Revision: a60082f

"""
Image Classification Demo — 2015 vs 2025 Implementation Comparison
画像分類デモアプリ — 2015 vs 2025 実装比較
====================================================
Compares the same feature (image → category prediction) across two generations.
同じ機能（画像 → カテゴリ予測）を 2 世代の実装で並べて表示する。

Inference is handled by the 2025 implementation (HuggingFace ViT).
推論は 2025 実装（HuggingFace ViT）が担い、
The 2015 implementation (Theano CNN) is shown as reference code.
2015 実装（Theano CNN）は実装コードを参照表示する。

Usage / 起動方法:
    python app.py
"""

import textwrap

import gradio as gr
from model_2025 import classify as classify_2025

# ── Code snippets for display / 表示用コードスニペット ─────────────────────────

# Display-only excerpt of the 2015 (Theano + NumPy) implementation.
# This text is never executed here: it is the initial value of the 2015 code
# panel and is returned unchanged by run_inference().
# textwrap.dedent() strips the common 4-space indent, and the backslash right
# after the opening quotes suppresses the leading newline.
# NOTE(review): the excerpt is illustrative rather than runnable as shown --
# it uses names it never defines (y, params, MEAN, STD, LABELS, X, n) and ends
# with a `return` outside any function.
CODE_2015 = textwrap.dedent("""\
    # 2015 Implementation — Theano + NumPy  (excerpt, ~130 lines)
    # 2015 実装 — Theano + NumPy（抜粋・約 130 行）

    # ❶ Manually Initialize the Weights
    #    重みを手動で初期化
    W0 = theano.shared(np.random.normal(0, 0.01, (32,3,5,5)), 'W0')
    W1 = theano.shared(np.random.normal(0, 0.01, (64,32,5,5)), 'W1')
    W2 = theano.shared(np.random.normal(0, 0.01, (1600,512)), 'W2')
    W3 = theano.shared(np.random.normal(0, 0.01, (512,10)),  'W3')
    # ... b0, b1, b2, b3 defined in the same way / 同様に定義 ...

    # ❷ Hand-write the Symbolic Computation Graph
    #    シンボルグラフを手書き
    x    = T.tensor4('x')
    conv0 = T.tanh(pool.pool_2d(
                conv2d(x, W0, filter_shape=(32,3,5,5))
                + b0.dimshuffle('x',0,'x','x'),
                ws=(2,2), ignore_border=True))
    conv1 = T.tanh(pool.pool_2d(
                conv2d(conv0, W1, filter_shape=(64,32,5,5))
                + b1.dimshuffle('x',0,'x','x'),
                ws=(2,2), ignore_border=True))
    flat  = conv1.flatten(2)
    fc    = T.tanh(T.dot(flat, W2) + b2)
    out   = T.nnet.softmax(T.dot(fc, W3) + b3)

    # ❸ Manually Define Loss, Gradients, and SGD Update Rules
    #    損失・勾配・SGD 更新則を手動定義
    loss    = -T.mean(T.log(out)[T.arange(y.shape[0]), y])
    grads   = T.grad(loss, [W0,b0,W1,b1,W2,b2,W3,b3])
    updates = [(p, p - 0.01*g) for p, g in zip(params, grads)]

    # ❹ Compile Theano Functions  (takes tens of seconds)
    #    Theano 関数をコンパイル（数十秒かかる）
    train_fn = theano.function([x, y], loss, updates=updates)
    pred_fn  = theano.function([x], T.argmax(out, axis=1))

    # ❺ Manually Implement Preprocessing
    #    前処理を手動実装
    def preprocess(path):
        img = Image.open(path).convert('RGB').resize((32,32))
        arr = (np.array(img)/255.0 - MEAN) / STD
        return arr.transpose(2,0,1)[np.newaxis]

    # ❻ Manually Implement the Training Loop
    #    学習ループを手動実装
    for epoch in range(200):
        for batch in range(n // 50):
            train_fn(X[batch], y[batch])

    # ❼ Run Inference / 推論
    idx = pred_fn(preprocess('cat.jpg'))[0]
    return LABELS[idx]
""")

# Display-only snippet of the 2025 (HuggingFace Transformers) implementation.
# Like the 2015 excerpt it is shown in a read-only code panel and returned
# unchanged by run_inference(); it is not executed in this file. The inference
# that actually runs comes from model_2025.classify, imported at the top.
# textwrap.dedent() strips the common 4-space indent, and the backslash right
# after the opening quotes suppresses the leading newline.
CODE_2025 = textwrap.dedent("""\
    # 2025 Implementation — HuggingFace Transformers  (just 5 lines)
    # 2025 実装 — HuggingFace Transformers（実質 5 行）

    from transformers import pipeline

    # ❶ Load a Pre-trained Model
    #    事前学習済みモデルをロード
    classifier = pipeline(
        "image-classification",
        model="google/vit-base-patch16-224",
    )

    # ❷ Run Inference  (preprocessing & postprocessing are automatic)
    #    推論（前処理・後処理すべて自動）
    result = classifier("cat.jpg", top_k=5)
    # → [{'label': 'tabby cat', 'score': 0.923}, ...]
""")

# ── Comparison table / 比較表 ────────────────────────────────────────────────

# Markdown table summarising the two implementations; rendered near the bottom
# of the page through gr.Markdown(COMPARISON_MD).
# Each bilingual cell puts the English text first and the Japanese gloss after
# a <br>, wrapped in a small gray <small> tag (inline HTML inside Markdown).
# The backslash after the opening quotes keeps the table from starting with a
# blank line.
COMPARISON_MD = """\
| Item<br><small style="color:#999">項目</small> | 2015 (Theano) | 2025 (HuggingFace) |
|---|---|---|
| **Lines of code**<br><small style="color:#999">実装行数</small> | ~130 lines | 5 lines |
| **Model**<br><small style="color:#999">モデル</small> | Hand-written CNN<br><small style="color:#999">手書き CNN</small> | ViT-Base (pre-trained)<br><small style="color:#999">ViT-Base（事前学習済）</small> |
| **Preprocessing**<br><small style="color:#999">前処理</small> | Manual<br><small style="color:#999">手動実装</small> | Automatic<br><small style="color:#999">自動</small> |
| **Training**<br><small style="color:#999">学習</small> | SGD written by hand<br><small style="color:#999">SGD 手動記述</small> | Not required (fine-tuning is separate)<br><small style="color:#999">不要（Fine-tuning は別途）</small> |
| **Accuracy (approx.)**<br><small style="color:#999">精度目安</small> | ~70 % (CIFAR-10) | ~81 % (ImageNet) |
| **Theano compile step**<br><small style="color:#999">コンパイル</small> | Tens of seconds<br><small style="color:#999">数十秒</small> | Not required<br><small style="color:#999">不要</small> |
"""

# ── Inference function / 推論関数 ────────────────────────────────────────────

def run_inference(image):
    """Classify the uploaded image and build the values for the three outputs.

    Args:
        image: The current value of the image input, or ``None`` when no
            image is present.

    Returns:
        A 3-tuple ``(label_scores, CODE_2015, CODE_2025)``.
        ``label_scores`` maps each predicted label to its score as a
        ``float`` and is empty when ``image`` is ``None``. The two code
        snippets are passed through unchanged. How many predictions come
        back is decided by ``classify_2025``, not by this function.
    """
    label_scores = {}
    if image is not None:
        # Each prediction is read as a mapping with "label" and "score" keys.
        for prediction in classify_2025(image):
            label = prediction["label"]
            label_scores[label] = float(prediction["score"])
    return label_scores, CODE_2015, CODE_2025


# ── UI / UI 定義 ─────────────────────────────────────────────────────────────

# Custom stylesheet handed to gr.Blocks(css=CSS).
# The first two rules put a colored left border on the code panels tagged with
# elem_classes "code-2015" (gray) and "code-2025" (green).
# NOTE(review): these rules target a <textarea> inside the panel -- confirm
# that the gr.Code component actually renders one, otherwise they match nothing.
# NOTE(review): no component in the visible part of this file is given the
# "bilingual-label" class, so the last rule appears to be unused.
CSS = """
.code-2015 textarea { border-left: 3px solid #888780 !important; }
.code-2025 textarea { border-left: 3px solid #1D9E75 !important; }
.bilingual-label .label-wrap span {
    display: block;
}
"""

def _bi(en, ja):
    """Return bilingual Markdown: English normal, Japanese small gray below."""
    return f"{en}<br><small style='color:#999'>{ja}</small>"


# Page theme: default Gradio theme with a Japanese-friendly font stack and an
# emerald primary color. Built up front so the Blocks(...) call stays short.
_theme = gr.themes.Default(
    font=["BIZ UDPGothic", "Noto Sans JP", "sans-serif"],
    primary_hue=gr.themes.colors.emerald,
)

with gr.Blocks(
    theme=_theme,
    css=CSS,
    title="Image Classification: 2015 vs 2025",
) as demo:

    # Page header (English text with a small gray Japanese gloss).
    gr.Markdown(
        value="""
        # Image Classification Demo — 2015 vs 2025
        <small style="color:#999">画像分類デモ — 2015 vs 2025 実装比較</small>

        **The same feature (image → category prediction) compared across two generations of implementation.**
        <br><small style="color:#999">同じ機能（画像 → カテゴリ予測）を 2 世代の実装で比較する。</small>

        Inference is handled by the 2025 implementation (ViT).
        <br><small style="color:#999">推論は 2025 実装（ViT）が担います。</small>
        """
    )

    with gr.Row():
        # Left column: image upload, run button, and prediction results.
        with gr.Column(scale=1):
            img_input = gr.Image(
                label="Upload an Image / 画像をアップロード",
                type="pil",
                height=280,
            )
            run_btn = gr.Button(
                value="▶  Run Classification / 分類を実行",
                variant="primary",
            )
            results_output = gr.Label(
                label="Prediction Results (2025 implementation) / 予測結果（2025 実装）",
                num_top_classes=5,
            )

        # Right column (twice as wide): the two code listings side by side.
        with gr.Column(scale=2):
            gr.Markdown(
                value="""
                ### Code Implementation Comparison
                <small style="color:#999">実装コードの比較</small>

                > Difference in lines of code required to implement the same inference feature.
                > <small style="color:#999">同じ推論機能を実装するのに必要なコード量の差</small>
                """
            )
            with gr.Row():
                # 2015 listing (read-only).
                with gr.Column():
                    gr.Markdown(
                        value="**🕰️ 2015 Implementation — Theano + NumPy (~130 lines)**"
                        "<br><small style='color:#999'>2015 実装 — Theano + NumPy（約 130 行）</small>"
                    )
                    code_2015_box = gr.Code(
                        value=CODE_2015,
                        label="",
                        language="python",
                        lines=30,
                        elem_classes=["code-2015"],
                        interactive=False,
                    )
                # 2025 listing (read-only).
                with gr.Column():
                    gr.Markdown(
                        value="**✅ 2025 Implementation — HuggingFace Transformers (5 lines)**"
                        "<br><small style='color:#999'>2025 実装 — HuggingFace Transformers（5 行）</small>"
                    )
                    code_2025_box = gr.Code(
                        value=CODE_2025,
                        label="",
                        language="python",
                        lines=30,
                        elem_classes=["code-2025"],
                        interactive=False,
                    )

    # Horizontal rule, then the summary table below the two columns.
    gr.Markdown(value="---")
    gr.Markdown(
        value="### Implementation Comparison Summary\n"
        "<small style='color:#999'>実装比較サマリー</small>"
    )
    gr.Markdown(value=COMPARISON_MD)

    # Event binding: the button click and any change of the image value are
    # wired to the same handler with the same inputs and outputs. Fresh lists
    # are built for each registration, button first, then image.
    for _bind in (run_btn.click, img_input.change):
        _bind(
            fn=run_inference,
            inputs=[img_input],
            outputs=[results_output, code_2015_box, code_2025_box],
        )

# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()