"""
Image Classification Demo — 2015 vs 2025 Implementation Comparison
画像分類デモアプリ — 2015 vs 2025 実装比較
====================================================
Compares the same feature (image → category prediction) across two generations.
同じ機能（画像 → カテゴリ予測）を 2 世代の実装で並べて表示する。

Inference is handled by the 2025 implementation (HuggingFace ViT).
推論は 2025 実装（HuggingFace ViT）が担う。
The 2015 implementation (Theano CNN) is shown as reference code.
2015 実装（Theano CNN）は実装コードを参照表示する。

Usage / 起動方法:
    python app.py
"""
|
|
import textwrap

import gradio as gr

from model_2025 import classify as classify_2025
|
|
| CODE_2015 = textwrap.dedent("""\ |
| # 2015 Implementation โ Theano + NumPy (excerpt, ~130 lines) |
| # 2015 ๅฎ่ฃ
โ Theano + NumPy๏ผๆ็ฒใป็ด 130 ่ก๏ผ |
| |
| # โถ Manually Initialize the Weights |
| # ้ใฟใๆๅใงๅๆๅ |
| W0 = theano.shared(np.random.normal(0, 0.01, (32,3,5,5)), 'W0') |
| W1 = theano.shared(np.random.normal(0, 0.01, (64,32,5,5)), 'W1') |
| W2 = theano.shared(np.random.normal(0, 0.01, (1600,512)), 'W2') |
| W3 = theano.shared(np.random.normal(0, 0.01, (512,10)), 'W3') |
| # ... b0, b1, b2, b3 defined in the same way / ๅๆงใซๅฎ็พฉ ... |
| |
| # โท Hand-write the Symbolic Computation Graph |
| # ใทใณใใซใฐใฉใใๆๆธใ |
| x = T.tensor4('x') |
| conv0 = T.tanh(pool.pool_2d( |
| conv2d(x, W0, filter_shape=(32,3,5,5)) |
| + b0.dimshuffle('x',0,'x','x'), |
| ws=(2,2), ignore_border=True)) |
| conv1 = T.tanh(pool.pool_2d( |
| conv2d(conv0, W1, filter_shape=(64,32,5,5)) |
| + b1.dimshuffle('x',0,'x','x'), |
| ws=(2,2), ignore_border=True)) |
| flat = conv1.flatten(2) |
| fc = T.tanh(T.dot(flat, W2) + b2) |
| out = T.nnet.softmax(T.dot(fc, W3) + b3) |
| |
| # โธ Manually Define Loss, Gradients, and SGD Update Rules |
| # ๆๅคฑใปๅพ้
ใปSGD ๆดๆฐๅใๆๅๅฎ็พฉ |
| loss = -T.mean(T.log(out)[T.arange(y.shape[0]), y]) |
| grads = T.grad(loss, [W0,b0,W1,b1,W2,b2,W3,b3]) |
| updates = [(p, p - 0.01*g) for p, g in zip(params, grads)] |
| |
| # โน Compile Theano Functions (takes tens of seconds) |
| # Theano ้ขๆฐใใณใณใใคใซ๏ผๆฐๅ็งใใใ๏ผ |
| train_fn = theano.function([x, y], loss, updates=updates) |
| pred_fn = theano.function([x], T.argmax(out, axis=1)) |
| |
| # โบ Manually Implement Preprocessing |
| # ๅๅฆ็ใๆๅๅฎ่ฃ
|
| def preprocess(path): |
| img = Image.open(path).convert('RGB').resize((32,32)) |
| arr = (np.array(img)/255.0 - MEAN) / STD |
| return arr.transpose(2,0,1)[np.newaxis] |
| |
| # โป Manually Implement the Training Loop |
| # ๅญฆ็ฟใซใผใใๆๅๅฎ่ฃ
|
| for epoch in range(200): |
| for batch in range(n // 50): |
| train_fn(X[batch], y[batch]) |
| |
| # โผ Run Inference / ๆจ่ซ |
| idx = pred_fn(preprocess('cat.jpg'))[0] |
| return LABELS[idx] |
| """) |
|
|
# The 2025 HuggingFace implementation as display text for the UI code panel.
# `textwrap.dedent` strips the common 4-space source indentation; the
# backslash after the opening quotes avoids a leading blank line.
CODE_2025 = textwrap.dedent("""\
    # 2025 Implementation — HuggingFace Transformers (just 5 lines)
    # 2025 実装 — HuggingFace Transformers（実質 5 行）

    from transformers import pipeline

    # ❶ Load a Pre-trained Model
    # 事前学習済みモデルをロード
    classifier = pipeline(
        "image-classification",
        model="google/vit-base-patch16-224",
    )

    # ❷ Run Inference (preprocessing & postprocessing are automatic)
    # 推論（前処理・後処理すべて自動）
    result = classifier("cat.jpg", top_k=5)
    # → [{'label': 'tabby cat', 'score': 0.923}, ...]
""")
|
|
# Markdown table summarizing the 2015 vs 2025 implementations. Each cell shows
# the English text first and, where present, a small gray Japanese sub-label.
# Rows must stay on a single line each: Markdown tables do not allow a row to
# span multiple source lines.
COMPARISON_MD = """\
| Item<br><small style="color:#999">項目</small> | 2015 (Theano) | 2025 (HuggingFace) |
|---|---|---|
| **Lines of code**<br><small style="color:#999">実装行数</small> | ~130 lines | 5 lines |
| **Model**<br><small style="color:#999">モデル</small> | Hand-written CNN<br><small style="color:#999">手書き CNN</small> | ViT-Base (pre-trained)<br><small style="color:#999">ViT-Base（事前学習済）</small> |
| **Preprocessing**<br><small style="color:#999">前処理</small> | Manual<br><small style="color:#999">手動実装</small> | Automatic<br><small style="color:#999">自動</small> |
| **Training**<br><small style="color:#999">学習</small> | SGD written by hand<br><small style="color:#999">SGD 手動記述</small> | Not required (fine-tuning is separate)<br><small style="color:#999">不要（Fine-tuning は別途）</small> |
| **Accuracy (approx.)**<br><small style="color:#999">精度目安</small> | ~70 % (CIFAR-10) | ~81 % (ImageNet) |
| **Theano compile step**<br><small style="color:#999">コンパイル</small> | Tens of seconds<br><small style="color:#999">数十秒</small> | Not required<br><small style="color:#999">不要</small> |
"""
|
|
def run_inference(image):
    """Classify the uploaded image and return values for the three UI outputs.

    Args:
        image: The value delivered by the image input, or ``None`` when no
            image is present.

    Returns:
        A 3-tuple ``(label_scores, CODE_2015, CODE_2025)``. ``label_scores``
        maps each predicted label to its score as a ``float`` and is empty
        when ``image`` is ``None``. The two code snippets are passed through
        unchanged.
    """
    label_scores = {}
    if image is not None:
        # classify_2025 yields mappings with "label" and "score" entries.
        for prediction in classify_2025(image):
            label = prediction["label"]
            label_scores[label] = float(prediction["score"])
    return label_scores, CODE_2015, CODE_2025
|
|
# Extra stylesheet handed to the Blocks app: a left-border accent on textarea
# elements under the `.code-2015` / `.code-2025` classes, and block-level
# spans inside `.bilingual-label` label wrappers.
# The empty first and last entries reproduce the leading and trailing
# newlines of the stylesheet text.
CSS = "\n".join(
    [
        "",
        ".code-2015 textarea { border-left: 3px solid #888780 !important; }",
        ".code-2025 textarea { border-left: 3px solid #1D9E75 !important; }",
        ".bilingual-label .label-wrap span {",
        "    display: block;",
        "}",
        "",
    ]
)
|
|
| def _bi(en, ja): |
| """Return bilingual Markdown: English normal, Japanese small gray below.""" |
| return f"{en}<br><small style='color:#999'>{ja}</small>" |
|
|
|
|
# Build the Gradio UI.
# Layout: page header, then one row holding the upload/predict controls
# (left, narrower) and the two side-by-side code panels (right, wider),
# followed by the comparison table.
with gr.Blocks(
    title="Image Classification: 2015 vs 2025",
    css=CSS,
    theme=gr.themes.Default(
        font=["BIZ UDPGothic", "Noto Sans JP", "sans-serif"],
        primary_hue=gr.themes.colors.emerald,
    ),
) as demo:

    # Page header. The literal is dedented explicitly so its source
    # indentation can never be interpreted as a Markdown code block.
    gr.Markdown(
        textwrap.dedent(
            """
            # Image Classification Demo — 2015 vs 2025
            <small style="color:#999">画像分類デモ — 2015 vs 2025 実装比較</small>

            **The same feature (image → category prediction) compared across two generations of implementation.**
            <br><small style="color:#999">同じ機能（画像 → カテゴリ予測）を 2 世代の実装で比較する。</small>

            Inference is handled by the 2025 implementation (ViT).
            <br><small style="color:#999">推論は 2025 実装（ViT）が担います。</small>
            """
        )
    )

    with gr.Row():

        # Left column: image upload, run button, and prediction results.
        with gr.Column(scale=1):
            img_input = gr.Image(
                type="pil",
                label="Upload an Image / 画像をアップロード",
                height=280,
            )
            run_btn = gr.Button(
                "▶ Run Classification / 分類を実行",
                variant="primary",
            )
            results_output = gr.Label(
                num_top_classes=5,
                label="Prediction Results (2025 implementation) / 予測結果（2025 実装）",
            )

        # Right column: the 2015 and 2025 code snippets side by side.
        with gr.Column(scale=2):
            gr.Markdown(
                textwrap.dedent(
                    """
                    ### Code Implementation Comparison
                    <small style="color:#999">実装コードの比較</small>

                    > Difference in lines of code required to implement the same inference feature.
                    > <small style="color:#999">同じ推論機能を実装するのに必要なコード量の差</small>
                    """
                )
            )
            with gr.Row():
                with gr.Column():
                    # _bi yields "<en><br><small …><ja></small>", the same
                    # markup used for the other bilingual captions.
                    gr.Markdown(
                        _bi(
                            "**🕰️ 2015 Implementation — Theano + NumPy (~130 lines)**",
                            "2015 実装 — Theano + NumPy（約 130 行）",
                        )
                    )
                    code_2015_box = gr.Code(
                        value=CODE_2015,
                        language="python",
                        label="",
                        lines=30,
                        interactive=False,
                        elem_classes=["code-2015"],
                    )
                with gr.Column():
                    gr.Markdown(
                        _bi(
                            "**✅ 2025 Implementation — HuggingFace Transformers (5 lines)**",
                            "2025 実装 — HuggingFace Transformers（5 行）",
                        )
                    )
                    code_2025_box = gr.Code(
                        value=CODE_2025,
                        language="python",
                        label="",
                        lines=30,
                        interactive=False,
                        elem_classes=["code-2025"],
                    )

    # Summary table below the main row.
    gr.Markdown("---")
    gr.Markdown(
        "### Implementation Comparison Summary\n"
        "<small style='color:#999'>実装比較サマリー</small>"
    )
    gr.Markdown(COMPARISON_MD)

    # Run inference on explicit button clicks and whenever the image input
    # changes. run_inference also returns the two (constant) code snippets,
    # so the code panels are listed as outputs alongside the result label.
    run_btn.click(
        fn=run_inference,
        inputs=[img_input],
        outputs=[results_output, code_2015_box, code_2025_box],
    )
    img_input.change(
        fn=run_inference,
        inputs=[img_input],
        outputs=[results_output, code_2015_box, code_2025_box],
    )


if __name__ == "__main__":
    demo.launch()
|
|