Msk7000's picture
Upload 6 files
a60082f verified
"""
Image Classification Demo — 2015 vs 2025 Implementation Comparison
画像分類デモアプリ — 2015 vs 2025 実装比較
====================================================
Compares the same feature (image → category prediction) across two generations.
同じ機能（画像 → カテゴリ予測）を 2 世代の実装で並べて表示する。
Inference is handled by the 2025 implementation (HuggingFace ViT).
推論は 2025 実装（HuggingFace ViT）が担い、
The 2015 implementation (Theano CNN) is shown as reference code.
2015 実装（Theano CNN）は実装コードを参照表示する。
Usage / 起動方法:
    python app.py
"""
import textwrap
import gradio as gr
from model_2025 import classify as classify_2025
# ── Code snippets for display / 表示用コードスニペット ─────────────────────────
CODE_2015 = textwrap.dedent("""\
# 2015 Implementation โ€” Theano + NumPy (excerpt, ~130 lines)
# 2015 ๅฎŸ่ฃ… โ€” Theano + NumPy๏ผˆๆŠœ็ฒ‹ใƒป็ด„ 130 ่กŒ๏ผ‰
# โถ Manually Initialize the Weights
# ้‡ใฟใ‚’ๆ‰‹ๅ‹•ใงๅˆๆœŸๅŒ–
W0 = theano.shared(np.random.normal(0, 0.01, (32,3,5,5)), 'W0')
W1 = theano.shared(np.random.normal(0, 0.01, (64,32,5,5)), 'W1')
W2 = theano.shared(np.random.normal(0, 0.01, (1600,512)), 'W2')
W3 = theano.shared(np.random.normal(0, 0.01, (512,10)), 'W3')
# ... b0, b1, b2, b3 defined in the same way / ๅŒๆง˜ใซๅฎš็พฉ ...
# โท Hand-write the Symbolic Computation Graph
# ใ‚ทใƒณใƒœใƒซใ‚ฐใƒฉใƒ•ใ‚’ๆ‰‹ๆ›ธใ
x = T.tensor4('x')
conv0 = T.tanh(pool.pool_2d(
conv2d(x, W0, filter_shape=(32,3,5,5))
+ b0.dimshuffle('x',0,'x','x'),
ws=(2,2), ignore_border=True))
conv1 = T.tanh(pool.pool_2d(
conv2d(conv0, W1, filter_shape=(64,32,5,5))
+ b1.dimshuffle('x',0,'x','x'),
ws=(2,2), ignore_border=True))
flat = conv1.flatten(2)
fc = T.tanh(T.dot(flat, W2) + b2)
out = T.nnet.softmax(T.dot(fc, W3) + b3)
# โธ Manually Define Loss, Gradients, and SGD Update Rules
# ๆๅคฑใƒปๅ‹พ้…ใƒปSGD ๆ›ดๆ–ฐๅ‰‡ใ‚’ๆ‰‹ๅ‹•ๅฎš็พฉ
loss = -T.mean(T.log(out)[T.arange(y.shape[0]), y])
grads = T.grad(loss, [W0,b0,W1,b1,W2,b2,W3,b3])
updates = [(p, p - 0.01*g) for p, g in zip(params, grads)]
# โน Compile Theano Functions (takes tens of seconds)
# Theano ้–ขๆ•ฐใ‚’ใ‚ณใƒณใƒ‘ใ‚คใƒซ๏ผˆๆ•ฐๅ็ง’ใ‹ใ‹ใ‚‹๏ผ‰
train_fn = theano.function([x, y], loss, updates=updates)
pred_fn = theano.function([x], T.argmax(out, axis=1))
# โบ Manually Implement Preprocessing
# ๅ‰ๅ‡ฆ็†ใ‚’ๆ‰‹ๅ‹•ๅฎŸ่ฃ…
def preprocess(path):
img = Image.open(path).convert('RGB').resize((32,32))
arr = (np.array(img)/255.0 - MEAN) / STD
return arr.transpose(2,0,1)[np.newaxis]
# โป Manually Implement the Training Loop
# ๅญฆ็ฟ’ใƒซใƒผใƒ—ใ‚’ๆ‰‹ๅ‹•ๅฎŸ่ฃ…
for epoch in range(200):
for batch in range(n // 50):
train_fn(X[batch], y[batch])
# โผ Run Inference / ๆŽจ่ซ–
idx = pred_fn(preprocess('cat.jpg'))[0]
return LABELS[idx]
""")
# Snippet of the 2025 HuggingFace implementation. In the visible code it is
# used only as display text for the read-only code box.
# textwrap.dedent() strips the common 4-space margin used here for layout.
CODE_2025 = textwrap.dedent("""\
    # 2025 Implementation — HuggingFace Transformers (just 5 lines)
    # 2025 実装 — HuggingFace Transformers（実質 5 行）
    from transformers import pipeline
    # ❶ Load a Pre-trained Model
    # 事前学習済みモデルをロード
    classifier = pipeline(
        "image-classification",
        model="google/vit-base-patch16-224",
    )
    # ❷ Run Inference (preprocessing & postprocessing are automatic)
    # 推論（前処理・後処理すべて自動）
    result = classifier("cat.jpg", top_k=5)
    # → [{'label': 'tabby cat', 'score': 0.923}, ...]
""")
# ── Comparison table / 比較表 ────────────────────────────────────────────────
# Markdown table summarising the two implementations. Each cell carries the
# English text first and the Japanese text in a small gray <small> tag below.
# The leading backslash keeps the table header on the first line of the string.
COMPARISON_MD = """\
| Item<br><small style="color:#999">項目</small> | 2015 (Theano) | 2025 (HuggingFace) |
|---|---|---|
| **Lines of code**<br><small style="color:#999">実装行数</small> | ~130 lines | 5 lines |
| **Model**<br><small style="color:#999">モデル</small> | Hand-written CNN<br><small style="color:#999">手書き CNN</small> | ViT-Base (pre-trained)<br><small style="color:#999">ViT-Base（事前学習済）</small> |
| **Preprocessing**<br><small style="color:#999">前処理</small> | Manual<br><small style="color:#999">手動実装</small> | Automatic<br><small style="color:#999">自動</small> |
| **Training**<br><small style="color:#999">学習</small> | SGD written by hand<br><small style="color:#999">SGD 手動記述</small> | Not required (fine-tuning is separate)<br><small style="color:#999">不要（Fine-tuning は別途）</small> |
| **Accuracy (approx.)**<br><small style="color:#999">精度目安</small> | ~70 % (CIFAR-10) | ~81 % (ImageNet) |
| **Theano compile step**<br><small style="color:#999">コンパイル</small> | Tens of seconds<br><small style="color:#999">数十秒</small> | Not required<br><small style="color:#999">不要</small> |
"""
# ── Inference function / 推論関数 ────────────────────────────────────────────
def run_inference(image):
    """Classify an uploaded image with the 2025 (ViT) implementation.

    Args:
        image: The image to classify, or ``None`` when nothing has been
            uploaded (in which case the classifier is not called).

    Returns:
        A 3-tuple ``(scores, CODE_2015, CODE_2025)``. ``scores`` maps each
        predicted label to its score converted to ``float``; it is empty
        when ``image`` is ``None``. The two code snippets are returned
        unchanged. How many predictions come back is decided by
        ``classify_2025`` (presumably the top 5 — confirm in model_2025).
    """
    scores = {}
    if image is not None:
        # Each prediction is read as a mapping with "label" and "score" keys.
        for prediction in classify_2025(image):
            scores[prediction["label"]] = float(prediction["score"])
    return scores, CODE_2015, CODE_2025
# โ”€โ”€ UI / UI ๅฎš็พฉ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# Extra stylesheet for the page. The first two rules draw a colored left
# border on textareas inside elements carrying the "code-2015" (gray) and
# "code-2025" (green) classes; the last rule makes label spans under
# ".bilingual-label" render as blocks.
# NOTE(review): no component visible in this file sets "bilingual-label" —
# confirm whether that rule is still needed.
CSS = "\n".join(
    (
        "",  # the stylesheet starts with a newline
        ".code-2015 textarea { border-left: 3px solid #888780 !important; }",
        ".code-2025 textarea { border-left: 3px solid #1D9E75 !important; }",
        ".bilingual-label .label-wrap span {",
        "display: block;",
        "}",
        "",  # ...and ends with one
    )
)
def _bi(en, ja):
"""Return bilingual Markdown: English normal, Japanese small gray below."""
return f"{en}<br><small style='color:#999'>{ja}</small>"
# Build the page. Components are created inside the Blocks context and the
# resulting app object is bound to ``demo``.
with gr.Blocks(
    title="Image Classification: 2015 vs 2025",
    css=CSS,
    theme=gr.themes.Default(
        font=["BIZ UDPGothic", "Noto Sans JP", "sans-serif"],
        primary_hue=gr.themes.colors.emerald,
    ),
) as demo:
    # Page header. Blank lines separate the paragraphs so each English line
    # keeps its own Japanese sub-line instead of running into the next one.
    gr.Markdown(
        """
        # Image Classification Demo — 2015 vs 2025
        <small style="color:#999">画像分類デモ — 2015 vs 2025 実装比較</small>

        **The same feature (image → category prediction) compared across two generations of implementation.**
        <br><small style="color:#999">同じ機能（画像 → カテゴリ予測）を 2 世代の実装で比較する。</small>

        Inference is handled by the 2025 implementation (ViT).
        <br><small style="color:#999">推論は 2025 実装（ViT）が担います。</small>
        """
    )

    with gr.Row():
        # Left column: image upload, run button, prediction results.
        with gr.Column(scale=1):
            img_input = gr.Image(
                type="pil",
                label="Upload an Image / 画像をアップロード",
                height=280,
            )
            run_btn = gr.Button(
                "▶ Run Classification / 分類を実行",
                variant="primary",
            )
            results_output = gr.Label(
                num_top_classes=5,
                label="Prediction Results (2025 implementation) / 予測結果（2025 実装）",
            )

        # Right column: the two code snippets side by side.
        with gr.Column(scale=2):
            gr.Markdown(
                """
                ### Code Implementation Comparison
                <small style="color:#999">実装コードの比較</small>
                > Difference in lines of code required to implement the same inference feature.
                > <small style="color:#999">同じ推論機能を実装するのに必要なコード量の差</small>
                """
            )
            with gr.Row():
                with gr.Column():
                    # _bi() yields exactly "<en><br><small ...><ja></small>".
                    gr.Markdown(
                        _bi(
                            "**🕰️ 2015 Implementation — Theano + NumPy (~130 lines)**",
                            "2015 実装 — Theano + NumPy（約 130 行）",
                        )
                    )
                    code_2015_box = gr.Code(
                        value=CODE_2015,
                        language="python",
                        label="",
                        lines=30,
                        interactive=False,
                        elem_classes=["code-2015"],
                    )
                with gr.Column():
                    gr.Markdown(
                        _bi(
                            "**✅ 2025 Implementation — HuggingFace Transformers (5 lines)**",
                            "2025 実装 — HuggingFace Transformers（5 行）",
                        )
                    )
                    code_2025_box = gr.Code(
                        value=CODE_2025,
                        language="python",
                        label="",
                        lines=30,
                        interactive=False,
                        elem_classes=["code-2025"],
                    )

    gr.Markdown("---")
    # A newline (not <br>) is used here so the "###" heading ends before the
    # Japanese sub-line starts.
    gr.Markdown(
        "### Implementation Comparison Summary\n"
        "<small style='color:#999'>実装比較サマリー</small>"
    )
    gr.Markdown(COMPARISON_MD)

    # Event binding: both pressing the button and changing the image run the
    # same handler with the same inputs and outputs.
    inference_binding = {
        "fn": run_inference,
        "inputs": [img_input],
        "outputs": [results_output, code_2015_box, code_2025_box],
    }
    run_btn.click(**inference_binding)
    img_input.change(**inference_binding)


if __name__ == "__main__":
    # Start the Gradio server only when the file is run as a script.
    demo.launch()