Spaces:

Msk7000
/

Image_Clf_App_Implementation_Comparison

Running

App Files Files Community

Image_Clf_App_Implementation_Comparison / app.py

Msk7000

Upload 6 files

a60082f verified 4 days ago

raw

history blame contribute delete

10.3 kB

	"""
	Image Classification Demo — 2015 vs 2025 Implementation Comparison
	画像分類デモアプリ — 2015 vs 2025 実装比較
	====================================================
	Compares the same feature (image → category prediction) across two generations.
	同じ機能（画像 → カテゴリ予測）を 2 世代の実装で並べて表示する。

	Inference is handled by the 2025 implementation (HuggingFace ViT).
	推論は 2025 実装（HuggingFace ViT）が担い、
	The 2015 implementation (Theano CNN) is shown as reference code.
	2015 実装（Theano CNN）は実装コードを参照表示する。

	Usage / 起動方法:
	python app.py
	"""

	import textwrap

	import gradio as gr
	from model_2025 import classify as classify_2025

	# ── Code snippets for display / 表示用コードスニペット ─────────────────────────

	# Display-only text for the "2015" panel: it is passed through textwrap.dedent
	# and used as the initial value of a non-interactive gr.Code box. Nothing in
	# this file executes the snippet, and it is labelled an excerpt, so names such
	# as `params`, `y`, `MEAN`, `STD` and `LABELS` are intentionally left undefined.
	CODE_2015 = textwrap.dedent("""\
	# 2015 Implementation — Theano + NumPy (excerpt, ~130 lines)
	# 2015 実装 — Theano + NumPy（抜粋・約 130 行）

	# ❶ Manually Initialize the Weights
	# 重みを手動で初期化
	W0 = theano.shared(np.random.normal(0, 0.01, (32,3,5,5)), 'W0')
	W1 = theano.shared(np.random.normal(0, 0.01, (64,32,5,5)), 'W1')
	W2 = theano.shared(np.random.normal(0, 0.01, (1600,512)), 'W2')
	W3 = theano.shared(np.random.normal(0, 0.01, (512,10)), 'W3')
	# ... b0, b1, b2, b3 defined in the same way / 同様に定義 ...

	# ❷ Hand-write the Symbolic Computation Graph
	# シンボルグラフを手書き
	x = T.tensor4('x')
	conv0 = T.tanh(pool.pool_2d(
	conv2d(x, W0, filter_shape=(32,3,5,5))
	+ b0.dimshuffle('x',0,'x','x'),
	ws=(2,2), ignore_border=True))
	conv1 = T.tanh(pool.pool_2d(
	conv2d(conv0, W1, filter_shape=(64,32,5,5))
	+ b1.dimshuffle('x',0,'x','x'),
	ws=(2,2), ignore_border=True))
	flat = conv1.flatten(2)
	fc = T.tanh(T.dot(flat, W2) + b2)
	out = T.nnet.softmax(T.dot(fc, W3) + b3)

	# ❸ Manually Define Loss, Gradients, and SGD Update Rules
	# 損失・勾配・SGD 更新則を手動定義
	loss = -T.mean(T.log(out)[T.arange(y.shape[0]), y])
	grads = T.grad(loss, [W0,b0,W1,b1,W2,b2,W3,b3])
	updates = [(p, p - 0.01*g) for p, g in zip(params, grads)]

	# ❹ Compile Theano Functions (takes tens of seconds)
	# Theano 関数をコンパイル（数十秒かかる）
	train_fn = theano.function([x, y], loss, updates=updates)
	pred_fn = theano.function([x], T.argmax(out, axis=1))

	# ❺ Manually Implement Preprocessing
	# 前処理を手動実装
	def preprocess(path):
	img = Image.open(path).convert('RGB').resize((32,32))
	arr = (np.array(img)/255.0 - MEAN) / STD
	return arr.transpose(2,0,1)[np.newaxis]

	# ❻ Manually Implement the Training Loop
	# 学習ループを手動実装
	for epoch in range(200):
	for batch in range(n // 50):
	train_fn(X[batch], y[batch])

	# ❼ Run Inference / 推論
	idx = pred_fn(preprocess('cat.jpg'))[0]
	return LABELS[idx]
	""")

	# Display-only text for the "2025" panel: dedented and used as the initial
	# value of a non-interactive gr.Code box. The app's actual inference goes
	# through `classify_2025` imported from model_2025, not through this snippet.
	CODE_2025 = textwrap.dedent("""\
	# 2025 Implementation — HuggingFace Transformers (just 5 lines)
	# 2025 実装 — HuggingFace Transformers（実質 5 行）

	from transformers import pipeline

	# ❶ Load a Pre-trained Model
	# 事前学習済みモデルをロード
	classifier = pipeline(
	"image-classification",
	model="google/vit-base-patch16-224",
	)

	# ❷ Run Inference (preprocessing & postprocessing are automatic)
	# 推論（前処理・後処理すべて自動）
	result = classifier("cat.jpg", top_k=5)
	# → [{'label': 'tabby cat', 'score': 0.923}, ...]
	""")

	# ── Comparison table / 比較表 ────────────────────────────────────────────────

	# Markdown source of the three-column summary table rendered at the bottom of
	# the page via gr.Markdown(COMPARISON_MD).
	#
	# Column delimiters are plain "|" characters. Writing them as "\|" is wrong on
	# two levels: it is an invalid escape sequence in a non-raw Python string, and
	# the backslash that survives into the text makes Markdown treat every pipe as
	# literal cell content, so the rows are no longer recognised as a table.
	COMPARISON_MD = """\
	| Item<br><small style="color:#999">項目</small> | 2015 (Theano) | 2025 (HuggingFace) |
	|---|---|---|
	| Lines of code<br><small style="color:#999">実装行数</small> | ~130 lines | 5 lines |
	| Model<br><small style="color:#999">モデル</small> | Hand-written CNN<br><small style="color:#999">手書き CNN</small> | ViT-Base (pre-trained)<br><small style="color:#999">ViT-Base（事前学習済）</small> |
	| Preprocessing<br><small style="color:#999">前処理</small> | Manual<br><small style="color:#999">手動実装</small> | Automatic<br><small style="color:#999">自動</small> |
	| Training<br><small style="color:#999">学習</small> | SGD written by hand<br><small style="color:#999">SGD 手動記述</small> | Not required (fine-tuning is separate)<br><small style="color:#999">不要（Fine-tuning は別途）</small> |
	| Accuracy (approx.)<br><small style="color:#999">精度目安</small> | ~70 % (CIFAR-10) | ~81 % (ImageNet) |
	| Theano compile step<br><small style="color:#999">コンパイル</small> | Tens of seconds<br><small style="color:#999">数十秒</small> | Not required<br><small style="color:#999">不要</small> |
	"""

	# ── Inference function / 推論関数 ────────────────────────────────────────────

	def run_inference(image):
	    """Classify the uploaded image with the 2025 model and format the result.

	    Args:
	        image: The value delivered by the image input, or None when no image
	            is present. It is forwarded unchanged to ``classify_2025``.

	    Returns:
	        A 3-tuple ``(scores, CODE_2015, CODE_2025)``. ``scores`` maps each
	        predicted label to its score as a float and is empty when ``image``
	        is None. The two code snippets are returned as-is because the event
	        bindings list both code boxes as outputs.
	    """
	    scores = {}
	    if image is not None:
	        # Each prediction is expected to expose "label" and "score" keys;
	        # how many predictions come back is decided by classify_2025.
	        for prediction in classify_2025(image):
	            scores[prediction["label"]] = float(prediction["score"])
	    return scores, CODE_2015, CODE_2025


	# ── UI / UI 定義 ─────────────────────────────────────────────────────────────

	# Custom stylesheet handed to gr.Blocks(css=CSS). The `.code-2015` and
	# `.code-2025` selectors correspond to the elem_classes set on the two gr.Code
	# boxes and give each a coloured left border.
	# NOTE(review): no component in the visible part of this file is assigned the
	# `bilingual-label` class — confirm whether that rule is still needed.
	CSS = """
	.code-2015 textarea { border-left: 3px solid #888780 !important; }
	.code-2025 textarea { border-left: 3px solid #1D9E75 !important; }
	.bilingual-label .label-wrap span {
	display: block;
	}
	"""

	def _bi(en, ja):
	"""Return bilingual Markdown: English normal, Japanese small gray below."""
	return f"{en}<br><small style='color:#999'>{ja}</small>"


	# Page layout and event wiring for the demo; the resulting Blocks object is
	# bound to `demo`, which the __main__ guard launches.
	# NOTE(review): in this copy every line sits at the same indentation level, so
	# the nesting of the `with` blocks below is not visible. It has to be restored
	# before the module can run; the section comments indicate the intended
	# left-column / right-column grouping.
	with gr.Blocks(
	title="Image Classification: 2015 vs 2025",
	css=CSS,
	theme=gr.themes.Default(
	font=["BIZ UDPGothic", "Noto Sans JP", "sans-serif"],
	primary_hue=gr.themes.colors.emerald,
	),
	) as demo:

	# Page heading and introduction (English text with a small grey Japanese
	# line under each sentence).
	gr.Markdown(
	"""
	# Image Classification Demo — 2015 vs 2025
	<small style="color:#999">画像分類デモ — 2015 vs 2025 実装比較</small>

	The same feature (image → category prediction) compared across two generations of implementation.
	<br><small style="color:#999">同じ機能（画像 → カテゴリ予測）を 2 世代の実装で比較する。</small>

	Inference is handled by the 2025 implementation (ViT).
	<br><small style="color:#999">推論は 2025 実装（ViT）が担います。</small>
	"""
	)

	with gr.Row():
	# ── Left column: upload + result ──────────────────────────────────
	with gr.Column(scale=1):
	# type="pil" requests that the handler receive the upload as a PIL image.
	img_input = gr.Image(
	type="pil",
	label="Upload an Image / 画像をアップロード",
	height=280,
	)
	run_btn = gr.Button(
	"▶ Run Classification / 分類を実行",
	variant="primary",
	)
	# Receives the label -> score dict returned by run_inference.
	results_output = gr.Label(
	num_top_classes=5,
	label="Prediction Results (2025 implementation) / 予測結果（2025 実装）",
	)

	# ── Right column: code comparison ─────────────────────────────────
	with gr.Column(scale=2):
	gr.Markdown(
	"""
	### Code Implementation Comparison
	<small style="color:#999">実装コードの比較</small>

	> Difference in lines of code required to implement the same inference feature.
	> <small style="color:#999">同じ推論機能を実装するのに必要なコード量の差</small>
	"""
	)
	with gr.Row():
	with gr.Column():
	gr.Markdown(
	"🕰️ 2015 Implementation — Theano + NumPy (~130 lines)"
	"<br><small style='color:#999'>2015 実装 — Theano + NumPy（約 130 行）</small>"
	)
	# Read-only viewer pre-filled with the 2015 snippet.
	code_2015_box = gr.Code(
	value=CODE_2015,
	language="python",
	label="",
	lines=30,
	interactive=False,
	elem_classes=["code-2015"],
	)
	with gr.Column():
	gr.Markdown(
	"✅ 2025 Implementation — HuggingFace Transformers (5 lines)"
	"<br><small style='color:#999'>2025 実装 — HuggingFace Transformers（5 行）</small>"
	)
	# Read-only viewer pre-filled with the 2025 snippet.
	code_2025_box = gr.Code(
	value=CODE_2025,
	language="python",
	label="",
	lines=30,
	interactive=False,
	elem_classes=["code-2025"],
	)

	# Horizontal rule, section heading, then the summary table.
	gr.Markdown("---")
	gr.Markdown(
	"### Implementation Comparison Summary\n"
	"<small style='color:#999'>実装比較サマリー</small>"
	)
	gr.Markdown(COMPARISON_MD)

	# Event binding: a button click and any change of the image value both run
	# run_inference. The handler returns the two code snippets unchanged, so the
	# code boxes are simply rewritten with the same text on every run.
	run_btn.click(
	fn=run_inference,
	inputs=[img_input],
	outputs=[results_output, code_2015_box, code_2025_box],
	)
	img_input.change(
	fn=run_inference,
	inputs=[img_input],
	outputs=[results_output, code_2015_box, code_2025_box],
	)

	# Script entry point: start the Gradio server only when this file is executed
	# directly, so importing the module merely builds `demo` without serving it.
	if __name__ == "__main__":
	    demo.launch()