Spaces:

Msk7000
/

cnn_cat_convolution_dashboard

Sleeping

App Files Files Community

Msk7000 committed on Apr 6

Commit

cd5bd1f

verified ·

1 Parent(s): 9e086cd

Upload cnn_cat_convolution_dashboard.py

Browse files

Files changed (1) hide show

cnn_cat_convolution_dashboard.py +179 -64

cnn_cat_convolution_dashboard.py CHANGED Viewed

@@ -1,10 +1,10 @@
 # -*- coding: utf-8 -*-
 """
-Streamlit ダッシュボード版: CNN 畳み込み可視化教材（リファクタリング版）
 """
 from pathlib import Path
-from typing import Tuple, Optional, Dict
 import matplotlib.pyplot as plt
 import numpy as np
@@ -13,7 +13,7 @@ from matplotlib import patches
 from matplotlib.font_manager import FontProperties
 # -----------------------------
-# フォント設定
 # -----------------------------
 def get_japanese_font() -> Tuple[Optional[FontProperties], Optional[FontProperties]]:
     """プロジェクトルートにあるNotoSansJPフォントを読み込む"""
@@ -39,20 +39,25 @@ def set_jp_font(ax_or_text, is_bold: bool = False, size: int = 12):
             ax_or_text.set_fontsize(size)
 # -----------------------------
-# 画像生成・処理ロジック
 # -----------------------------
-def draw_polyline(img: np.ndarray, pts: list, thickness: float = 1.25):
     h, w = img.shape
     ys, xs = np.mgrid[0:h, 0:w]
     for (x1, y1), (x2, y2) in zip(pts[:-1], pts[1:]):
         vx, vy = x2 - x1, y2 - y1
         c2 = vx**2 + vy**2 + 1e-12
-        bx = np.clip((vx * (xs - x1) + vy * (ys - y1)) / c2, 0, 1) * vx + x1
-        by = np.clip((vx * (xs - x1) + vy * (ys - y1)) / c2, 0, 1) * vy + y1
         img[np.sqrt((xs - bx)**2 + (ys - by)**2) <= thickness] = 1.0
 def fit_binary_image_to_canvas(img: np.ndarray, target_size: int = 48, margin: int = 2) -> np.ndarray:
-    """画像をターゲットサイズにリサイズして中央配置（ループを排除）"""
     coords = np.argwhere(img > 0.5)
     if coords.size == 0: return np.zeros((target_size, target_size))
@@ -64,10 +69,10 @@ def fit_binary_image_to_canvas(img: np.ndarray, target_size: int = 48, margin: i
     scale = min((target_size - 2 * margin) / max(ch, cw, 1), 1.0)
     new_h, new_w = int(ch * scale), int(cw * scale)
-    # 簡易的なリサイズ処理（最近傍補間的に座標変換）
     out = np.zeros((target_size, target_size))
     y_off, x_off = (target_size - new_h) // 2, (target_size - new_w) // 2
     for sy, sx in np.argwhere(cropped > 0.5):
         ty, tx = y_off + int(sy * scale), x_off + int(sx * scale)
         if 0 <= ty < target_size and 0 <= tx < target_size:
@@ -76,18 +81,51 @@ def fit_binary_image_to_canvas(img: np.ndarray, target_size: int = 48, margin: i
 @st.cache_data
 def get_cat_image(size: int = 48) -> np.ndarray:
     base = np.zeros((64, 64))
-    # 顔・耳・パーツの描画（元のロジックを継承）
     t = np.linspace(np.deg2rad(205), np.deg2rad(335), 160)
     pts_face = list(zip(32 + 18 * np.cos(t), 34 + 18 * np.sin(t)))
-    draw_polyline(base, pts_face)
-    for pts in [[(19, 24), (25, 10), (30, 24)], [(34, 24), (39, 10), (45, 24)]]:
-        draw_polyline(base, pts)
-    # ... (中略: 他のパーツも同様に描画) ...
     return fit_binary_image_to_canvas(base, target_size=size)
 # -----------------------------
-# 畳み込み演算
 # -----------------------------
 KERNELS = {
     "縦線": np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]),
@@ -97,106 +135,183 @@ KERNELS = {
 @st.cache_data
 def run_convolution(img: np.ndarray, kernel: np.ndarray) -> np.ndarray:
     kh, kw = kernel.shape
     oh, ow = img.shape[0] - kh + 1, img.shape[1] - kw + 1
-    # スライディングウィンドウを効率的に作成するビューの利用も検討できるが、教材用なのでシンプルに
     output = np.zeros((oh, ow))
     for i in range(oh):
         for j in range(ow):
             output[i, j] = np.sum(img[i:i+kh, j:j+kw] * kernel)
     return output
 # -----------------------------
-# 表示用コンポーネント
 # -----------------------------
-def render_dashboard(img, k_name, row, col, show_ans):
     kernel = KERNELS[k_name]
     conv_full = run_convolution(img, kernel)
     patch = img[row:row+3, col:col+3]
     val = conv_full[row, col]
-    # メイン図
-    fig, axes = plt.subplots(1, 3, figsize=(12, 5), constrained_layout=True)
-    # 1. 入力画像
-    axes[0].imshow(img, cmap="gray_r")
-    axes[0].add_patch(patches.Rectangle((col-0.5, row-0.5), 3, 3, lw=2, ec="red", fc="none"))
-    axes[0].set_title("入力画像 (48x48)")
-    set_jp_font(axes[0], is_bold=True)
-    # 2. カーネル
     axes[1].set_xlim(-0.5, 2.5); axes[1].set_ylim(2.5, -0.5); axes[1].set_aspect("equal")
     for r in range(3):
         for c in range(3):
-            t = axes[1].text(c, r, f"{int(kernel[r,c])}", ha="center", va="center", fontsize=18)
             set_jp_font(t, is_bold=True)
-            if patch[r,c] > 0: t.set_bbox(dict(facecolor="mistyrose", alpha=0.5))
-    axes[1].set_title(f"カーネル: {k_name}")
-    set_jp_font(axes[1], is_bold=True)
-    # 3. 畳み込み結果
     norm_conv = conv_full / (np.max(np.abs(conv_full)) + 1e-12)
-    axes[2].imshow(norm_conv, cmap="bwr", vmin=-1, vmax=1)
-    axes[2].add_patch(patches.Rectangle((col-0.5, row-0.5), 1, 1, lw=2, ec="gold", fc="none"))
-    axes[2].set_title(f"結果: {'?' if not show_ans else int(val)}")
-    set_jp_font(axes[2], is_bold=True)
     st.pyplot(fig)
 # -----------------------------
-# メインアプリ
 # -----------------------------
 def main():
     st.set_page_config(page_title="CNN Convolution Demo", layout="wide")
-    st.title("🔢 CNNの畳み込み計算を理解しよう")
-    # セッション状態の初期化
-    if "idx" not in st.session_state: st.session_state.idx = 0
     img = get_cat_image()
-    output_size = img.shape[0] - 2
     with st.sidebar:
-        st.header("⚙️ 設定")
-        k_name = st.radio("カーネル選択", list(KERNELS.keys()))
-        show_ans = st.checkbox("答えを表示", value=True)
         st.divider()
-        st.write("📍 位置移動")
         c1, c2 = st.columns(2)
-        if c1.button("← 前へ"): st.session_state.idx = max(0, st.session_state.idx - 1)
-        if c2.button("次へ →"): st.session_state.idx = min(output_size**2 - 1, st.session_state.idx + 1)
-        st.session_state.idx = st.slider("スライダーで移動", 0, output_size**2 - 1, st.session_state.idx)
         row, col = divmod(st.session_state.idx, output_size)
-    # 描画実行
-    render_dashboard(img, k_name, row, col, show_ans)
-    # 下部詳細エリア
-    col_left, col_right = st.columns([1, 1])
     patch = img[row:row+3, col:col+3]
     kernel = KERNELS[k_name]
     with col_left:
-        st.subheader("🔍 拡大表示")
-        fig_sub, axes_sub = plt.subplots(1, 2, figsize=(6, 3))
-        axes_sub[0].imshow(patch, cmap="gray_r"); axes_sub[0].set_title("抽出領域")
-        axes_sub[1].imshow(patch * kernel, cmap="bwr"); axes_sub[1].set_title("要素ごとの積")
-        for ax in axes_sub: set_jp_font(ax); ax.axis("off")
         st.pyplot(fig_sub)
     with col_right:
-        st.subheader("🧮 計算式")
         if show_ans:
-            formula = " + ".join([f"({int(p)}×{int(k)})" for p, k in zip(patch.flatten(), kernel.flatten())])
-            st.code(f"{formula}\n\n= 合計: {int(np.sum(patch * kernel))}", language="text")
-            st.metric("畳み込み値", int(np.sum(patch * kernel)))
         else:
-            st.info("サイドバーの「答えを表示」をチェックして確認してください。")
 if __name__ == "__main__":
-    # Pythonコード作成時のデフォルト要件確認
-    # 「このチャットでのPythonコード作成のフォント指定は、長期メモリに保存されているデフォルト
-    # （NotoSansJP-Regular/Bold を FontProperties(fname=...) で明示指定）のままで良いですか？」
     main()

 # -*- coding: utf-8 -*-
 """
+Streamlit ダッシュボード版: CNN 畳み込み可視化教材（ネコの顔全体・修正版）
 """
 from pathlib import Path
+from typing import Tuple, Optional
 import matplotlib.pyplot as plt
 import numpy as np
 from matplotlib.font_manager import FontProperties
 # -----------------------------
+# 1. フォント設定 (findfont警告回避)
 # -----------------------------
 def get_japanese_font() -> Tuple[Optional[FontProperties], Optional[FontProperties]]:
     """プロジェクトルートにあるNotoSansJPフォントを読み込む"""
             ax_or_text.set_fontsize(size)
 # -----------------------------
+# 2. 画像生成ロジック (ネコの顔全体を復元)
 # -----------------------------
+def draw_polyline(img: np.ndarray, pts: list, thickness: float = 1.0):
+    """配列上にポリラインを描画する（高速化版）"""
     h, w = img.shape
     ys, xs = np.mgrid[0:h, 0:w]
     for (x1, y1), (x2, y2) in zip(pts[:-1], pts[1:]):
         vx, vy = x2 - x1, y2 - y1
         c2 = vx**2 + vy**2 + 1e-12
+        # 点から線分への最短距離のパラメータt
+        t = np.clip((vx * (xs - x1) + vy * (ys - y1)) / c2, 0, 1)
+        # 最短点(bx, by)
+        bx = x1 + t * vx
+        by = y1 + t * vy
+        # 距離がthickness以下のピクセルを1にする
         img[np.sqrt((xs - bx)**2 + (ys - by)**2) <= thickness] = 1.0
 def fit_binary_image_to_canvas(img: np.ndarray, target_size: int = 48, margin: int = 2) -> np.ndarray:
+    """描画された画像をキャンバス中央にリサイズして配置"""
     coords = np.argwhere(img > 0.5)
     if coords.size == 0: return np.zeros((target_size, target_size))
     scale = min((target_size - 2 * margin) / max(ch, cw, 1), 1.0)
     new_h, new_w = int(ch * scale), int(cw * scale)
     out = np.zeros((target_size, target_size))
     y_off, x_off = (target_size - new_h) // 2, (target_size - new_w) // 2
+    # 座標変換による簡易リサイズ
     for sy, sx in np.argwhere(cropped > 0.5):
         ty, tx = y_off + int(sy * scale), x_off + int(sx * scale)
         if 0 <= ty < target_size and 0 <= tx < target_size:
 @st.cache_data
 def get_cat_image(size: int = 48) -> np.ndarray:
+    """64x64のキャンバスにネコの顔全体を描画し、48x48にフィットさせる"""
     base = np.zeros((64, 64))
+    # --- [復元] ネコの顔を描画するパーツ群 ---
+    # 1. 顔の輪郭 (下半分のアーク)
     t = np.linspace(np.deg2rad(205), np.deg2rad(335), 160)
     pts_face = list(zip(32 + 18 * np.cos(t), 34 + 18 * np.sin(t)))
+    draw_polyline(base, pts_face, thickness=1.3)
+    # 2. 耳
+    draw_polyline(base, [(19, 24), (25, 10), (30, 24)], thickness=1.25) # 左
+    draw_polyline(base, [(34, 24), (39, 10), (45, 24)], thickness=1.25) # 右
+    # 3. 頭頂部
+    draw_polyline(base, [(30, 24), (32, 22), (34, 24)], thickness=1.15)
+    # 4. ほっぺた
+    draw_polyline(base, [(19, 24), (15, 31), (16, 41)], thickness=1.25) # 左
+    draw_polyline(base, [(45, 24), (49, 31), (48, 41)], thickness=1.25) # 右
+    # 5. 目 (アーチ状)
+    draw_polyline(base, [(24, 30), (27, 28), (30, 30)], thickness=1.0) # 左
+    draw_polyline(base, [(34, 30), (37, 28), (40, 30)], thickness=1.0) # 右
+    # 6. 鼻と口
+    draw_polyline(base, [(30, 37), (32, 39), (34, 37), (32, 37), (30, 37)], thickness=1.0) # 鼻
+    draw_polyline(base, [(32, 39), (30, 42)], thickness=1.0) # 口・左
+    draw_polyline(base, [(32, 39), (34, 42)], thickness=1.0) # 口・右
+    # 7. ヒゲ (左右3本ずつ)
+    whiskers = [
+        [(17, 34), (25, 35)], [(16, 38), (25, 38)], [(17, 42), (25, 41)], # 左
+        [(39, 35), (47, 34)], [(39, 38), (48, 38)], [(39, 41), (47, 42)], # 右
+    ]
+    for pts in whiskers:
+        draw_polyline(base, pts, thickness=0.8)
+    # --- [復元ここまで] ---
+    # 指定サイズにフィットさせて返す
     return fit_binary_image_to_canvas(base, target_size=size)
 # -----------------------------
+# 3. 畳み込み演算
 # -----------------------------
 KERNELS = {
     "縦線": np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]),
 @st.cache_data
 def run_convolution(img: np.ndarray, kernel: np.ndarray) -> np.ndarray:
+    """Validモードでの畳み込み演算（教材用シンプル実装）"""
     kh, kw = kernel.shape
     oh, ow = img.shape[0] - kh + 1, img.shape[1] - kw + 1
     output = np.zeros((oh, ow))
     for i in range(oh):
         for j in range(ow):
+            # 要素ごとの積の合計
             output[i, j] = np.sum(img[i:i+kh, j:j+kw] * kernel)
     return output
 # -----------------------------
+# 4. 表示用コンポーネント
 # -----------------------------
+def render_main_figures(img, k_name, row, col, show_ans):
+    """メインの3つのグラフ（入力、カーネル、結果）を描画"""
     kernel = KERNELS[k_name]
     conv_full = run_convolution(img, kernel)
     patch = img[row:row+3, col:col+3]
     val = conv_full[row, col]
+    fig, axes = plt.subplots(1, 3, figsize=(14, 5.5), constrained_layout=True)
+    # 1. 入力画像 (48x48)
+    axes[0].imshow(img, cmap="gray_r", interpolation="nearest")
+    # 現在の畳み込み位置を赤枠で表示
+    axes[0].add_patch(patches.Rectangle((col-0.5, row-0.5), 3, 3, lw=2.5, ec="red", fc="none"))
+    axes[0].set_title("入力画像 (48x48 ネコの顔全体)")
+    axes[0].set_xticks(np.arange(0, 48, 6)); axes[0].set_yticks(np.arange(0, 48, 6))
+    axes[0].grid(color="lightgray", lw=0.5, alpha=0.5)
+    set_jp_font(axes[0], is_bold=True, size=14)
+    # 2. カーネル (3x3)
     axes[1].set_xlim(-0.5, 2.5); axes[1].set_ylim(2.5, -0.5); axes[1].set_aspect("equal")
+    axes[1].set_xticks([0, 1, 2]); axes[1].set_yticks([0, 1, 2])
+    axes[1].grid(color="black", lw=1)
     for r in range(3):
         for c in range(3):
+            # カーネルの数値を描画
+            t = axes[1].text(c, r, f"{int(kernel[r,c])}", ha="center", va="center", fontsize=24)
             set_jp_font(t, is_bold=True)
+            # 対応する入力ピクセルが黒(1)なら背景を赤くする
+            if patch[r,c] > 0:
+                t.set_bbox(dict(facecolor="mistyrose", edgecolor="none", alpha=0.8, boxstyle="round,pad=0.2"))
+    axes[1].set_title(f"3x3 カーネル ({k_name})")
+    set_jp_font(axes[1], is_bold=True, size=14)
+    # 3. 畳み込み結果 (46x46)
+    # 表示用に正規化 (-1~1)
     norm_conv = conv_full / (np.max(np.abs(conv_full)) + 1e-12)
+    axes[2].imshow(norm_conv, cmap="bwr", vmin=-1, vmax=1, interpolation="nearest")
+    # 現在の結果位置を金枠で表示
+    axes[2].add_patch(patches.Rectangle((col-0.5, row-0.5), 1, 1, lw=2.5, ec="gold", fc="none"))
+    res_val_str = '?' if not show_ans else str(int(val))
+    axes[2].set_title(f"結果 (46x46) 現在値: {res_val_str}")
+    axes[2].set_xticks(np.arange(0, 46, 6)); axes[2].set_yticks(np.arange(0, 46, 6))
+    axes[2].grid(color="lightgray", lw=0.5, alpha=0.5)
+    set_jp_font(axes[2], is_bold=True, size=14)
     st.pyplot(fig)
 # -----------------------------
+# 5. メインアプリ
 # -----------------------------
 def main():
     st.set_page_config(page_title="CNN Convolution Demo", layout="wide")
+    st.title("🔢 CNNの畳み込み計算を見てみよう")
+    st.markdown("48x48のネコの顔画像に対して、3x3のカーネル（フィルタ）を滑らせて、エッジを抽出する様子を可視化します。")
+    # セッション状態の初期化 (前回の位置を記憶)
+    if "idx" not in st.session_state: st.session_state.idx = 1110 # 顔の中心付近
     img = get_cat_image()
+    # Validモードなので出力サイズは N - K + 1
+    output_size = img.shape[0] - 2 # 46
+    # --- サイドバー操作パネル ---
     with st.sidebar:
+        st.header("⚙️ 操作パネル")
+        k_name = st.radio("カーネル（フィルタ）を選ぶ", list(KERNELS.keys()), index=2) # デフォルト輪郭
+        show_ans = st.checkbox("計算の答えを表示する", value=True)
         st.divider()
+        st.write("📍 **位置を移動する**")
+        # ボタンによる移動
         c1, c2 = st.columns(2)
+        if c1.button("← 前へ (1px)", use_container_width=True):
+            st.session_state.idx = max(0, st.session_state.idx - 1)
+        if c2.button("次へ → (1px)", use_container_width=True):
+            st.session_state.idx = min(output_size**2 - 1, st.session_state.idx + 1)
+        # スライダーによる移動
+        st.session_state.idx = st.slider(
+            "スライダーで連続移動",
+            0, output_size**2 - 1,
+            st.session_state.idx,
+            label_visibility="collapsed"
+        )
+        # 1次元インデックスを2次元座標(行i, 列j)に変換
         row, col = divmod(st.session_state.idx, output_size)
+        st.caption(f"現在の中心座標 (Valid領域): 行={row}, 列={col}")
+        st.info("赤枠（入力）の9マスの数値と、カーネルの9マスの数値を掛け算して合計したものが、金枠（結果）の1マスの数値になります。")
+    # --- メインエリア描画 ---
+    render_main_figures(img, k_name, row, col, show_ans)
+    # --- 下部詳細エリア（拡大図と計算式） ---
+    col_left, col_right = st.columns([1, 1.2])
     patch = img[row:row+3, col:col+3]
     kernel = KERNELS[k_name]
+    products = patch * kernel
     with col_left:
+        st.subheader("🔍 現在位置の拡大図 (3x3)")
+        fig_sub, axes_sub = plt.subplots(1, 2, figsize=(7, 3.5), constrained_layout=True)
+        # 拡大した抽出領域
+        axes_sub[0].imshow(patch, cmap="gray_r", interpolation="nearest")
+        axes_sub[0].set_title("抽出された3x3領域\n(0:白, 1:黒)")
+        set_jp_font(axes_sub[0], size=11)
+        # 要素ごとの積
+        axes_sub[1].imshow(products, cmap="bwr", vmin=-2, vmax=8, interpolation="nearest")
+        axes_sub[1].set_title("要素ごとの掛け算の結果\n(Patch × Kernel)")
+        set_jp_font(axes_sub[1], size=11)
+        for ax in axes_sub:
+            ax.set_xticks([0, 1, 2]); ax.set_yticks([0, 1, 2])
+            ax.grid(color="gray", lw=0.5)
+            # 数値をオーバーレイ
+            mat = patch if ax == axes_sub[0] else products
+            for r in range(3):
+                for c in range(3):
+                    t = ax.text(c, r, f"{int(mat[r,c])}", ha="center", va="center", fontsize=16)
+                    set_jp_font(t, is_bold=True)
+                    t.set_bbox(dict(facecolor="white", alpha=0.5, edgecolor="none"))
         st.pyplot(fig_sub)
     with col_right:
+        st.subheader(f"🧮 計算式 (行={row}, 列={col})")
         if show_ans:
+            # フラット化して計算式を生成
+            p_f = patch.flatten().astype(int)
+            k_f = kernel.flatten().astype(int)
+            # 3x3の形式で見せるための改行付きリスト
+            formula_lines = []
+            total_val = 0
+            for r in range(3):
+                line_terms = []
+                for c in range(3):
+                    p, k = int(patch[r, c]), int(kernel[r, c])
+                    total_val += p * k
+                    # 教材用に分かりやすく (入力 × カーネル)
+                    line_terms.append(f"({p}×{k:2})")
+                formula_lines.append(" + ".join(line_terms))
+            formula_text = "  \n+ ".join(formula_lines)
+            st.code(
+                f"要素ごとの積の合計:\n\n   {formula_text}\n\n= 合計: {total_val}",
+                language="text"
+            )
+            st.metric(label="この位置の畳み込み出力値", value=total_val)
         else:
+            st.warning("サイドバーの「計算の答えを表示する」をチェックして、手計算の結果を確認してください。")
+            st.code(
+                "要素ごとの積の合計:\n\n（ここを計算してみよう）\n\n= 合計: ?",
+                language="text"
+            )
 if __name__ == "__main__":
     main()