InsideYolo

Sleeping

App Files Files Community

PraneshJs committed on Dec 4, 2025

Commit

32fb896

verified ·

1 Parent(s): f8ed6f1

Create app.py

Browse files

Files changed (1) hide show

app.py +251 -0

app.py ADDED Viewed

	@@ -0,0 +1,251 @@

+# ==========================================================
+#  YOLOv5n Visualizer — "Inside Object Detection"
+#  - Uses small YOLOv5n (CPU-friendly)
+#  - Shows detections + early/mid/late feature maps
+#  - Gradio 5 compatible (theme supported)
+# ==========================================================
+import gradio as gr
+import torch
+import numpy as np
+from PIL import Image
+# ------------------- GLOBALS -------------------
+# Entry-point name handed to torch.hub.load() inside load_model().
+MODEL_NAME = "yolov5n"   # smallest YOLOv5 model (fast & light)
+# Device string the loaded model is moved to via model.to(DEVICE).
+DEVICE = "cpu"
+# Lazily created model; stays None until load_model() has run once.
+MODEL = None
+# Filled by the forward hooks registered in load_model(): keys are layer
+# indices as strings, values are detached CPU copies of the layer outputs.
+FEATURE_MAPS = {}  # {layer_name: tensor(B,C,H,W)}
+# ------------------- MODEL LOADING -------------------
+def load_model():
+    """
+    Load YOLOv5n from torch.hub (ultralytics/yolov5) and
+    register forward hooks to capture internal feature maps.
+    """
+    global MODEL, FEATURE_MAPS
+    if MODEL is not None:
+        return MODEL
+    # Download and load YOLOv5n from GitHub (only on first run)
+    # repo 'ultralytics/yolov5' must be reachable during build/first call.
+    model = torch.hub.load("ultralytics/yolov5", MODEL_NAME, pretrained=True)
+    model.to(DEVICE)
+    model.eval()
+    FEATURE_MAPS = {}
+    def make_hook(name):
+        def hook(module, input, output):
+            # YOLO can run on GPU or CPU but we store CPU tensors for visualization
+            with torch.no_grad():
+                FEATURE_MAPS[name] = output.detach().cpu()
+        return hook
+    # Register hooks on some main layers in the YOLOv5 backbone/head
+    # We choose Conv / C3 / SPPF etc. so we can show early, mid, late stages.
+    for idx, m in enumerate(model.model):
+        cls_name = m.__class__.__name__
+        if cls_name in ["Conv", "C3", "Bottleneck", "BottleneckCSP", "SPPF"]:
+            m.register_forward_hook(make_hook(str(idx)))
+    MODEL = model
+    return MODEL
+# ------------------- FEATURE MAP UTILITIES -------------------
+def tensor_to_heatmap(fm, out_size):
+    """
+    Convert a feature map tensor (C,H,W) to a grayscale heatmap PIL image.
+    Steps:
+      - average over channels
+      - normalize to 0..1
+      - upscale to out_size
+    """
+    if fm.ndim != 3:
+        return None
+    fm_np = fm.numpy().astype(np.float32)  # (C,H,W)
+    # average over channels -> (H,W)
+    heat = fm_np.mean(axis=0)
+    if np.allclose(heat, 0):
+        heat = np.zeros_like(heat)
+    else:
+        heat = heat - heat.min()
+        maxv = heat.max()
+        if maxv > 0:
+            heat = heat / maxv
+    heat_img = (heat * 255).astype("uint8")
+    pil = Image.fromarray(heat_img, mode="L")
+    pil = pil.resize(out_size, Image.NEAREST)
+    return pil
+def pick_feature_maps():
+    """
+    After a forward pass, FEATURE_MAPS has many layers.
+    We pick up to 3 layers: early, middle, late.
+    Returns: list of (name, tensor(C,H,W))
+    """
+    if not FEATURE_MAPS:
+        return []
+    # keys are layer indices as strings: "0", "1", "4", ...
+    keys = sorted(FEATURE_MAPS.keys(), key=lambda x: int(x))
+    fms = [FEATURE_MAPS[k][0] for k in keys]  # take batch 0
+    # pick early, mid, late
+    idxs = [0, len(fms) // 2, len(fms) - 1]
+    idxs = sorted(list(set(idxs)))  # remove duplicate indices
+    chosen = []
+    for i in idxs:
+        chosen.append((keys[i], fms[i]))
+    return chosen
+# ------------------- MAIN ANALYSIS FUNCTION -------------------
+def analyze_yolo(img, conf_thres, iou_thres, simple_mode):
+    """
+    Run YOLO on the input image and return:
+      - detection overlay image
+      - early feature map heatmap
+      - mid feature map heatmap
+      - late feature map heatmap
+      - explanation markdown
+    """
+    if img is None:
+        return (
+            None,  # det img
+            None,  # early fm
+            None,  # mid fm
+            None,  # late fm
+            "⚠️ Please upload an image first."
+        )
+    model = load_model()
+    # Clear old feature maps
+    FEATURE_MAPS.clear()
+    # In Gradio, `type="pil"` gives a PIL image already
+    pil = img
+    # Configure thresholds
+    model.conf = float(conf_thres)
+    model.iou = float(iou_thres)
+    with torch.no_grad():
+        results = model(pil)
+    # YOLOv5 .render() draws boxes and labels on the image
+    rendered = results.render()[0]  # numpy array (H,W,C)
+    det_img = Image.fromarray(rendered)
+    # Collect feature maps from hooks
+    chosen_fms = pick_feature_maps()
+    W, H = pil.size
+    heatmaps = [None, None, None]  # early, mid, late
+    for idx, item in enumerate(chosen_fms):
+        name, fm = item
+        hm = tensor_to_heatmap(fm, (W, H))
+        heatmaps[idx] = hm
+    # Build readable explanation
+    if simple_mode:
+        explanation = (
+            "🧒 **Simple explanation of what you see:**\n\n"
+            "1. YOLO first looks at your image and tries to find basic patterns like edges and corners.\n"
+            "2. Then it builds more complex shapes (like parts of objects: wheels, faces, etc.).\n"
+            "3. In the last layers, it focuses on whole objects and decides **what** and **where** they are.\n\n"
+            "**From top to bottom:**\n"
+            "- Left: final detections (boxes + labels).\n"
+            "- Early heatmap: where YOLO sees low-level details.\n"
+            "- Middle heatmap: where it sees object parts.\n"
+            "- Late heatmap: where it focuses on full objects.\n"
+        )
+    else:
+        explanation = (
+            "🔬 **Technical explanation:**\n\n"
+            "- We run `yolov5n` (small YOLOv5) on CPU.\n"
+            "- Forward hooks capture intermediate feature maps from several Conv/C3/SPPF blocks.\n"
+            "- For each selected layer, we take the tensor `(C,H,W)`, average over channels to get a 2D\n"
+            "  activation map `(H,W)`, normalize it, and upscale it to the original image size.\n"
+            "- Early feature map ≈ low-level features (edges, textures).\n"
+            "- Middle feature map ≈ mid-level features (parts, shapes).\n"
+            "- Late feature map ≈ high-level features (object-centric regions used for detection).\n"
+        )
+    # Append layer shapes info if available
+    fm_shapes_info = []
+    for name, fm in chosen_fms:
+        fm_shapes_info.append(f"Layer {name}: shape {tuple(fm.shape)} (C,H,W)")
+    if fm_shapes_info:
+        explanation += "\n**Feature map shapes captured:**\n" + "\n".join(f"- {s}" for s in fm_shapes_info)
+    return det_img, heatmaps[0], heatmaps[1], heatmaps[2], explanation
+# ------------------- GRADIO UI (GRADIO 5) -------------------
+with gr.Blocks(
+    title="YOLOv5n Visualizer — Inside Object Detection",
+    theme=gr.themes.Soft()
+) as demo:
+    gr.Markdown("# 🧠 YOLOv5n Visualizer — See Inside Object Detection")
+    gr.Markdown(
+        "Upload an image and see YOLO work **step by step**:\n"
+        "1. Final detections (boxes & labels)\n"
+        "2. Early feature activations (edges/textures)\n"
+        "3. Middle feature activations (parts/shapes)\n"
+        "4. Late feature activations (object focus)\n"
+        "Use the explanation toggle for simple or technical view."
+    )
+    with gr.Row():
+        with gr.Column(scale=1):
+            in_img = gr.Image(
+                label="Step 0 — Input image",
+                type="pil"
+            )
+            conf_slider = gr.Slider(
+                0.1, 0.9, step=0.05, value=0.25,
+                label="Confidence threshold"
+            )
+            iou_slider = gr.Slider(
+                0.1, 0.9, step=0.05, value=0.45,
+                label="IoU threshold (for NMS)"
+            )
+            simple_ck = gr.Checkbox(
+                label="Explain in simple terms (kids/elders)",
+                value=True
+            )
+            run_btn = gr.Button("Run YOLO & Visualize", variant="primary")
+        with gr.Column(scale=1):
+            out_det = gr.Image(label="Step 4 — Final detections (YOLOv5n)")
+            explanation_md = gr.Markdown(label="Explanation")
+    gr.Markdown("### 🔍 Steps inside the network (feature maps)")
+    with gr.Row():
+        fm1 = gr.Image(label="Step 1 — Early layer activation (edges & textures)", interactive=False)
+        fm2 = gr.Image(label="Step 2 — Middle layer activation (parts & shapes)", interactive=False)
+        fm3 = gr.Image(label="Step 3 — Late layer activation (objects)", interactive=False)
+    run_btn.click(
+        analyze_yolo,
+        inputs=[in_img, conf_slider, iou_slider, simple_ck],
+        outputs=[out_det, fm1, fm2, fm3, explanation_md]
+    )
+demo.launch()