AnikS22 committed · verified · Commit 86c24cb · Parent: cb40f1e

Deploy MidasMap Gradio app, src, requirements, checkpoint
README.md CHANGED
@@ -1,13 +1,38 @@
  ---
  title: MidasMap
- emoji: 🦀
- colorFrom: purple
  colorTo: blue
  sdk: gradio
- sdk_version: 6.10.0
  app_file: app.py
  pinned: false
- short_description: Detects Immunogold particles in EM images
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
  ---
  title: MidasMap
+ emoji: 🔬
+ colorFrom: gray
  colorTo: blue
  sdk: gradio
+ sdk_version: 4.44.1
  app_file: app.py
  pinned: false
+ license: mit
  ---

+ # MidasMap Space
+
+ This folder is a **template** for creating a [Hugging Face Space](https://huggingface.co/docs/hub/spaces-overview).
+
+ **Why not Vercel for the model?** Vercel serverless functions have strict size and execution-time limits; they are not suited to PyTorch plus a ~100 MB checkpoint and multi-second CPU/GPU inference. **Host the Gradio app and weights on a Space** (free CPU tier, or upgrade to GPU).
+
+ ## Create the Space
+
+ 1. On Hugging Face: **New Space** → SDK **Gradio** → name it e.g. `MidasMap`.
+ 2. Clone the Space repo locally, or connect **GitHub** and set the Space root to this monorepo with **App file** pointing to the copied `app.py`.
+ 3. Copy into the Space repository root:
+    - `app.py` from the main MidasMap repo (project root), **or** symlink / duplicate it.
+    - `src/` (the entire package)
+    - `requirements-space.txt` from this folder, renamed to **`requirements.txt`**
+ 4. In Space **Settings → Repository secrets**: none required for public weights.
+ 5. Ensure `checkpoints/final/final_model.pth` is present:
+    - Upload it via the **Files** tab, or
+    - Add a startup script that downloads it from `AnikS22/MidasMap` on the Hub (see the HF docs for `hf_hub_download`).
+
+ After the Space builds, point your **Vercel** site (`vercel-site`) at it:
+
+ `https://yoursite.vercel.app/?embed=https://huggingface.co/spaces/YOUR_USER/YOUR_SPACE`
+
+ ---
+
+ Gradio app and model logic: [github.com/AnikS22/MidasMap](https://github.com/AnikS22/MidasMap)
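The startup download mentioned in step 5 can be sketched with `hf_hub_download`. This is an editor's sketch, not part of this commit; it assumes the weights remain public under `AnikS22/MidasMap` at the path shown in the checkpoint file below.

```python
from pathlib import Path

CKPT_REL = "checkpoints/final/final_model.pth"


def ensure_checkpoint(repo_id: str = "AnikS22/MidasMap", rel_path: str = CKPT_REL) -> Path:
    """Return a local path to the checkpoint, downloading from the Hub if absent."""
    local = Path(rel_path)
    if local.is_file():
        return local  # already present (e.g. uploaded via the Files tab)
    # Deferred import: only needed when the file must actually be fetched
    from huggingface_hub import hf_hub_download

    return Path(hf_hub_download(repo_id=repo_id, filename=rel_path))
```

Calling `ensure_checkpoint()` before `load_model(...)` at Space startup covers both cases; `hf_hub_download` caches under `~/.cache/huggingface`, so restarts do not re-download.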
app.py ADDED
@@ -0,0 +1,751 @@
+ """
+ MidasMap — Immunogold particle analysis for FFRIL / TEM synapse imaging
+
+ Web UI for neuroscientists: calibrated coordinates (µm), receptor labels,
+ export for quantification, and clear interpretation of model limits.
+
+ Usage:
+     python app.py
+     python app.py --checkpoint checkpoints/final/final_model.pth
+     python app.py --share
+ """
+
+ from __future__ import annotations
+
+ import argparse
+ import os
+ import tempfile
+ from pathlib import Path
+
+ import gradio as gr
+ import gradio_client.utils as _gcu
+
+ # Pydantic v2 can emit JSON Schema with additionalProperties: true (bool);
+ # Gradio 4.4x gradio_client assumes a dict and crashes rendering "/".
+ _orig_json_type = _gcu._json_schema_to_python_type
+
+
+ def _json_schema_to_python_type_safe(schema, defs=None):
+     if schema is True or schema is False:
+         return "Any"
+     if not isinstance(schema, dict):
+         return "Any"
+     return _orig_json_type(schema, defs)
+
+
+ _gcu._json_schema_to_python_type = _json_schema_to_python_type_safe
+
+ import matplotlib
+
+ matplotlib.use("Agg")
+ import matplotlib.patheffects as pe
+ import matplotlib.pyplot as plt
+ from matplotlib.patches import Patch
+ import numpy as np
+ import pandas as pd
+ import torch
+ import tifffile
+
+ from src.ensemble import sliding_window_inference
+ from src.heatmap import extract_peaks
+ from src.model import ImmunogoldCenterNet
+ from src.postprocess import cross_class_nms
+
+
+ # Calibration used for training / published metrics (change in UI if your scope differs)
+ DEFAULT_PX_PER_UM = 1790.0
+
+ plt.rcParams.update(
+     {
+         "figure.facecolor": "white",
+         "figure.dpi": 120,
+         "savefig.facecolor": "white",
+         "axes.facecolor": "#fafafa",
+         "axes.edgecolor": "#cbd5e1",
+         "axes.linewidth": 0.8,
+         "axes.labelcolor": "#1e293b",
+         "axes.titlecolor": "#0f172a",
+         "axes.grid": False,
+         "xtick.color": "#475569",
+         "ytick.color": "#475569",
+         "font.size": 10,
+         "axes.titlesize": 11,
+         "axes.labelsize": 10,
+         "legend.frameon": True,
+         "legend.framealpha": 0.92,
+         "legend.edgecolor": "#e2e8f0",
+     }
+ )
+
+
+ MODEL = None
+ DEVICE = None
+
+
+ def load_model(checkpoint_path: str):
+     global MODEL, DEVICE
+     DEVICE = torch.device(
+         "cuda"
+         if torch.cuda.is_available()
+         else "mps"
+         if torch.backends.mps.is_available()
+         else "cpu"
+     )
+     MODEL = ImmunogoldCenterNet(bifpn_channels=128, bifpn_rounds=2)
+     ckpt = torch.load(checkpoint_path, map_location="cpu", weights_only=False)
+     MODEL.load_state_dict(ckpt["model_state_dict"])
+     MODEL.to(DEVICE)
+     MODEL.eval()
+     print(f"Model loaded from {checkpoint_path} on {DEVICE}")
+
+
+ def _receptor_label(class_name: str) -> str:
+     return "AMPA receptor" if class_name == "6nm" else "NR1 (NMDA receptor)"
+
+
+ def _gold_nm(class_name: str) -> int:
+     return 6 if class_name == "6nm" else 12
+
+
+ def _pick_scale_bar_um(field_width_um: float) -> float:
+     """Pick a readable scale bar (~15–30% of field width)."""
+     if field_width_um <= 0:
+         return 0.2
+     target = field_width_um * 0.22
+     candidates = (0.05, 0.1, 0.2, 0.25, 0.5, 1.0, 2.0, 5.0)
+     best = candidates[0]
+     for c in candidates:
+         if abs(c - target) < abs(best - target):
+             best = c
+     # Keep bar from dominating the field
+     while best > 0 and best / field_width_um > 0.45:
+         best = max(0.05, best / 2)
+     return float(best)
+
+
+ def _draw_scale_bar_um(ax, w: int, h: int, px_per_um: float) -> None:
+     field_um = max(w, h) / px_per_um
+     bar_um = _pick_scale_bar_um(field_um)
+     bar_px = bar_um * px_per_um
+     margin = max(12, int(min(w, h) * 0.025))
+     y_line = h - margin
+     x0, x1 = margin, margin + bar_px
+     for lw, color in ((5, "white"), (2, "#0f172a")):
+         ax.plot([x0, x1], [y_line, y_line], color=color, linewidth=lw, solid_capstyle="butt", clip_on=False)
+     t = ax.text(
+         (x0 + x1) / 2,
+         y_line - margin * 0.35,
+         f"{bar_um:g} µm",
+         ha="center",
+         va="bottom",
+         color="white",
+         fontsize=9,
+         fontweight="600",
+     )
+     t.set_path_effects([pe.withStroke(linewidth=2.5, foreground="#0f172a")])
+
+
+ def _export_columns() -> list[str]:
+     return [
+         "particle_id",
+         "receptor",
+         "gold_diameter_nm",
+         "x_px",
+         "y_px",
+         "x_um",
+         "y_um",
+         "confidence",
+         "class_model",
+         "calibration_px_per_um",
+     ]
+
+
+ def _empty_results_df() -> pd.DataFrame:
+     return pd.DataFrame(columns=_export_columns())
+
+
+ def _df_to_preview_html(df: pd.DataFrame) -> str:
+     if df is None or len(df) == 0:
+         return "<p class='mm-table-empty'><em>No particles above the current threshold.</em></p>"
+     return df.to_html(
+         classes=["mm-table"],
+         index=False,
+         border=0,
+         justify="left",
+         escape=True,
+     )
+
+
+ def detect_particles(
+     image_file,
+     conf_threshold: float = 0.25,
+     nms_6nm: int = 3,
+     nms_12nm: int = 5,
+     px_per_um: float = DEFAULT_PX_PER_UM,
+     progress=gr.Progress(track_tqdm=False),
+ ):
+     """Run detection; returns figures, CSV path, table HTML, and summary HTML."""
+     empty_table = "<p class='mm-table-empty'><em>Run detection to populate the table.</em></p>"
+
+     if MODEL is None:
+         msg = "<p class='mm-callout mm-callout-warn'>Model not loaded. Use <code>--checkpoint</code> with a valid <code>.pth</code> file.</p>"
+         return None, None, None, None, empty_table, msg
+
+     if image_file is None:
+         msg = "<p class='mm-callout'>Upload a micrograph, set calibration if needed, then run detection.</p>"
+         return None, None, None, None, empty_table, msg
+
+     try:
+         px_per_um = float(px_per_um)
+     except (TypeError, ValueError):
+         px_per_um = DEFAULT_PX_PER_UM
+     if px_per_um <= 0:
+         px_per_um = DEFAULT_PX_PER_UM
+
+     progress(0.05, desc="Loading image…")
+
+     if isinstance(image_file, str):
+         img = tifffile.imread(image_file)
+     elif hasattr(image_file, "name"):
+         img = tifffile.imread(image_file.name)
+     else:
+         img = np.array(image_file)
+
+     if img.ndim == 3:
+         img = img[:, :, 0] if img.shape[2] <= 4 else img[0]
+     img = img.astype(np.uint8)
+
+     h, w = img.shape[:2]
+     field_w_um = w / px_per_um
+     field_h_um = h / px_per_um
+
+     progress(0.15, desc="Neural network (sliding window)…")
+
+     with torch.no_grad():
+         hm_np, off_np = sliding_window_inference(
+             MODEL,
+             img,
+             patch_size=512,
+             overlap=128,
+             device=DEVICE,
+         )
+
+     progress(0.72, desc="Peak extraction & NMS…")
+
+     dets = extract_peaks(
+         torch.from_numpy(hm_np),
+         torch.from_numpy(off_np),
+         stride=2,
+         conf_threshold=conf_threshold,
+         nms_kernel_sizes={"6nm": nms_6nm, "12nm": nms_12nm},
+     )
+     dets = cross_class_nms(dets, distance_threshold=8)
+
+     n_6nm = sum(1 for d in dets if d["class"] == "6nm")
+     n_12nm = sum(1 for d in dets if d["class"] == "12nm")
+     confs_6 = [d["conf"] for d in dets if d["class"] == "6nm"]
+     confs_12 = [d["conf"] for d in dets if d["class"] == "12nm"]
+
+     progress(0.78, desc="Rendering figures…")
+
+     from skimage.transform import resize
+
+     hm6_up = resize(hm_np[0], (h, w), order=1)
+     hm12_up = resize(hm_np[1], (h, w), order=1)
+
+     # --- Overlay (publication-style legend + scale bar) ---
+     fig_overlay, ax = plt.subplots(figsize=(11, 11))
+     ax.imshow(img, cmap="gray", aspect="equal")
+     for d in dets:
+         color = "#06b6d4" if d["class"] == "6nm" else "#ca8a04"
+         radius = 7 if d["class"] == "6nm" else 12
+         ax.add_patch(
+             plt.Circle(
+                 (d["x"], d["y"]),
+                 radius,
+                 fill=False,
+                 edgecolor=color,
+                 linewidth=1.8,
+             )
+         )
+     _draw_scale_bar_um(ax, w, h, px_per_um)
+     ax.set_title(
+         f"Immunogold detections · AMPA (6 nm): {n_6nm} · NR1 (12 nm): {n_12nm} · Total: {len(dets)}",
+         fontsize=11,
+         pad=12,
+     )
+     ax.axis("off")
+     legend_elems = [
+         Patch(facecolor="none", edgecolor="#06b6d4", linewidth=2, label="6 nm gold — AMPA receptor"),
+         Patch(facecolor="none", edgecolor="#ca8a04", linewidth=2, label="12 nm gold — NR1 (NMDAR)"),
+     ]
+     ax.legend(
+         handles=legend_elems,
+         loc="upper right",
+         fontsize=8.5,
+         title="Label class",
+         title_fontsize=9,
+     )
+     plt.tight_layout()
+     fig_overlay.canvas.draw()
+     overlay_img = np.asarray(fig_overlay.canvas.renderer.buffer_rgba())[:, :, :3]
+     plt.close(fig_overlay)
+
+     # --- Heatmaps ---
+     fig_hm, axes = plt.subplots(1, 2, figsize=(14, 6.2))
+     axes[0].imshow(img, cmap="gray", aspect="equal")
+     axes[0].imshow(hm6_up, cmap="magma", alpha=0.55, vmin=0, vmax=max(0.3, float(hm6_up.max())))
+     axes[0].set_title(f"AMPA (6 nm) channel · n = {n_6nm}", fontsize=11)
+     axes[0].axis("off")
+
+     axes[1].imshow(img, cmap="gray", aspect="equal")
+     axes[1].imshow(hm12_up, cmap="inferno", alpha=0.55, vmin=0, vmax=max(0.3, float(hm12_up.max())))
+     axes[1].set_title(f"NR1 (12 nm) channel · n = {n_12nm}", fontsize=11)
+     axes[1].axis("off")
+     plt.tight_layout()
+     fig_hm.canvas.draw()
+     heatmap_img = np.asarray(fig_hm.canvas.renderer.buffer_rgba())[:, :, :3]
+     plt.close(fig_hm)
+
+     # --- Stats (µm where helpful) ---
+     fig_stats, axes = plt.subplots(1, 3, figsize=(16, 4.8))
+     if dets:
+         if confs_6:
+             axes[0].hist(confs_6, bins=18, alpha=0.75, color="#0891b2", label=f"AMPA (n={len(confs_6)})")
+         if confs_12:
+             axes[0].hist(confs_12, bins=18, alpha=0.75, color="#a16207", label=f"NR1 (n={len(confs_12)})")
+         axes[0].axvline(conf_threshold, color="#be123c", linestyle="--", linewidth=1.2, label=f"Threshold = {conf_threshold:.2f}")
+         axes[0].legend(fontsize=8)
+     axes[0].set_xlabel("Confidence score")
+     axes[0].set_ylabel("Count")
+     axes[0].set_title("Score distribution")
+     axes[0].spines["top"].set_visible(False)
+     axes[0].spines["right"].set_visible(False)
+
+     if dets:
+         xs_um = np.array([d["x"] for d in dets]) / px_per_um
+         ys_um = np.array([d["y"] for d in dets]) / px_per_um
+         colors = ["#0891b2" if d["class"] == "6nm" else "#a16207" for d in dets]
+         axes[1].scatter(xs_um, ys_um, c=colors, s=22, alpha=0.75, edgecolors="none")
+     axes[1].set_xlim(0, field_w_um)
+     axes[1].set_ylim(field_h_um, 0)
+     axes[1].set_xlabel("x (µm)")
+     axes[1].set_ylabel("y (µm)")
+     axes[1].set_title("Positions (image coordinates)")
+     axes[1].set_aspect("equal")
+     axes[1].spines["top"].set_visible(False)
+     axes[1].spines["right"].set_visible(False)
+
+     axes[2].axis("off")
+     table_data = [
+         ["Field of view", f"{field_w_um:.3f} × {field_h_um:.3f} µm"],
+         ["Calibration", f"{px_per_um:.1f} px/µm"],
+         ["AMPA (6 nm)", str(n_6nm)],
+         ["NR1 (12 nm)", str(n_12nm)],
+         ["Total particles", str(len(dets))],
+         ["Score threshold", f"{conf_threshold:.2f}"],
+         ["Mean score · AMPA", f"{float(np.mean(confs_6)):.3f}" if confs_6 else "—"],
+         ["Mean score · NR1", f"{float(np.mean(confs_12)):.3f}" if confs_12 else "—"],
+     ]
+     tbl = axes[2].table(
+         cellText=table_data,
+         colLabels=["Quantity", "Value"],
+         loc="center",
+         cellLoc="left",
+     )
+     tbl.auto_set_font_size(False)
+     tbl.set_fontsize(10)
+     tbl.scale(1.05, 1.65)
+     for (row, col), cell in tbl.get_celld().items():
+         if row == 0:
+             cell.set_text_props(fontweight="600")
+             cell.set_facecolor("#e2e8f0")
+     axes[2].set_title("Summary", fontsize=11, pad=12)
+     plt.tight_layout()
+     fig_stats.canvas.draw()
+     stats_img = np.asarray(fig_stats.canvas.renderer.buffer_rgba())[:, :, :3]
+     plt.close(fig_stats)
+
+     rows = []
+     for i, d in enumerate(dets):
+         rows.append(
+             {
+                 "particle_id": i + 1,
+                 "receptor": _receptor_label(d["class"]),
+                 "gold_diameter_nm": _gold_nm(d["class"]),
+                 "x_px": round(d["x"], 2),
+                 "y_px": round(d["y"], 2),
+                 "x_um": round(d["x"] / px_per_um, 5),
+                 "y_um": round(d["y"] / px_per_um, 5),
+                 "confidence": round(d["conf"], 4),
+                 "class_model": d["class"],
+                 "calibration_px_per_um": round(px_per_um, 4),
+             }
+         )
+     df = pd.DataFrame(rows, columns=_export_columns()) if rows else _empty_results_df()
+
+     csv_f = tempfile.NamedTemporaryFile(suffix=".csv", delete=False, mode="w", encoding="utf-8")
+     df.to_csv(csv_f.name, index=False)
+     csv_f.close()
+
+     progress(1.0, desc="Done")
+
+     density_note = ""
+     if field_w_um > 0 and field_h_um > 0:
+         area = field_w_um * field_h_um
+         density_note = f"<span class='mm-density'>Areal density (all): {len(dets) / area:.2f} particles/µm² · AMPA: {n_6nm / area:.2f} · NR1: {n_12nm / area:.2f}</span>"
+
+     summary = f"""<div class="mm-summary">
+       <div class="mm-stat"><span class="mm-stat-label">AMPA · 6 nm gold</span>
+       <span class="mm-stat-value mm-teal">{n_6nm}</span></div>
+       <div class="mm-stat"><span class="mm-stat-label">NR1 · 12 nm gold</span>
+       <span class="mm-stat-value mm-amber">{n_12nm}</span></div>
+       <div class="mm-stat"><span class="mm-stat-label">Total</span>
+       <span class="mm-stat-value">{len(dets)}</span></div>
+       <div class="mm-stat mm-stat-wide"><span class="mm-stat-label">Field & calibration</span>
+       <span class="mm-stat-meta">{field_w_um:.3f} × {field_h_um:.3f} µm · {px_per_um:.1f} px/µm · {DEVICE}</span></div>
+       {density_note and f'<div class="mm-stat mm-stat-wide">{density_note}</div>'}
+     </div>"""
+
+     return overlay_img, heatmap_img, stats_img, csv_f.name, _df_to_preview_html(df), summary
+
+
+ MM_CSS = """
+ .gradio-container { max-width: 1320px !important; margin: auto !important; }
+ .mm-brand-bar {
+     display: flex; align-items: center; justify-content: space-between;
+     flex-wrap: wrap; gap: 0.75rem;
+     padding: 0.6rem 0 1.25rem;
+     border-bottom: 1px solid var(--border-color-primary);
+     margin-bottom: 1.25rem;
+ }
+ .mm-brand-bar span {
+     font-size: 0.72rem; letter-spacing: 0.14em; text-transform: uppercase;
+     color: var(--body-text-color-subdued); font-weight: 600;
+ }
+ .mm-hero {
+     padding: 1.5rem 1.35rem 1.35rem;
+     margin-bottom: 0.25rem;
+     border-radius: 10px;
+     background: linear-gradient(145deg, #0c4a6e22 0%, #0f172a 48%, #1e1b4b33 100%);
+     border: 1px solid #33415588;
+ }
+ .mm-hero h1 {
+     font-family: "Libre Baskerville", Georgia, serif;
+     font-weight: 700;
+     letter-spacing: -0.02em;
+     margin: 0 0 0.4rem 0;
+     font-size: 1.65rem;
+     color: #f1f5f9;
+ }
+ .mm-hero .mm-sub {
+     margin: 0 0 0.85rem 0;
+     color: #94a3b8;
+     font-size: 0.92rem;
+     line-height: 1.55;
+     max-width: 58ch;
+ }
+ .mm-badge-row { display: flex; flex-wrap: wrap; gap: 0.4rem; }
+ .mm-badge {
+     font-size: 0.65rem; text-transform: uppercase; letter-spacing: 0.07em;
+     padding: 0.2rem 0.5rem; border-radius: 4px;
+     background: #0e749033; color: #99f6e4; border: 1px solid #14b8a644;
+ }
+ .mm-layout { display: flex; gap: 1.25rem; align-items: flex-start; flex-wrap: wrap; }
+ .mm-sidebar {
+     flex: 1 1 280px; max-width: 340px;
+     padding: 1rem 1.1rem; border-radius: 10px;
+     border: 1px solid var(--border-color-primary);
+     background: var(--block-background-fill);
+ }
+ .mm-main { flex: 3 1 520px; min-width: 0; }
+ .mm-panel-title {
+     font-size: 0.7rem; text-transform: uppercase; letter-spacing: 0.1em;
+     color: var(--body-text-color-subdued); font-weight: 600; margin: 0 0 0.65rem 0;
+ }
+ .mm-callout {
+     margin: 0; padding: 0.75rem 0.9rem; border-radius: 8px;
+     background: #1e293b66; border: 1px solid var(--border-color-primary);
+     font-size: 0.88rem; line-height: 1.45; color: var(--body-text-color);
+ }
+ .mm-callout-warn { border-color: #f59e0b55; background: #78350f22; }
+ .mm-science {
+     margin-top: 1rem; font-size: 0.82rem; line-height: 1.5;
+     color: var(--body-text-color-subdued);
+ }
+ .mm-science h4 { margin: 0.5rem 0 0.35rem; font-size: 0.78rem; text-transform: uppercase; letter-spacing: 0.06em; color: #94a3b8; }
+ .mm-science ul { margin: 0.25rem 0 0 1rem; padding: 0; }
+ .mm-summary { display: flex; flex-wrap: wrap; gap: 0.65rem; margin: 0 0 1rem 0; }
+ .mm-stat {
+     flex: 1 1 118px; padding: 0.75rem 0.95rem; border-radius: 8px;
+     background: var(--block-background-fill);
+     border: 1px solid var(--border-color-primary);
+ }
+ .mm-stat-wide { flex: 1 1 100%; }
+ .mm-stat-label {
+     display: block; font-size: 0.68rem; text-transform: uppercase;
+     letter-spacing: 0.06em; opacity: 0.72; margin-bottom: 0.2rem;
+ }
+ .mm-stat-value { font-size: 1.4rem; font-weight: 700; font-variant-numeric: tabular-nums; letter-spacing: -0.02em; }
+ .mm-stat-value.mm-teal { color: #2dd4bf; }
+ .mm-stat-value.mm-amber { color: #fbbf24; }
+ .mm-stat-meta { font-size: 0.84rem; opacity: 0.92; line-height: 1.35; }
+ .mm-density { font-size: 0.84rem; opacity: 0.9; }
+ table.mm-table {
+     width: 100%; border-collapse: collapse; font-size: 0.82rem;
+     margin: 0.25rem 0 0.75rem 0;
+ }
+ table.mm-table th {
+     text-align: left; padding: 0.45rem 0.5rem;
+     border-bottom: 1px solid var(--border-color-primary);
+     color: var(--body-text-color-subdued); font-weight: 600;
+ }
+ table.mm-table td { padding: 0.35rem 0.5rem; border-bottom: 1px solid #33415544; }
+ .mm-table-empty { margin: 0.5rem 0; opacity: 0.75; font-size: 0.9rem; }
+ .mm-foot {
+     margin-top: 2rem; padding-top: 1rem;
+     border-top: 1px solid var(--border-color-primary);
+     font-size: 0.78rem; line-height: 1.45;
+     color: var(--body-text-color-subdued);
+ }
+ .mm-foot code { font-size: 0.76rem; }
+ """
+
+
+ def build_app():
+     theme = gr.themes.Soft(
+         primary_hue=gr.themes.Color(
+             c50="#f0fdfa",
+             c100="#ccfbf1",
+             c200="#99f6e4",
+             c300="#5eead4",
+             c400="#2dd4bf",
+             c500="#14b8a6",
+             c600="#0d9488",
+             c700="#0f766e",
+             c800="#115e59",
+             c900="#134e4a",
+             c950="#042f2e",
+         ),
+         neutral_hue=gr.themes.colors.slate,
+         font=("Source Sans 3", "ui-sans-serif", "system-ui", "sans-serif"),
+         font_mono=("IBM Plex Mono", "ui-monospace", "monospace"),
+     ).set(
+         body_background_fill_dark="*neutral_950",
+         block_background_fill_dark="*neutral_900",
+         border_color_primary="*neutral_700",
+         button_primary_background_fill="*primary_600",
+         button_primary_background_fill_hover="*primary_500",
+         block_label_text_size="*text_sm",
+     )
+
+     head = """
+     <link href="https://fonts.googleapis.com/css2?family=Libre+Baskerville:wght@700&family=Source+Sans+3:wght@400;600;700&display=swap" rel="stylesheet">
+     """
+
+     with gr.Blocks(
+         title="MidasMap — Immunogold analysis",
+         theme=theme,
+         css=MM_CSS,
+         head=head,
+     ) as app:
+         gr.HTML(
+             """
+             <div class="mm-brand-bar">
+               <span>Quantitative EM · synapse immunogold</span>
+               <span>Research use · validate critical counts manually</span>
+             </div>
+             <div class="mm-hero">
+               <h1>MidasMap</h1>
+               <p class="mm-sub">
+                 Automated particle picking for <strong>freeze-fracture replica immunolabeling (FFRIL)</strong> TEM:
+                 <strong>6 nm</strong> gold (AMPA receptors) and <strong>12 nm</strong> gold (NR1 / NMDA receptors).
+                 Coordinates export in <strong>µm</strong> for comparison to physiology and super-resolution data—set calibration to match your microscope.
+               </p>
+               <div class="mm-badge-row">
+                 <span class="mm-badge">FFRIL / TEM</span>
+                 <span class="mm-badge">CenterNet</span>
+                 <span class="mm-badge">CEM500K backbone</span>
+                 <span class="mm-badge">LOOCV F1 ≈ 0.94</span>
+               </div>
+             </div>
+             """
+         )
+
+         with gr.Row(elem_classes=["mm-layout"]):
+             with gr.Column(elem_classes=["mm-sidebar"]):
+                 gr.HTML('<p class="mm-panel-title">Micrograph & calibration</p>')
+                 image_input = gr.File(
+                     label="Upload image",
+                     file_types=[".tif", ".tiff", ".png", ".jpg", ".jpeg"],
+                 )
+                 px_per_um_in = gr.Number(
+                     value=DEFAULT_PX_PER_UM,
+                     label="Calibration (pixels per µm)",
+                     info=f"Default {DEFAULT_PX_PER_UM:.0f} matches the published training set. "
+                     "Update if your acquisition scale differs.",
+                     minimum=1,
+                     maximum=1e6,
+                 )
+                 conf_slider = gr.Slider(
+                     minimum=0.05,
+                     maximum=0.95,
+                     value=0.25,
+                     step=0.05,
+                     label="Confidence threshold",
+                     info="Higher → fewer, sharper peaks. Lower → recall with more false positives.",
+                 )
+                 with gr.Accordion("Advanced · non-max suppression", open=False):
+                     nms_6nm = gr.Slider(
+                         minimum=1,
+                         maximum=9,
+                         value=3,
+                         step=2,
+                         label="NMS · 6 nm channel",
+                         info="Minimum spacing between AMPA peaks on the heatmap grid.",
+                     )
+                     nms_12nm = gr.Slider(
+                         minimum=1,
+                         maximum=9,
+                         value=5,
+                         step=2,
+                         label="NMS · 12 nm channel",
+                     )
+                 detect_btn = gr.Button("Run detection", variant="primary", size="lg")
+
+                 with gr.Accordion("For neuroscientists — interpretation", open=False):
+                     gr.Markdown(
+                         """
+                         #### What the model outputs
+                         - **Circles** mark predicted gold centers; **scores** are CNN confidences, not p-values.
+                         - **AMPA** = 6 nm class; **NR1** = 12 nm class (NMDA receptor subunit). Verify ambiguous sites on the raw image.
+
+                         #### When to trust it
+                         - Trained on **10 FFRIL synapse images** (453 hand-placed particles). Expect best performance on **similar prep, contrast, and magnification**.
+                         - **Always spot-check** counts used for publication, especially near membranes and dense clusters.
+
+                         #### Coordinates & CSV
+                         - **x, y** follow image pixel order (origin top-left). **µm** columns use your calibration above.
+                         - CSV includes **receptor**, **gold diameter**, and **calibration** used for provenance.
+
+                         #### Citation
+                         Sahai, A. (2026). *MidasMap* (software). https://github.com/AnikS22/MidasMap
+                         """
+                     )
+
+             with gr.Column(elem_classes=["mm-main"]):
+                 summary_md = gr.HTML(
+                     value="<p class='mm-callout'>Upload a synapse micrograph to begin. Adjust calibration before export if your scale differs from the default.</p>"
+                 )
+                 with gr.Tabs():
+                     with gr.Tab("Overlay"):
+                         overlay_output = gr.Image(
+                             label="Detections + scale bar",
+                             type="numpy",
+                             height=540,
+                         )
+                     with gr.Tab("Heatmaps"):
+                         heatmap_output = gr.Image(
+                             label="Class-specific maps",
+                             type="numpy",
+                             height=540,
+                         )
+                     with gr.Tab("Quant summary"):
+                         stats_output = gr.Image(
+                             label="Distributions & table",
+                             type="numpy",
+                             height=440,
+                         )
+                     with gr.Tab("Table & export"):
+                         table_output = gr.HTML(
+                             label="Detections (preview)",
+                             value="<p class='mm-table-empty'><em>Results appear here after detection.</em></p>",
+                         )
+                         csv_output = gr.File(label="Download CSV")
+
+         gr.HTML(
+             f"""
+             <div class="mm-foot">
+               <strong>Training context:</strong> LOOCV mean F1 ≈ 0.94 on eight well-annotated folds;
+               raw grayscale input (avoid heavy filtering). Not a clinical device.
+               Model weights: <code>checkpoints/final/final_model.pth</code> or
+               <a href="https://huggingface.co/AnikS22/MidasMap" target="_blank" rel="noopener">Hugging Face</a>.
+             </div>
+             """
+         )
+
+         detect_btn.click(
+             fn=detect_particles,
+             inputs=[image_input, conf_slider, nms_6nm, nms_12nm, px_per_um_in],
+             outputs=[
+                 overlay_output,
+                 heatmap_output,
+                 stats_output,
+                 csv_output,
+                 table_output,
+                 summary_md,
+             ],
+         )
+
+     return app
+
+
+ def main():
+     parser = argparse.ArgumentParser(description="MidasMap web dashboard")
+     parser.add_argument(
+         "--checkpoint",
+         type=str,
+         default="checkpoints/final/final_model.pth",
+         help="Path to trained checkpoint (.pth)",
+     )
+     parser.add_argument("--share", action="store_true", help="Gradio public share link (use if localhost is blocked)")
+     parser.add_argument(
+         "--server-name",
+         type=str,
+         default=None,
+         metavar="HOST",
+         help="Bind address, e.g. 0.0.0.0 for LAN (default: 127.0.0.1)",
+     )
+     parser.add_argument("--port", type=int, default=7860)
+     args = parser.parse_args()
+
+     if os.environ.get("GRADIO_SHARE", "").lower() in ("1", "true", "yes"):
+         args.share = True
+
+     ckpt = Path(args.checkpoint)
+     if not ckpt.is_file():
+         raise SystemExit(
+             f"Checkpoint not found: {ckpt}\n"
+             "Train with train_final.py or download from Hugging Face:\n"
+             "  huggingface-cli download AnikS22/MidasMap checkpoints/final/final_model.pth "
+             "--local-dir ."
+         )
+
+     load_model(str(ckpt))
+     demo = build_app()
+     launch_kw = dict(
+         share=args.share,
+         server_port=args.port,
+         server_name=args.server_name,
+         show_api=False,
+         inbrowser=False,
+     )
+     try:
+         demo.launch(**launch_kw)
+     except ValueError as err:
+         if (
+             "localhost is not accessible" in str(err)
+             and not launch_kw.get("share")
+             and os.environ.get("GRADIO_SHARE", "").lower() not in ("1", "true", "yes")
+         ):
+             print(
+                 "Localhost check failed in this environment; starting with share=True "
+                 "(Gradio tunnel). Use --share next time, or set GRADIO_SHARE=1."
+             )
+             build_app().launch(**{**launch_kw, "share": True})
+         else:
+             raise
+
+
+ if __name__ == "__main__":
+     main()
checkpoints/final/final_model.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:735d37e839019318cb0e4c7e40d99194abd59f57efd8594ca51602ce3451dfb6
+ size 98043418
requirements.txt ADDED
@@ -0,0 +1,16 @@
+ # Pin versions compatible with HF Spaces (adjust if the Space build fails).
+ # Rename to requirements.txt in the Space repo root.
+ numpy>=1.24,<2
+ torch>=2.0.0
+ torchvision>=0.15.0
+ scipy>=1.10.0
+ scikit-image>=0.21.0
+ matplotlib>=3.7.0
+ tifffile>=2023.4.0
+ pandas>=2.0.0
+ PyYAML>=6.0
+ albumentations>=1.3.0
+ opencv-python-headless>=4.7.0
+ gradio==4.44.1
+ huggingface_hub>=0.20.0,<0.25.0
+ tqdm>=4.65.0
src/.DS_Store ADDED
Binary file (6.15 kB).
 
src/__init__.py ADDED
@@ -0,0 +1 @@
+ """Immunogold particle detection system for TEM images."""
src/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (224 Bytes).
 
src/__pycache__/ensemble.cpython-311.pyc ADDED
Binary file (11.5 kB).
 
src/__pycache__/evaluate.cpython-311.pyc ADDED
Binary file (8.73 kB).
 
src/__pycache__/heatmap.cpython-311.pyc ADDED
Binary file (7.64 kB).
 
src/__pycache__/loss.cpython-311.pyc ADDED
Binary file (5.31 kB).
 
src/__pycache__/model.cpython-311.pyc ADDED
Binary file (22.3 kB).
 
src/dataset.py ADDED
@@ -0,0 +1,438 @@
1
+ """
2
+ PyTorch Dataset for immunogold particle detection.
3
+
4
+ Implements patch-based training with:
5
+ - 70% hard mining (patches centered near particles)
6
+ - 30% random patches (background recognition)
7
+ - Copy-paste augmentation with Gaussian-blended bead bank
8
+ - Albumentations pipeline with keypoint co-transforms
9
+ """
10
+
11
+ import random
12
+ from pathlib import Path
13
+ from typing import Dict, List, Optional, Tuple
14
+
15
+ import albumentations as A
16
+ import cv2
17
+ import numpy as np
18
+ import torch
19
+ from torch.utils.data import Dataset
20
+
21
+ from src.heatmap import generate_heatmap_gt
22
+ from src.preprocessing import (
23
+ SynapseRecord,
24
+ load_all_annotations,
25
+ load_image,
26
+ load_mask,
27
+ )
28
+
29
+
30
+ # ---------------------------------------------------------------------------
31
+ # Augmentation pipeline
32
+ # ---------------------------------------------------------------------------
33
+
34
+ def get_train_augmentation() -> A.Compose:
35
+ """
36
+ Training augmentation pipeline.
37
+
38
+ Conservative intensity limits: contrast delta is only 11-39 units on uint8.
39
+ DO NOT use Cutout/Mixup/JPEG artifacts — they destroy or mimic particles.
40
+ """
41
+ return A.Compose(
42
+ [
43
+ # Geometric (co-transform keypoints)
44
+ A.RandomRotate90(p=1.0), # EM is rotation invariant
45
+ A.HorizontalFlip(p=0.5),
46
+ A.VerticalFlip(p=0.5),
47
+ # Only ±10° to avoid interpolation artifacts that destroy contrast
48
+ A.Rotate(
49
+ limit=10,
50
+ border_mode=cv2.BORDER_REFLECT_101,
51
+ p=0.5,
52
+ ),
53
+ # Mild elastic deformation (simulates section flatness variation)
54
+ A.ElasticTransform(alpha=30, sigma=5, p=0.3),
55
+ # Intensity (image only)
56
+ A.RandomBrightnessContrast(
57
+ brightness_limit=0.08, # NOT default 0.2
58
+ contrast_limit=0.08,
59
+ p=0.7,
60
+ ),
61
+ # EM shot noise simulation
62
+ A.GaussNoise(p=0.5),
63
+ # Mild blur — simulate slight defocus
64
+ A.GaussianBlur(blur_limit=(3, 3), p=0.2),
65
+ ],
66
+ keypoint_params=A.KeypointParams(
67
+ format="xy",
68
+ remove_invisible=True,
69
+ label_fields=["class_labels"],
70
+ ),
71
+ )
72
+
73
+
74
+ def get_val_augmentation() -> A.Compose:
75
+ """No augmentation for validation — identity transform."""
76
+ return A.Compose(
77
+ [],
78
+ keypoint_params=A.KeypointParams(
79
+ format="xy",
80
+ remove_invisible=True,
81
+ label_fields=["class_labels"],
82
+ ),
83
+ )
84
+
85
+
86
+ # ---------------------------------------------------------------------------
87
+ # Bead bank for copy-paste augmentation
88
+ # ---------------------------------------------------------------------------
89
+
90
+ class BeadBank:
91
+ """
92
+ Pre-extracted particle crops for copy-paste augmentation.
93
+
94
+ Stores small patches centered on annotated particles from training
95
+ images. During training, random beads are pasted onto patches to
96
+ increase particle density and address class imbalance.
97
+ """
98
+
99
+ def __init__(self):
100
+ self.crops: Dict[str, List[Tuple[np.ndarray, int]]] = {
101
+ "6nm": [],
102
+ "12nm": [],
103
+ }
104
+ self.crop_sizes = {"6nm": 32, "12nm": 48}
105
+
106
+ def extract_from_image(
107
+ self,
108
+ image: np.ndarray,
109
+ annotations: Dict[str, np.ndarray],
110
+ ):
111
+ """Extract bead crops from a training image."""
112
+ h, w = image.shape[:2]
113
+
114
+ for cls, coords in annotations.items():
115
+ crop_size = self.crop_sizes[cls]
116
+ half = crop_size // 2
117
+
118
+ for x, y in coords:
119
+ xi, yi = int(round(x)), int(round(y))
120
+ # Skip if too close to edge
121
+ if yi - half < 0 or yi + half > h or xi - half < 0 or xi + half > w:
122
+ continue
123
+
124
+ crop = image[yi - half : yi + half, xi - half : xi + half].copy()
125
+ if crop.shape == (crop_size, crop_size):
126
+ self.crops[cls].append((crop, half))
127
+
128
+ def paste_beads(
129
+ self,
130
+ image: np.ndarray,
131
+ coords_6nm: List[Tuple[float, float]],
132
+ coords_12nm: List[Tuple[float, float]],
133
+ class_labels: List[str],
134
+ mask: Optional[np.ndarray] = None,
135
+ n_paste_per_class: int = 5,
136
+ rng: Optional[np.random.Generator] = None,
137
+ ) -> Tuple[np.ndarray, List[Tuple[float, float]], List[Tuple[float, float]], List[str]]:
138
+ """
139
+ Paste random beads onto image with Gaussian alpha blending.
140
+
141
+ Returns augmented image and updated coordinate lists.
142
+ """
143
+ if rng is None:
144
+ rng = np.random.default_rng()
145
+
146
+ image = image.copy()
147
+ h, w = image.shape[:2]
148
+ new_coords_6nm = list(coords_6nm)
149
+ new_coords_12nm = list(coords_12nm)
150
+ new_labels = list(class_labels)
151
+
152
+ for cls in ["6nm", "12nm"]:
153
+ if not self.crops[cls]:
154
+ continue
155
+
156
+ crop_size = self.crop_sizes[cls]
157
+ half = crop_size // 2
158
+ n_paste = min(n_paste_per_class, len(self.crops[cls]))
159
+
160
+ for _ in range(n_paste):
161
+ # Random paste location (within image bounds)
162
+ px = rng.integers(half + 5, w - half - 5)
163
+ py = rng.integers(half + 5, h - half - 5)
164
+
165
+ # Skip if outside tissue mask
166
+ if mask is not None:
167
+ if py >= mask.shape[0] or px >= mask.shape[1] or not mask[py, px]:
168
+ continue
169
+
170
+ # Check minimum distance from existing particles (avoid overlap)
171
+ too_close = False
172
+ all_existing = new_coords_6nm + new_coords_12nm
173
+ for ex, ey in all_existing:
174
+ if (ex - px) ** 2 + (ey - py) ** 2 < (half * 1.5) ** 2:
175
+ too_close = True
176
+ break
177
+ if too_close:
178
+ continue
179
+
180
+ # Select random crop
181
+ crop, _ = self.crops[cls][rng.integers(len(self.crops[cls]))]
182
+
183
+ # Gaussian alpha mask for soft blending
184
+ yy, xx = np.mgrid[:crop_size, :crop_size]
185
+ center = crop_size / 2
186
+ sigma = half * 0.7
187
+ alpha = np.exp(-((xx - center) ** 2 + (yy - center) ** 2) / (2 * sigma ** 2))
188
+
189
+ # Blend
190
+ region = image[py - half : py + half, px - half : px + half]
191
+ if region.shape != crop.shape:
192
+ continue
193
+ blended = (alpha * crop + (1 - alpha) * region).astype(np.uint8)
194
+ image[py - half : py + half, px - half : px + half] = blended
195
+
196
+ # Add to annotations
197
+ if cls == "6nm":
198
+ new_coords_6nm.append((float(px), float(py)))
199
+ else:
200
+ new_coords_12nm.append((float(px), float(py)))
201
+ new_labels.append(cls)
202
+
203
+ return image, new_coords_6nm, new_coords_12nm, new_labels
204
+
205
+
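The Gaussian alpha blend in `paste_beads` can be sketched in isolation with NumPy: the pasted crop dominates at the patch center (alpha ≈ 1) and fades smoothly into the background toward the edges, avoiding hard paste seams that the model could learn as artifacts. The bead/background intensity values below are illustrative, not taken from real data:

```python
import numpy as np

def gaussian_alpha_blend(crop: np.ndarray, region: np.ndarray,
                         sigma_frac: float = 0.7) -> np.ndarray:
    """Blend `crop` onto `region` with a radial Gaussian alpha mask (soft edges)."""
    size = crop.shape[0]
    half = size // 2
    yy, xx = np.mgrid[:size, :size]
    center = size / 2
    sigma = half * sigma_frac
    alpha = np.exp(-((xx - center) ** 2 + (yy - center) ** 2) / (2 * sigma ** 2))
    return (alpha * crop + (1 - alpha) * region).astype(np.uint8)

bead = np.full((32, 32), 20, dtype=np.uint8)         # dark gold-bead crop
background = np.full((32, 32), 180, dtype=np.uint8)  # bright tissue patch
blended = gaussian_alpha_blend(bead, background)
```

At the center the blended pixel equals the bead value; at the corners it stays close to the background.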
206
+ # ---------------------------------------------------------------------------
207
+ # Dataset
208
+ # ---------------------------------------------------------------------------
209
+
210
+ class ImmunogoldDataset(Dataset):
211
+ """
212
+ Patch-based dataset for immunogold particle detection.
213
+
214
+ Sampling strategy:
215
+ - 70% of patches centered within 100px of a known particle (hard mining)
216
+ - 30% of patches at random locations (background recognition)
217
+
218
+ This ensures the model sees particles in nearly every batch despite
219
+ particles occupying <0.1% of image area.
220
+ """
221
+
222
+ def __init__(
223
+ self,
224
+ records: List[SynapseRecord],
225
+ fold_id: str,
226
+ mode: str = "train",
227
+ patch_size: int = 512,
228
+ stride: int = 2,
229
+ hard_mining_fraction: float = 0.7,
230
+ copy_paste_per_class: int = 5,
231
+ sigmas: Optional[Dict[str, float]] = None,
232
+ samples_per_epoch: int = 200,
233
+ seed: int = 42,
234
+ ):
235
+ """
236
+ Args:
237
+ records: all SynapseRecord entries
238
+ fold_id: synapse_id to hold out (test set)
239
+ mode: 'train' or 'val'
240
+ patch_size: training patch size
241
+ stride: model output stride
242
+ hard_mining_fraction: fraction of patches near particles
243
+ copy_paste_per_class: beads to paste per class
244
+ sigmas: heatmap Gaussian sigmas per class
245
+ samples_per_epoch: virtual epoch size
246
+ seed: random seed
247
+ """
248
+ super().__init__()
249
+ self.patch_size = patch_size
250
+ self.stride = stride
251
+ self.hard_mining_fraction = hard_mining_fraction
252
+ self.copy_paste_per_class = copy_paste_per_class if mode == "train" else 0
253
+ self.sigmas = sigmas or {"6nm": 1.0, "12nm": 1.5}
254
+ self.samples_per_epoch = samples_per_epoch
255
+ self.mode = mode
256
+ self._base_seed = seed
257
+ self.rng = np.random.default_rng(seed)
258
+
259
+ # Split records
260
+ if mode == "train":
261
+ self.records = [r for r in records if r.synapse_id != fold_id]
262
+ elif mode == "val":
263
+ self.records = [r for r in records if r.synapse_id == fold_id]
264
+ else:
265
+ self.records = records
266
+
267
+ # Pre-load all images and annotations into memory (~4MB each × 10 = 40MB)
268
+ self.images = {}
269
+ self.masks = {}
270
+ self.annotations = {}
271
+
272
+ for record in self.records:
273
+ sid = record.synapse_id
274
+ self.images[sid] = load_image(record.image_path)
275
+ if record.mask_path:
276
+ self.masks[sid] = load_mask(record.mask_path)
277
+ self.annotations[sid] = load_all_annotations(record, self.images[sid].shape)
278
+
279
+ # Build particle index for hard mining
280
+ self._build_particle_index()
281
+
282
+ # Build bead bank for copy-paste
283
+ self.bead_bank = BeadBank()
284
+ if mode == "train":
285
+ for sid in self.images:
286
+ self.bead_bank.extract_from_image(
287
+ self.images[sid], self.annotations[sid]
288
+ )
289
+
290
+ # Augmentation
291
+ if mode == "train":
292
+ self.transform = get_train_augmentation()
293
+ else:
294
+ self.transform = get_val_augmentation()
295
+
296
+ def _build_particle_index(self):
297
+ """Build flat index of all particles for hard mining."""
298
+ self.particle_list = [] # (synapse_id, x, y, class)
299
+ for sid, annots in self.annotations.items():
300
+ for cls in ["6nm", "12nm"]:
301
+ for x, y in annots[cls]:
302
+ self.particle_list.append((sid, x, y, cls))
303
+
304
+ @staticmethod
305
+ def worker_init_fn(worker_id: int):
306
+ """Re-seed RNG per DataLoader worker to avoid identical sequences."""
307
+ import torch
308
+ seed = (torch.initial_seed() + worker_id) % (2**32)
309
+ np.random.seed(seed)
310
+
311
+ def __len__(self) -> int:
312
+ return self.samples_per_epoch
313
+
314
+ def __getitem__(self, idx: int) -> dict:
315
+ """
316
+ Sample a patch with ground truth heatmap.
317
+
318
+ Returns dict with:
319
+ 'image': (1, patch_size, patch_size) float32 tensor
320
+ 'heatmap': (2, patch_size//stride, patch_size//stride) float32
321
+ 'offsets': (2, patch_size//stride, patch_size//stride) float32
322
+ 'offset_mask': (patch_size//stride, patch_size//stride) bool
323
+ 'conf_map': (2, patch_size//stride, patch_size//stride) float32
324
+ """
325
+ # Reseed the RNG from idx so each call produces a unique patch.
326
+ # Without this, the same 200 patches repeat every epoch → instant overfitting.
327
+ self.rng = np.random.default_rng(self._base_seed + idx + int(torch.initial_seed() % 100000))
328
+ # Decide: hard or random patch
329
+ do_hard = (self.rng.random() < self.hard_mining_fraction
330
+ and len(self.particle_list) > 0
331
+ and self.mode == "train")
332
+
333
+ if do_hard:
334
+ # Pick random particle, center patch on it with jitter
335
+ pidx = self.rng.integers(len(self.particle_list))
336
+ sid, px, py, _ = self.particle_list[pidx]
337
+ # Jitter center up to 128px
338
+ jitter = 128
339
+ cx = int(px + self.rng.integers(-jitter, jitter + 1))
340
+ cy = int(py + self.rng.integers(-jitter, jitter + 1))
341
+ else:
342
+ # Random image and location
343
+ sid = list(self.images.keys())[
344
+ self.rng.integers(len(self.images))
345
+ ]
346
+ h, w = self.images[sid].shape[:2]
347
+ cx = self.rng.integers(self.patch_size // 2, w - self.patch_size // 2)
348
+ cy = self.rng.integers(self.patch_size // 2, h - self.patch_size // 2)
349
+
350
+ # Extract patch
351
+ image = self.images[sid]
352
+ h, w = image.shape[:2]
353
+ half = self.patch_size // 2
354
+
355
+ # Clamp to image bounds
356
+ cx = max(half, min(w - half, cx))
357
+ cy = max(half, min(h - half, cy))
358
+
359
+ x0, x1 = cx - half, cx + half
360
+ y0, y1 = cy - half, cy + half
361
+
362
+ patch = image[y0:y1, x0:x1].copy()
363
+
364
+ # Pad if needed (edge cases)
365
+ if patch.shape[0] != self.patch_size or patch.shape[1] != self.patch_size:
366
+ padded = np.zeros((self.patch_size, self.patch_size), dtype=np.uint8)
367
+ ph, pw = patch.shape[:2]
368
+ padded[:ph, :pw] = patch
369
+ patch = padded
370
+
371
+ # Get annotations within this patch (convert to patch-local coordinates)
372
+ keypoints = []
373
+ class_labels = []
374
+ for cls in ["6nm", "12nm"]:
375
+ for ax, ay in self.annotations[sid][cls]:
376
+ # Convert to patch-local coords
377
+ lx = ax - x0
378
+ ly = ay - y0
379
+ if 0 <= lx < self.patch_size and 0 <= ly < self.patch_size:
380
+ keypoints.append((lx, ly))
381
+ class_labels.append(cls)
382
+
383
+ # Copy-paste augmentation (before geometric transforms)
384
+ if self.copy_paste_per_class > 0 and self.mode == "train":
385
+ local_6nm = [(x, y) for (x, y), c in zip(keypoints, class_labels) if c == "6nm"]
386
+ local_12nm = [(x, y) for (x, y), c in zip(keypoints, class_labels) if c == "12nm"]
387
+ mask_patch = None
388
+ if sid in self.masks:
389
+ mask_patch = self.masks[sid][y0:y1, x0:x1]
390
+
391
+ patch, local_6nm, local_12nm, class_labels = self.bead_bank.paste_beads(
392
+ patch, local_6nm, local_12nm, class_labels,
393
+ mask=mask_patch,
394
+ n_paste_per_class=self.copy_paste_per_class,
395
+ rng=self.rng,
396
+ )
397
+ # Rebuild keypoints from updated coords
398
+ keypoints = [(x, y) for x, y in local_6nm] + [(x, y) for x, y in local_12nm]
399
+ class_labels = ["6nm"] * len(local_6nm) + ["12nm"] * len(local_12nm)
400
+
401
+ # Apply augmentation (co-transforms keypoints)
402
+ transformed = self.transform(
403
+ image=patch,
404
+ keypoints=keypoints,
405
+ class_labels=class_labels,
406
+ )
407
+ patch_aug = transformed["image"]
408
+ kp_aug = transformed["keypoints"]
409
+ cl_aug = transformed["class_labels"]
410
+
411
+ # Separate keypoints by class
412
+ coords_6nm = np.array(
413
+ [(x, y) for (x, y), c in zip(kp_aug, cl_aug) if c == "6nm"],
414
+ dtype=np.float64,
415
+ ).reshape(-1, 2)
416
+ coords_12nm = np.array(
417
+ [(x, y) for (x, y), c in zip(kp_aug, cl_aug) if c == "12nm"],
418
+ dtype=np.float64,
419
+ ).reshape(-1, 2)
420
+
421
+ # Generate heatmap GT from TRANSFORMED coordinates (never warp heatmap)
422
+ heatmap, offsets, offset_mask, conf_map = generate_heatmap_gt(
423
+ coords_6nm, coords_12nm,
424
+ self.patch_size, self.patch_size,
425
+ sigmas=self.sigmas,
426
+ stride=self.stride,
427
+ )
428
+
429
+ # Convert to tensors
430
+ patch_tensor = torch.from_numpy(patch_aug).float().unsqueeze(0) / 255.0
431
+
432
+ return {
433
+ "image": patch_tensor,
434
+ "heatmap": torch.from_numpy(heatmap),
435
+ "offsets": torch.from_numpy(offsets),
436
+ "offset_mask": torch.from_numpy(offset_mask),
437
+ "conf_map": torch.from_numpy(conf_map),
438
+ }
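The global-to-patch-local coordinate conversion inside `__getitem__` boils down to a subtract-and-bounds-check; a minimal standalone version with illustrative values:

```python
# Keep only annotations that fall inside the patch window at (x0, y0),
# expressed in patch-local coordinates, as done in __getitem__.
x0, y0, patch_size = 100, 200, 512
annotations = [(150.0, 250.0), (900.0, 300.0)]  # (x, y) in full-image space
local = [
    (ax - x0, ay - y0)
    for ax, ay in annotations
    if 0 <= ax - x0 < patch_size and 0 <= ay - y0 < patch_size
]
```

Only the first annotation survives: the second lies outside the 512-pixel window in x.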
src/ensemble.py ADDED
@@ -0,0 +1,236 @@
1
+ """
2
+ Test-time augmentation (D4 dihedral group) and model ensemble averaging.
3
+
4
+ D4 TTA: 4 rotations x 2 reflections = 8 geometric views
5
+ + 2 intensity variants = 10 total forward passes.
6
+ Gold beads are rotationally invariant — D4 TTA is maximally effective.
7
+ Expected F1 gain: +1-3% over single forward pass.
8
+ """
9
+
10
+ import numpy as np
11
+ import torch
12
+ import torch.nn.functional as F
13
+ from typing import List, Optional
14
+
15
+ from src.model import ImmunogoldCenterNet
16
+
17
+
18
+ def d4_tta_predict(
19
+ model: ImmunogoldCenterNet,
20
+ image: np.ndarray,
21
+ device: torch.device = torch.device("cpu"),
22
+ ) -> tuple:
23
+ """
24
+ Test-time augmentation over D4 dihedral group + intensity variants.
25
+
26
+ Args:
27
+ model: trained CenterNet model
28
+ image: (H, W) uint8 preprocessed image
29
+ device: torch device
30
+
31
+ Returns:
32
+ averaged_heatmap: (2, H/2, W/2) numpy array
33
+ averaged_offsets: (2, H/2, W/2) numpy array
34
+ """
35
+ model.eval()
36
+ heatmaps = []
37
+ offsets_list = []
38
+
39
+ # Ensure image dimensions are divisible by 32 for the encoder
40
+ h, w = image.shape[:2]
41
+ pad_h = (32 - h % 32) % 32
42
+ pad_w = (32 - w % 32) % 32
43
+
44
+ def _forward(img_np):
45
+ """Run model on numpy image, return heatmap and offsets."""
46
+ # Pad to multiple of 32
47
+ if pad_h > 0 or pad_w > 0:
48
+ img_np = np.pad(img_np, ((0, pad_h), (0, pad_w)), mode="reflect")
49
+
50
+ tensor = (
51
+ torch.from_numpy(img_np)
52
+ .float()
53
+ .unsqueeze(0)
54
+ .unsqueeze(0) # (1, 1, H, W)
55
+ / 255.0
56
+ ).to(device)
57
+
58
+ with torch.no_grad():
59
+ hm, off = model(tensor)
60
+
61
+ hm = hm.squeeze(0).cpu().numpy() # (2, H/2, W/2)
62
+ off = off.squeeze(0).cpu().numpy() # (2, H/2, W/2)
63
+
64
+ # Remove padding from output
65
+ hm_h = h // 2
66
+ hm_w = w // 2
67
+ return hm[:, :hm_h, :hm_w], off[:, :hm_h, :hm_w]
68
+
69
+ # D4 group: 4 rotations x 2 reflections = 8 geometric views
70
+ for k in range(4):
71
+ for flip in [False, True]:
72
+ aug = np.rot90(image, k).copy()
73
+ if flip:
74
+ aug = np.fliplr(aug).copy()
75
+
76
+ hm, off = _forward(aug)
77
+
78
+ # Inverse transforms on heatmap and offsets
79
+ if flip:
80
+ hm = np.flip(hm, axis=2).copy() # flip W axis
81
+ off = np.flip(off, axis=2).copy()
82
+ off[0] = -off[0] # negate x offset for horizontal flip
83
+
84
+ if k > 0:
85
+ hm = np.rot90(hm, -k, axes=(1, 2)).copy()
86
+ off = np.rot90(off, -k, axes=(1, 2)).copy()
87
+ # Rotate offset vectors
88
+ if k == 1: # 90° CCW undo
89
+ off = np.stack([-off[1], off[0]], axis=0)
90
+ elif k == 2: # 180°
91
+ off = np.stack([-off[0], -off[1]], axis=0)
92
+ elif k == 3: # 270° CCW undo
93
+ off = np.stack([off[1], -off[0]], axis=0)
94
+
95
+ heatmaps.append(hm)
96
+ offsets_list.append(off)
97
+
98
+ # 2 intensity variants
99
+ for factor in [0.9, 1.1]:
100
+ aug = np.clip(image.astype(np.float32) * factor, 0, 255).astype(np.uint8)
101
+ hm, off = _forward(aug)
102
+ heatmaps.append(hm)
103
+ offsets_list.append(off)
104
+
105
+ # Average all views
106
+ avg_heatmap = np.mean(heatmaps, axis=0)
107
+ avg_offsets = np.mean(offsets_list, axis=0)
108
+
109
+ return avg_heatmap, avg_offsets
110
+
111
+
112
+ def ensemble_predict(
113
+ models: List[ImmunogoldCenterNet],
114
+ image: np.ndarray,
115
+ device: torch.device = torch.device("cpu"),
116
+ use_tta: bool = True,
117
+ ) -> tuple:
118
+ """
119
+ Ensemble prediction: average heatmaps from N models.
120
+
121
+ Args:
122
+ models: list of trained models (e.g., 5 seeds x 3 snapshots = 15)
123
+ image: (H, W) uint8 preprocessed image
124
+ device: torch device
125
+ use_tta: whether to apply D4 TTA per model
126
+
127
+ Returns:
128
+ averaged_heatmap: (2, H/2, W/2) numpy array
129
+ averaged_offsets: (2, H/2, W/2) numpy array
130
+ """
131
+ all_heatmaps = []
132
+ all_offsets = []
133
+
134
+ for model in models:
135
+ model.eval()
136
+ model.to(device)
137
+
138
+ if use_tta:
139
+ hm, off = d4_tta_predict(model, image, device)
140
+ else:
141
+ h, w = image.shape[:2]
142
+ pad_h = (32 - h % 32) % 32
143
+ pad_w = (32 - w % 32) % 32
144
+ img_padded = np.pad(image, ((0, pad_h), (0, pad_w)), mode="reflect")
145
+
146
+ tensor = (
147
+ torch.from_numpy(img_padded)
148
+ .float()
149
+ .unsqueeze(0)
150
+ .unsqueeze(0)
151
+ / 255.0
152
+ ).to(device)
153
+
154
+ with torch.no_grad():
155
+ hm_t, off_t = model(tensor)
156
+
157
+ hm = hm_t.squeeze(0).cpu().numpy()[:, : h // 2, : w // 2]
158
+ off = off_t.squeeze(0).cpu().numpy()[:, : h // 2, : w // 2]
159
+
160
+ all_heatmaps.append(hm)
161
+ all_offsets.append(off)
162
+
163
+ return np.mean(all_heatmaps, axis=0), np.mean(all_offsets, axis=0)
164
+
165
+
166
+ def sliding_window_inference(
167
+ model: ImmunogoldCenterNet,
168
+ image: np.ndarray,
169
+ patch_size: int = 512,
170
+ overlap: int = 128,
171
+ device: torch.device = torch.device("cpu"),
172
+ ) -> tuple:
173
+ """
174
+ Full-image inference via sliding window with overlap stitching.
175
+
176
+ Tiles the image into overlapping patches, runs the model on each,
177
+ and stitches heatmaps using max in overlap regions.
178
+
179
+ Args:
180
+ model: trained model
181
+ image: (H, W) uint8 preprocessed image
182
+ patch_size: tile size
183
+ overlap: overlap between tiles
184
+ device: torch device
185
+
186
+ Returns:
187
+ heatmap: (2, H/2, W/2) numpy array
188
+ offsets: (2, H/2, W/2) numpy array
189
+ """
190
+ model.eval()
191
+ h, w = image.shape[:2]
192
+ stride_step = patch_size - overlap
193
+
194
+ # Output dimensions at model stride
195
+ out_h = h // 2
196
+ out_w = w // 2
197
+ out_patch = patch_size // 2
198
+
199
+ heatmap = np.zeros((2, out_h, out_w), dtype=np.float32)
200
+ offsets = np.zeros((2, out_h, out_w), dtype=np.float32)
201
+ count = np.zeros((out_h, out_w), dtype=np.float32)
202
+
203
+ for y0 in range(0, h - patch_size + 1, stride_step):
204
+ for x0 in range(0, w - patch_size + 1, stride_step):
205
+ patch = image[y0 : y0 + patch_size, x0 : x0 + patch_size]
206
+ tensor = (
207
+ torch.from_numpy(patch)
208
+ .float()
209
+ .unsqueeze(0)
210
+ .unsqueeze(0)
211
+ / 255.0
212
+ ).to(device)
213
+
214
+ with torch.no_grad():
215
+ hm, off = model(tensor)
216
+
217
+ hm_np = hm.squeeze(0).cpu().numpy()
218
+ off_np = off.squeeze(0).cpu().numpy()
219
+
220
+ # Output coordinates
221
+ oy0 = y0 // 2
222
+ ox0 = x0 // 2
223
+
224
+ # Max-stitch heatmap, average-stitch offsets
225
+ heatmap[:, oy0 : oy0 + out_patch, ox0 : ox0 + out_patch] = np.maximum(
226
+ heatmap[:, oy0 : oy0 + out_patch, ox0 : ox0 + out_patch],
227
+ hm_np,
228
+ )
229
+ offsets[:, oy0 : oy0 + out_patch, ox0 : ox0 + out_patch] += off_np
230
+ count[oy0 : oy0 + out_patch, ox0 : ox0 + out_patch] += 1
231
+
232
+ # Average offsets where counted
233
+ count = np.maximum(count, 1)
234
+ offsets /= count[np.newaxis, :, :]
235
+
236
+ return heatmap, offsets
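The stitching rule in `sliding_window_inference` (max for heatmaps, average for offsets) can be shown on a toy canvas: a peak that falls in the overlap of two tiles is preserved by element-wise max rather than diluted by averaging. The tile values below are illustrative:

```python
import numpy as np

canvas = np.zeros((1, 4, 6), dtype=np.float32)
tile_a = np.full((1, 4, 4), 0.2, dtype=np.float32)
tile_a[0, 1, 1] = 0.9                       # a detection peak in tile A
tile_b = np.full((1, 4, 4), 0.3, dtype=np.float32)

# Tiles cover columns 0-3 and 2-5; overlap is columns 2-3.
canvas[:, :, 0:4] = np.maximum(canvas[:, :, 0:4], tile_a)
canvas[:, :, 2:6] = np.maximum(canvas[:, :, 2:6], tile_b)
```

The peak survives untouched, and the overlap keeps the larger of the two tile responses.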
src/evaluate.py ADDED
@@ -0,0 +1,203 @@
1
+ """
2
+ Evaluation: Hungarian matching, per-class metrics, LOOCV runner.
3
+
4
+ Uses scipy linear_sum_assignment for optimal bipartite matching between
5
+ predictions and ground truth with class-specific match radii.
6
+ """
7
+
8
+ import numpy as np
9
+ from scipy.optimize import linear_sum_assignment
10
+ from scipy.spatial.distance import cdist
11
+ from typing import Dict, List, Optional, Tuple
12
+
13
+
14
+ def compute_f1(tp: int, fp: int, fn: int, eps: float = 1e-6) -> Tuple[float, float, float]:
15
+ """Compute F1, precision, recall from TP/FP/FN counts."""
16
+ precision = tp / (tp + fp + eps)
17
+ recall = tp / (tp + fn + eps)
18
+ f1 = 2 * precision * recall / (precision + recall + eps)
19
+ return f1, precision, recall
20
+
21
+
22
+ def match_detections_to_gt(
23
+ detections: List[dict],
24
+ gt_coords_6nm: np.ndarray,
25
+ gt_coords_12nm: np.ndarray,
26
+ match_radii: Optional[Dict[str, float]] = None,
27
+ ) -> Dict[str, dict]:
28
+ """
29
+ Hungarian matching between predictions and ground truth.
30
+
31
+ A detection matches GT only if:
32
+ 1. Euclidean distance < match_radius[class]
33
+ 2. Predicted class == GT class
34
+
35
+ Args:
36
+ detections: list of {'x', 'y', 'class', 'conf'}
37
+ gt_coords_6nm: (N, 2) array of (x, y) GT for 6nm
38
+ gt_coords_12nm: (M, 2) array of (x, y) GT for 12nm
39
+ match_radii: per-class match radius in pixels
40
+
41
+ Returns:
42
+ Dict with per-class and overall TP/FP/FN/F1/precision/recall.
43
+ """
44
+ if match_radii is None:
45
+ match_radii = {"6nm": 9.0, "12nm": 15.0}
46
+
47
+ gt_by_class = {"6nm": gt_coords_6nm, "12nm": gt_coords_12nm}
48
+ results = {}
49
+
50
+ total_tp = 0
51
+ total_fp = 0
52
+ total_fn = 0
53
+
54
+ for cls in ["6nm", "12nm"]:
55
+ cls_dets = [d for d in detections if d["class"] == cls]
56
+ gt = gt_by_class[cls]
57
+ radius = match_radii[cls]
58
+
59
+ if len(cls_dets) == 0 and len(gt) == 0:
60
+ results[cls] = {
61
+ "tp": 0, "fp": 0, "fn": 0,
62
+ "f1": 1.0, "precision": 1.0, "recall": 1.0,
63
+ }
64
+ continue
65
+
66
+ if len(cls_dets) == 0:
67
+ results[cls] = {
68
+ "tp": 0, "fp": 0, "fn": len(gt),
69
+ "f1": 0.0, "precision": 0.0, "recall": 0.0,
70
+ }
71
+ total_fn += len(gt)
72
+ continue
73
+
74
+ if len(gt) == 0:
75
+ results[cls] = {
76
+ "tp": 0, "fp": len(cls_dets), "fn": 0,
77
+ "f1": 0.0, "precision": 0.0, "recall": 0.0,
78
+ }
79
+ total_fp += len(cls_dets)
80
+ continue
81
+
82
+ # Build cost matrix
83
+ pred_coords = np.array([[d["x"], d["y"]] for d in cls_dets])
84
+ cost = cdist(pred_coords, gt)
85
+
86
+ # Set costs beyond radius to a large value (forbid match)
87
+ cost_masked = cost.copy()
88
+ cost_masked[cost_masked > radius] = 1e6
89
+
90
+ # Hungarian matching
91
+ row_ind, col_ind = linear_sum_assignment(cost_masked)
92
+
93
+ # Count valid matches (within radius)
94
+ tp = sum(
95
+ 1 for r, c in zip(row_ind, col_ind)
96
+ if cost_masked[r, c] <= radius
97
+ )
98
+ fp = len(cls_dets) - tp
99
+ fn = len(gt) - tp
100
+
101
+ f1, prec, rec = compute_f1(tp, fp, fn)
102
+
103
+ results[cls] = {
104
+ "tp": tp, "fp": fp, "fn": fn,
105
+ "f1": f1, "precision": prec, "recall": rec,
106
+ }
107
+
108
+ total_tp += tp
109
+ total_fp += fp
110
+ total_fn += fn
111
+
112
+ # Overall
113
+ f1_overall, prec_overall, rec_overall = compute_f1(total_tp, total_fp, total_fn)
114
+ results["overall"] = {
115
+ "tp": total_tp, "fp": total_fp, "fn": total_fn,
116
+ "f1": f1_overall, "precision": prec_overall, "recall": rec_overall,
117
+ }
118
+
119
+ # Mean F1 across classes
120
+ class_f1s = [results[c]["f1"] for c in ["6nm", "12nm"] if results[c]["fn"] + results[c]["tp"] > 0]
121
+ results["mean_f1"] = np.mean(class_f1s) if class_f1s else 0.0
122
+
123
+ return results
124
+
125
+
126
+ def evaluate_fold(
127
+ detections: List[dict],
128
+ gt_annotations: Dict[str, np.ndarray],
129
+ match_radii: Optional[Dict[str, float]] = None,
130
+ has_6nm: bool = True,
131
+ ) -> Dict[str, dict]:
132
+ """
133
+ Evaluate detections for a single LOOCV fold.
134
+
135
+ Args:
136
+ detections: model predictions
137
+ gt_annotations: {'6nm': Nx2, '12nm': Mx2}
138
+ match_radii: per-class match radii
139
+ has_6nm: whether this fold has 6nm GT (False for S7, S15)
140
+
141
+ Returns:
142
+ Evaluation metrics dict.
143
+ """
144
+ gt_6nm = gt_annotations.get("6nm", np.empty((0, 2)))
145
+ gt_12nm = gt_annotations.get("12nm", np.empty((0, 2)))
146
+
147
+ results = match_detections_to_gt(detections, gt_6nm, gt_12nm, match_radii)
148
+
149
+ if not has_6nm:
150
+ results["6nm"]["note"] = "N/A (missing annotations)"
151
+
152
+ return results
153
+
154
+
155
+ def compute_average_precision(
156
+ detections: List[dict],
157
+ gt_coords: np.ndarray,
158
+ match_radius: float,
159
+ ) -> float:
160
+ """
161
+ Compute Average Precision (AP) for a single class.
162
+
163
+ Follows PASCAL VOC style: sort by confidence, compute precision-recall
164
+ curve, then compute area under curve.
165
+ """
166
+ if len(gt_coords) == 0:
167
+ return 0.0 if detections else 1.0
168
+
169
+ # Sort by confidence descending
170
+ sorted_dets = sorted(detections, key=lambda d: d["conf"], reverse=True)
171
+
172
+ tp_list = []
173
+ fp_list = []
174
+ matched_gt = set()
175
+
176
+ for det in sorted_dets:
177
+ det_coord = np.array([det["x"], det["y"]])
178
+ dists = np.sqrt(np.sum((gt_coords - det_coord) ** 2, axis=1))
179
+ min_idx = np.argmin(dists)
180
+
181
+ if dists[min_idx] <= match_radius and min_idx not in matched_gt:
182
+ tp_list.append(1)
183
+ fp_list.append(0)
184
+ matched_gt.add(min_idx)
185
+ else:
186
+ tp_list.append(0)
187
+ fp_list.append(1)
188
+
189
+ tp_cumsum = np.cumsum(tp_list)
190
+ fp_cumsum = np.cumsum(fp_list)
191
+
192
+ precision = tp_cumsum / (tp_cumsum + fp_cumsum)
193
+ recall = tp_cumsum / len(gt_coords)
194
+
195
+ # Compute AP using all-point interpolation
196
+ ap = 0.0
197
+ for i in range(len(precision)):
198
+ if i == 0:
199
+ ap += precision[i] * recall[i]
200
+ else:
201
+ ap += precision[i] * (recall[i] - recall[i - 1])
202
+
203
+ return ap
src/heatmap.py ADDED
@@ -0,0 +1,187 @@
1
+ """
2
+ Ground truth heatmap generation and peak extraction for CenterNet.
3
+
4
+ Generates Gaussian-splat heatmaps at stride-2 resolution with
5
+ class-specific sigma values calibrated to bead size.
6
+ """
7
+
8
+ import numpy as np
9
+ import torch
10
+ import torch.nn.functional as F
11
+ from typing import Dict, List, Tuple, Optional
12
+
13
+ # Class index mapping
14
+ CLASS_IDX = {"6nm": 0, "12nm": 1}
15
+ CLASS_NAMES = ["6nm", "12nm"]
16
+ STRIDE = 2
17
+
18
+
19
+ def generate_heatmap_gt(
+     coords_6nm: np.ndarray,
+     coords_12nm: np.ndarray,
+     image_h: int,
+     image_w: int,
+     sigmas: Optional[Dict[str, float]] = None,
+     stride: int = STRIDE,
+     confidence_6nm: Optional[np.ndarray] = None,
+     confidence_12nm: Optional[np.ndarray] = None,
+ ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+     """
+     Generate CenterNet ground truth heatmaps and offset maps.
+
+     Args:
+         coords_6nm: (N, 2) array of (x, y) in ORIGINAL pixel space
+         coords_12nm: (M, 2) array of (x, y) in ORIGINAL pixel space
+         image_h: original image height
+         image_w: original image width
+         sigmas: per-class Gaussian sigma in feature space
+         stride: output stride (default 2)
+         confidence_6nm: optional per-particle confidence weights
+         confidence_12nm: optional per-particle confidence weights
+
+     Returns:
+         heatmap: (2, H//stride, W//stride) float32 in [0, 1]
+         offsets: (2, H//stride, W//stride) float32 sub-pixel offsets
+         offset_mask: (H//stride, W//stride) bool — True at particle centers
+         conf_map: (2, H//stride, W//stride) float32 confidence weights
+     """
+     if sigmas is None:
+         sigmas = {"6nm": 1.0, "12nm": 1.5}
+
+     h_feat = image_h // stride
+     w_feat = image_w // stride
+
+     heatmap = np.zeros((2, h_feat, w_feat), dtype=np.float32)
+     offsets = np.zeros((2, h_feat, w_feat), dtype=np.float32)
+     offset_mask = np.zeros((h_feat, w_feat), dtype=bool)
+     conf_map = np.ones((2, h_feat, w_feat), dtype=np.float32)
+
+     # Prepare coordinate lists with class labels and confidences
+     all_entries = []
+     if len(coords_6nm) > 0:
+         confs = confidence_6nm if confidence_6nm is not None else np.ones(len(coords_6nm))
+         for i, (x, y) in enumerate(coords_6nm):
+             all_entries.append((x, y, "6nm", confs[i]))
+     if len(coords_12nm) > 0:
+         confs = confidence_12nm if confidence_12nm is not None else np.ones(len(coords_12nm))
+         for i, (x, y) in enumerate(coords_12nm):
+             all_entries.append((x, y, "12nm", confs[i]))
+
+     for x, y, cls, conf in all_entries:
+         cidx = CLASS_IDX[cls]
+         sigma = sigmas[cls]
+
+         # Feature-space center (float)
+         cx_f = x / stride
+         cy_f = y / stride
+
+         # Integer grid center
+         cx_i = int(round(cx_f))
+         cy_i = int(round(cy_f))
+
+         # Sub-pixel offset
+         off_x = cx_f - cx_i
+         off_y = cy_f - cy_i
+
+         # Gaussian radius: truncate at 3 sigma
+         r = max(int(3 * sigma + 1), 2)
+
+         # Bounds-clipped grid
+         y0 = max(0, cy_i - r)
+         y1 = min(h_feat, cy_i + r + 1)
+         x0 = max(0, cx_i - r)
+         x1 = min(w_feat, cx_i + r + 1)
+
+         if y0 >= y1 or x0 >= x1:
+             continue
+
+         yy, xx = np.meshgrid(
+             np.arange(y0, y1),
+             np.arange(x0, x1),
+             indexing="ij",
+         )
+
+         # Gaussian centered at INTEGER center (not float)
+         # The integer center MUST be exactly 1.0 — the CornerNet focal loss
+         # uses pos_mask = (gt == 1.0) and treats everything else as negative.
+         # Centering the Gaussian at the float position produces peaks of 0.78-0.93
+         # which the loss sees as negatives → zero positive training signal.
+         gaussian = np.exp(
+             -((xx - cx_i) ** 2 + (yy - cy_i) ** 2) / (2 * sigma ** 2)
+         )
+
+         # Scale by confidence (for pseudo-label weighting)
+         gaussian = gaussian * conf
+
+         # Element-wise max (handles overlapping particles correctly)
+         heatmap[cidx, y0:y1, x0:x1] = np.maximum(
+             heatmap[cidx, y0:y1, x0:x1], gaussian
+         )
+
+         # Offset and confidence only at the integer center pixel
+         if 0 <= cy_i < h_feat and 0 <= cx_i < w_feat:
+             offsets[0, cy_i, cx_i] = off_x
+             offsets[1, cy_i, cx_i] = off_y
+             offset_mask[cy_i, cx_i] = True
+             conf_map[cidx, cy_i, cx_i] = conf
+
+     return heatmap, offsets, offset_mask, conf_map
+
+
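The unit-peak requirement called out in the comments is easy to verify in isolation. A standalone NumPy sketch (not the project's API) of the stamp-and-max step, with hypothetical centers and sigma:

```python
import numpy as np

def place_gaussian(hm, cx, cy, sigma):
    """Stamp a unit-peak Gaussian onto heatmap hm at integer center (cx, cy)."""
    h, w = hm.shape
    r = max(int(3 * sigma + 1), 2)
    y0, y1 = max(0, cy - r), min(h, cy + r + 1)
    x0, x1 = max(0, cx - r), min(w, cx + r + 1)
    yy, xx = np.meshgrid(np.arange(y0, y1), np.arange(x0, x1), indexing="ij")
    g = np.exp(-((xx - cx) ** 2 + (yy - cy) ** 2) / (2 * sigma ** 2))
    # max-combine so overlapping particles both keep exact 1.0 peaks
    hm[y0:y1, x0:x1] = np.maximum(hm[y0:y1, x0:x1], g)
    return hm

hm = np.zeros((32, 32), dtype=np.float32)
place_gaussian(hm, 10, 10, sigma=1.5)
place_gaussian(hm, 12, 10, sigma=1.5)  # overlapping neighbor two cells away
```

Both stamps retain an exact peak of 1.0 at their integer centers, which is what `pos_mask = (gt == 1)` in the focal loss depends on.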
+ def extract_peaks(
+     heatmap: torch.Tensor,
+     offset_map: torch.Tensor,
+     stride: int = STRIDE,
+     conf_threshold: float = 0.3,
+     nms_kernel_sizes: Optional[Dict[str, int]] = None,
+ ) -> List[dict]:
+     """
+     Extract detections from predicted heatmap via max-pool NMS.
+
+     Args:
+         heatmap: (2, H/stride, W/stride) sigmoid-activated
+         offset_map: (2, H/stride, W/stride) raw offset predictions
+         stride: output stride
+         conf_threshold: minimum confidence to keep
+         nms_kernel_sizes: per-class NMS kernel sizes
+
+     Returns:
+         List of {'x': float, 'y': float, 'class': str, 'conf': float}
+     """
+     if nms_kernel_sizes is None:
+         nms_kernel_sizes = {"6nm": 3, "12nm": 5}
+
+     detections = []
+
+     for cls_idx, cls_name in enumerate(CLASS_NAMES):
+         hm_cls = heatmap[cls_idx].unsqueeze(0).unsqueeze(0)  # (1,1,H,W)
+         kernel = nms_kernel_sizes[cls_name]
+
+         # Max-pool NMS
+         hmax = F.max_pool2d(
+             hm_cls, kernel_size=kernel, stride=1, padding=kernel // 2
+         )
+         peaks = (hmax.squeeze() == heatmap[cls_idx]) & (
+             heatmap[cls_idx] > conf_threshold
+         )
+
+         ys, xs = torch.where(peaks)
+         for y_idx, x_idx in zip(ys, xs):
+             y_i = y_idx.item()
+             x_i = x_idx.item()
+             conf = heatmap[cls_idx, y_i, x_i].item()
+             dx = offset_map[0, y_i, x_i].item()
+             dy = offset_map[1, y_i, x_i].item()
+
+             # Back to input space with sub-pixel offset
+             det_x = (x_i + dx) * stride
+             det_y = (y_i + dy) * stride
+
+             detections.append({
+                 "x": det_x,
+                 "y": det_y,
+                 "class": cls_name,
+                 "conf": conf,
+             })
+
+     return detections
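The final back-projection is plain arithmetic. A pure-Python check with hypothetical peak values:

```python
STRIDE = 2

def decode(x_idx, y_idx, dx, dy, stride=STRIDE):
    """Feature-grid index plus predicted sub-pixel offset -> input-space coords."""
    return (x_idx + dx) * stride, (y_idx + dy) * stride

# a peak at grid cell (x=50, y=40) with predicted offsets (+0.25, -0.5)
x, y = decode(50, 40, 0.25, -0.5)
```

At stride 2 the offsets can correct up to one input pixel per axis, which is why the offset head matters for sub-pixel localization of small beads.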
src/loss.py ADDED
@@ -0,0 +1,137 @@
+ """
+ Loss functions for CenterNet immunogold detection.
+
+ Implements CornerNet penalty-reduced focal loss for sparse heatmaps
+ and smooth L1 offset regression loss.
+ """
+
+ import torch
+ import torch.nn.functional as F
+
+
+ def cornernet_focal_loss(
+     pred: torch.Tensor,
+     gt: torch.Tensor,
+     alpha: int = 2,
+     beta: int = 4,
+     conf_weights: torch.Tensor = None,
+     eps: float = 1e-6,
+ ) -> torch.Tensor:
+     """
+     CornerNet penalty-reduced focal loss for sparse heatmaps.
+
+     The positive:negative pixel ratio is ~1:23,000 per channel.
+     Standard BCE would learn to predict all zeros. This loss
+     penalizes confident wrong predictions and down-weights easy,
+     already-correct ones via the (1-p)^alpha and p^alpha terms.
+
+     Args:
+         pred: (B, C, H, W) sigmoid-activated predictions in [0, 1]
+         gt: (B, C, H, W) Gaussian heatmap targets in [0, 1]
+         alpha: focal exponent for prediction confidence (default 2)
+         beta: penalty reduction exponent near GT peaks (default 4)
+         conf_weights: optional (B, C, H, W) per-pixel confidence weights
+             for pseudo-label weighting
+         eps: numerical stability
+
+     Returns:
+         Scalar loss, normalized by number of positive locations.
+     """
+     pos_mask = (gt == 1).float()
+     neg_mask = (gt < 1).float()
+
+     # Penalty reduction: pixels near particle centers get lower negative penalty
+     # (1 - gt)^beta → 0 near peaks, → 1 far from peaks
+     neg_weights = torch.pow(1 - gt, beta)
+
+     # Positive loss: encourage high confidence at GT peaks
+     pos_loss = torch.log(pred.clamp(min=eps)) * torch.pow(1 - pred, alpha) * pos_mask
+
+     # Negative loss: penalize high confidence away from GT peaks
+     neg_loss = (
+         torch.log((1 - pred).clamp(min=eps))
+         * torch.pow(pred, alpha)
+         * neg_weights
+         * neg_mask
+     )
+
+     # Apply confidence weighting if provided (for pseudo-label support)
+     if conf_weights is not None:
+         pos_loss = pos_loss * conf_weights
+         # Negative loss near pseudo-labels also scaled
+         neg_loss = neg_loss * conf_weights
+
+     num_pos = pos_mask.sum().clamp(min=1)
+     loss = -(pos_loss.sum() + neg_loss.sum()) / num_pos
+
+     return loss
+
+
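To see the penalty reduction at work, the same formula can be evaluated on a toy heatmap in NumPy (a standalone sketch, not the training code): a confident false positive far from any peak should cost far more than the same score on a Gaussian shoulder.

```python
import numpy as np

def focal_np(pred, gt, alpha=2, beta=4, eps=1e-6):
    """NumPy transcription of the CornerNet penalty-reduced focal loss."""
    pos = (gt == 1.0)
    neg_w = (1 - gt) ** beta  # penalty reduction: small near peaks
    pos_loss = np.log(np.clip(pred, eps, None)) * (1 - pred) ** alpha * pos
    neg_loss = np.log(np.clip(1 - pred, eps, None)) * pred ** alpha * neg_w * ~pos
    return -(pos_loss.sum() + neg_loss.sum()) / max(pos.sum(), 1)

gt = np.zeros((8, 8)); gt[4, 4] = 1.0; gt[4, 5] = 0.6   # 0.6 = Gaussian shoulder
pred_far = np.zeros((8, 8)); pred_far[0, 0] = 0.9        # confident FP far from peak
pred_near = np.zeros((8, 8)); pred_near[4, 5] = 0.9      # same score on the shoulder
```

The shoulder pixel carries a negative weight of (1 - 0.6)^4 = 0.0256, so the near-peak mistake is penalized roughly 40x less than the distant one.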
+ def offset_loss(
+     pred_offsets: torch.Tensor,
+     gt_offsets: torch.Tensor,
+     mask: torch.Tensor,
+ ) -> torch.Tensor:
+     """
+     Smooth L1 loss on sub-pixel offsets at annotated particle locations only.
+
+     Args:
+         pred_offsets: (B, 2, H, W) predicted offsets
+         gt_offsets: (B, 2, H, W) ground truth offsets
+         mask: (B, H, W) boolean — True at particle integer centers
+
+     Returns:
+         Scalar loss.
+     """
+     # Expand mask to match offset dimensions
+     mask_expanded = mask.unsqueeze(1).expand_as(pred_offsets)
+
+     if mask_expanded.sum() == 0:
+         return torch.tensor(0.0, device=pred_offsets.device, requires_grad=True)
+
+     loss = F.smooth_l1_loss(
+         pred_offsets[mask_expanded],
+         gt_offsets[mask_expanded],
+         reduction="mean",
+     )
+     return loss
+
+
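The masked smooth-L1 behavior can be illustrated without torch. This NumPy sketch (assumed toy values) shows that errors at unannotated pixels never contribute to the loss:

```python
import numpy as np

def smooth_l1(diff):
    """Elementwise smooth L1: 0.5*d^2 for |d| < 1, |d| - 0.5 otherwise."""
    a = np.abs(diff)
    return np.where(a < 1, 0.5 * a ** 2, a - 0.5)

pred = np.array([[0.3, -0.1], [2.0, 0.0]])
gt = np.array([[0.25, 0.0], [0.0, 0.0]])
mask = np.array([[True, True], [False, False]])  # only annotated centers count

loss = smooth_l1(pred[mask] - gt[mask]).mean()
```

The large 2.0 error in the bottom row is masked out, so the mean is taken over only the two annotated entries.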
+ def total_loss(
+     heatmap_pred: torch.Tensor,
+     heatmap_gt: torch.Tensor,
+     offset_pred: torch.Tensor,
+     offset_gt: torch.Tensor,
+     offset_mask: torch.Tensor,
+     lambda_offset: float = 1.0,
+     focal_alpha: int = 2,
+     focal_beta: int = 4,
+     conf_weights: torch.Tensor = None,
+ ) -> tuple:
+     """
+     Combined heatmap focal loss + offset regression loss.
+
+     Args:
+         heatmap_pred: (B, 2, H, W) sigmoid predictions
+         heatmap_gt: (B, 2, H, W) Gaussian GT
+         offset_pred: (B, 2, H, W) predicted offsets
+         offset_gt: (B, 2, H, W) GT offsets
+         offset_mask: (B, H, W) boolean mask
+         lambda_offset: weight for offset loss (default 1.0)
+         focal_alpha: focal loss alpha
+         focal_beta: focal loss beta
+         conf_weights: optional per-pixel confidence weights
+
+     Returns:
+         (total_loss, heatmap_loss_value, offset_loss_value)
+     """
+     l_hm = cornernet_focal_loss(
+         heatmap_pred, heatmap_gt,
+         alpha=focal_alpha, beta=focal_beta,
+         conf_weights=conf_weights,
+     )
+     l_off = offset_loss(offset_pred, offset_gt, offset_mask)
+
+     total = l_hm + lambda_offset * l_off
+
+     return total, l_hm.item(), l_off.item()
src/model.py ADDED
@@ -0,0 +1,382 @@
+ """
+ CenterNet with CEM500K-pretrained ResNet-50 backbone for immunogold detection.
+
+ Architecture:
+     Input: 1ch grayscale, variable size (padded to multiple of 32)
+     Encoder: CEM500K ResNet-50 (pretrained), conv1 adapted for 1ch input
+     Neck: BiFPN (2 rounds, 128ch)
+     Decoder: Transposed conv → stride-2 output
+     Heads: Heatmap (2ch sigmoid), Offset (2ch)
+     Output: Stride-2 maps → (H/2, W/2) resolution
+
+ Output stride is 2, NOT 4 or 8. At stride 4, a 6nm bead (4-6px radius)
+ collapses to 1px in feature space — insufficient for detection.
+ At stride 2, the same bead occupies 2-3px, enough for Gaussian peak extraction.
+ """
+
+ import math
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import torchvision.models as models
+ from typing import List, Optional
+
+
+ # ---------------------------------------------------------------------------
+ # BiFPN: Bidirectional Feature Pyramid Network
+ # ---------------------------------------------------------------------------
+
+ class DepthwiseSeparableConv(nn.Module):
+     """Depthwise separable convolution as used in BiFPN."""
+
+     def __init__(self, in_ch: int, out_ch: int, kernel_size: int = 3,
+                  stride: int = 1, padding: int = 1):
+         super().__init__()
+         self.depthwise = nn.Conv2d(
+             in_ch, in_ch, kernel_size, stride=stride,
+             padding=padding, groups=in_ch, bias=False,
+         )
+         self.pointwise = nn.Conv2d(in_ch, out_ch, 1, bias=False)
+         self.bn = nn.BatchNorm2d(out_ch)
+         self.act = nn.ReLU(inplace=True)
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         return self.act(self.bn(self.pointwise(self.depthwise(x))))
+
+
+ class BiFPNFusionNode(nn.Module):
+     """
+     Single BiFPN fusion node with fast normalized weighted fusion.
+
+         w_normalized = relu(w) / (sum(relu(w)) + eps)
+         output = conv(sum(w_i * input_i))
+     """
+
+     def __init__(self, channels: int, n_inputs: int = 2, eps: float = 1e-4):
+         super().__init__()
+         self.eps = eps
+         # Learnable fusion weights
+         self.weights = nn.Parameter(torch.ones(n_inputs, dtype=torch.float32))
+         self.conv = DepthwiseSeparableConv(channels, channels)
+
+     def forward(self, inputs: List[torch.Tensor]) -> torch.Tensor:
+         # Fast normalized fusion
+         w = F.relu(self.weights)
+         w_norm = w / (w.sum() + self.eps)
+
+         fused = sum(w_i * inp for w_i, inp in zip(w_norm, inputs))
+         return self.conv(fused)
+
+
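The fast normalized fusion rule in the docstring can be checked numerically. A NumPy sketch with assumed weights (standalone, no torch):

```python
import numpy as np

def fast_normalized_fusion(inputs, w, eps=1e-4):
    """Softmax-free weighted fusion: relu(w) / (sum(relu(w)) + eps)."""
    w = np.maximum(w, 0.0)        # relu keeps weights non-negative
    w = w / (w.sum() + eps)       # weights now sum to (almost) 1
    return sum(wi * x for wi, x in zip(w, inputs))

a = np.full((4, 4), 2.0)
b = np.full((4, 4), 0.0)
out = fast_normalized_fusion([a, b], np.array([3.0, 1.0]))
```

With weights 3 and 1, input `a` contributes three quarters of the output, so each fused value is close to 1.5. The eps term only guards against a zero denominator when all weights relu to 0.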
+ class BiFPNLayer(nn.Module):
+     """
+     One round of BiFPN: top-down + bottom-up bidirectional fusion.
+
+     Input levels: P2 (stride 4), P3 (stride 8), P4 (stride 16), P5 (stride 32)
+     """
+
+     def __init__(self, channels: int):
+         super().__init__()
+         # Top-down fusion nodes (P5 → P4_td, P4_td+P3 → P3_td, P3_td+P2 → P2_td)
+         self.td_p4 = BiFPNFusionNode(channels, n_inputs=2)
+         self.td_p3 = BiFPNFusionNode(channels, n_inputs=2)
+         self.td_p2 = BiFPNFusionNode(channels, n_inputs=2)
+
+         # Bottom-up fusion nodes (combine top-down outputs with original)
+         self.bu_p3 = BiFPNFusionNode(channels, n_inputs=3)  # p3_orig + p3_td + p2_out
+         self.bu_p4 = BiFPNFusionNode(channels, n_inputs=3)  # p4_orig + p4_td + p3_out
+         self.bu_p5 = BiFPNFusionNode(channels, n_inputs=2)  # p5_orig + p4_out
+
+     def forward(self, features: List[torch.Tensor]) -> List[torch.Tensor]:
+         """
+         Args:
+             features: [P2, P3, P4, P5] at channels ch, with decreasing spatial dims
+
+         Returns:
+             [P2_out, P3_out, P4_out, P5_out]
+         """
+         p2, p3, p4, p5 = features
+
+         # --- Top-down pathway ---
+         # P5 → upscale → fuse with P4
+         p5_up = F.interpolate(p5, size=p4.shape[2:], mode="nearest")
+         p4_td = self.td_p4([p4, p5_up])
+
+         # P4_td → upscale → fuse with P3
+         p4_td_up = F.interpolate(p4_td, size=p3.shape[2:], mode="nearest")
+         p3_td = self.td_p3([p3, p4_td_up])
+
+         # P3_td → upscale → fuse with P2
+         p3_td_up = F.interpolate(p3_td, size=p2.shape[2:], mode="nearest")
+         p2_td = self.td_p2([p2, p3_td_up])
+
+         # --- Bottom-up pathway ---
+         p2_out = p2_td
+
+         # P2_out → downsample → fuse with P3_td and P3_orig
+         p2_down = F.max_pool2d(p2_out, kernel_size=2)
+         p3_out = self.bu_p3([p3, p3_td, p2_down])
+
+         # P3_out → downsample → fuse with P4_td and P4_orig
+         p3_down = F.max_pool2d(p3_out, kernel_size=2)
+         p4_out = self.bu_p4([p4, p4_td, p3_down])
+
+         # P4_out → downsample → fuse with P5_orig
+         p4_down = F.max_pool2d(p4_out, kernel_size=2)
+         p5_out = self.bu_p5([p5, p4_down])
+
+         return [p2_out, p3_out, p4_out, p5_out]
+
+
+ class BiFPN(nn.Module):
+     """Multi-round BiFPN with lateral projections."""
+
+     def __init__(self, in_channels: List[int], out_channels: int = 128,
+                  num_rounds: int = 2):
+         super().__init__()
+         # Lateral 1x1 projections to unify channel count
+         self.laterals = nn.ModuleList([
+             nn.Sequential(
+                 nn.Conv2d(in_ch, out_channels, 1, bias=False),
+                 nn.BatchNorm2d(out_channels),
+                 nn.ReLU(inplace=True),
+             )
+             for in_ch in in_channels
+         ])
+
+         # BiFPN rounds
+         self.rounds = nn.ModuleList([
+             BiFPNLayer(out_channels) for _ in range(num_rounds)
+         ])
+
+     def forward(self, features: List[torch.Tensor]) -> List[torch.Tensor]:
+         # Project to uniform channels
+         projected = [lat(feat) for lat, feat in zip(self.laterals, features)]
+
+         # Run BiFPN rounds
+         for bifpn_round in self.rounds:
+             projected = bifpn_round(projected)
+
+         return projected
+
+
+ # ---------------------------------------------------------------------------
+ # Detection Heads
+ # ---------------------------------------------------------------------------
+
+ class HeatmapHead(nn.Module):
+     """Heatmap prediction head at stride-2 resolution."""
+
+     def __init__(self, in_channels: int = 64, num_classes: int = 2):
+         super().__init__()
+         self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=3, padding=1, bias=False)
+         self.bn1 = nn.BatchNorm2d(64)
+         self.relu = nn.ReLU(inplace=True)
+         self.conv2 = nn.Conv2d(64, num_classes, kernel_size=1)
+
+         # Initialize final conv bias for focal loss: -log((1-pi)/pi) where pi=0.01
+         # This prevents the network from producing a high false-positive rate early
+         nn.init.constant_(self.conv2.bias, -math.log((1 - 0.01) / 0.01))
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         x = self.relu(self.bn1(self.conv1(x)))
+         return torch.sigmoid(self.conv2(x))
+
+
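The bias initialization deserves a quick sanity check: with a foreground prior pi = 0.01, the chosen constant makes the initial sigmoid output equal to pi, so the freshly initialized head predicts "background" almost everywhere. Pure stdlib math:

```python
import math

pi = 0.01                           # desired initial foreground probability
bias = -math.log((1 - pi) / pi)     # the constant used for conv2.bias
p0 = 1.0 / (1.0 + math.exp(-bias))  # sigmoid(bias) = initial heatmap value
```

Since sigmoid(-log((1-pi)/pi)) = pi exactly, the network starts near the true positive rate instead of at 0.5, which keeps the early negative focal loss small.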
+ class OffsetHead(nn.Module):
+     """Sub-pixel offset regression head."""
+
+     def __init__(self, in_channels: int = 64):
+         super().__init__()
+         self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=3, padding=1, bias=False)
+         self.bn1 = nn.BatchNorm2d(64)
+         self.relu = nn.ReLU(inplace=True)
+         self.conv2 = nn.Conv2d(64, 2, kernel_size=1)  # dx, dy
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         x = self.relu(self.bn1(self.conv1(x)))
+         return self.conv2(x)
+
+
+ # ---------------------------------------------------------------------------
+ # Full CenterNet Model
+ # ---------------------------------------------------------------------------
+
+ class ImmunogoldCenterNet(nn.Module):
+     """
+     CenterNet with CEM500K-pretrained ResNet-50 backbone.
+
+     Detects 6nm and 12nm immunogold particles at stride-2 resolution.
+     """
+
+     def __init__(
+         self,
+         pretrained_path: Optional[str] = None,
+         bifpn_channels: int = 128,
+         bifpn_rounds: int = 2,
+         num_classes: int = 2,
+     ):
+         super().__init__()
+         self.num_classes = num_classes
+
+         # --- Encoder: ResNet-50 ---
+         backbone = models.resnet50(weights=None)
+         # Adapt conv1 for 1-channel grayscale input
+         backbone.conv1 = nn.Conv2d(
+             1, 64, kernel_size=7, stride=2, padding=3, bias=False,
+         )
+
+         # Load pretrained weights
+         if pretrained_path:
+             self._load_pretrained(backbone, pretrained_path)
+         else:
+             # Use ImageNet weights as fallback, adapting conv1
+             imagenet_backbone = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
+             state = imagenet_backbone.state_dict()
+             # Mean-pool RGB conv1 weights → grayscale
+             state["conv1.weight"] = state["conv1.weight"].mean(dim=1, keepdim=True)
+             backbone.load_state_dict(state, strict=False)
+
+         # Extract encoder stages
+         self.stem = nn.Sequential(
+             backbone.conv1, backbone.bn1, backbone.relu, backbone.maxpool,
+         )
+         self.layer1 = backbone.layer1  # 256ch, stride 4
+         self.layer2 = backbone.layer2  # 512ch, stride 8
+         self.layer3 = backbone.layer3  # 1024ch, stride 16
+         self.layer4 = backbone.layer4  # 2048ch, stride 32
+
+         # --- BiFPN Neck ---
+         self.bifpn = BiFPN(
+             in_channels=[256, 512, 1024, 2048],
+             out_channels=bifpn_channels,
+             num_rounds=bifpn_rounds,
+         )
+
+         # --- Decoder: upsample P2 (stride 4) → stride 2 ---
+         self.upsample = nn.Sequential(
+             nn.ConvTranspose2d(
+                 bifpn_channels, 64, kernel_size=4, stride=2, padding=1, bias=False,
+             ),
+             nn.BatchNorm2d(64),
+             nn.ReLU(inplace=True),
+         )
+
+         # --- Detection Heads (at stride-2 resolution) ---
+         self.heatmap_head = HeatmapHead(64, num_classes)
+         self.offset_head = OffsetHead(64)
+
+     def _load_pretrained(self, backbone: nn.Module, path: str):
+         """Load CEM500K MoCoV2 pretrained weights."""
+         ckpt = torch.load(path, map_location="cpu", weights_only=False)
+
+         state = {}
+         # CEM500K uses MoCo format: keys prefixed with 'module.encoder_q.'
+         src_state = ckpt.get("state_dict", ckpt)
+         for k, v in src_state.items():
+             # Strip MoCo prefix
+             new_key = k
+             for prefix in ["module.encoder_q.", "module.", "encoder_q."]:
+                 if new_key.startswith(prefix):
+                     new_key = new_key[len(prefix):]
+                     break
+             state[new_key] = v
+
+         # Adapt conv1: mean-pool 3ch RGB → 1ch grayscale
+         if "conv1.weight" in state and state["conv1.weight"].shape[1] == 3:
+             state["conv1.weight"] = state["conv1.weight"].mean(dim=1, keepdim=True)
+
+         # Load with strict=False (head layers won't match)
+         missing, unexpected = backbone.load_state_dict(state, strict=False)
+         # Expected: fc.weight, fc.bias will be missing/unexpected
+         print(f"CEM500K loaded: {len(state)} keys, "
+               f"{len(missing)} missing, {len(unexpected)} unexpected")
+
+     def forward(self, x: torch.Tensor) -> tuple:
+         """
+         Args:
+             x: (B, 1, H, W) grayscale input
+
+         Returns:
+             heatmap: (B, 2, H/2, W/2) sigmoid-activated class heatmaps
+             offsets: (B, 2, H/2, W/2) sub-pixel offset predictions
+         """
+         # Encoder
+         x0 = self.stem(x)     # stride 4
+         p2 = self.layer1(x0)  # 256ch, stride 4
+         p3 = self.layer2(p2)  # 512ch, stride 8
+         p4 = self.layer3(p3)  # 1024ch, stride 16
+         p5 = self.layer4(p4)  # 2048ch, stride 32
+
+         # BiFPN neck
+         features = self.bifpn([p2, p3, p4, p5])
+
+         # Decoder: upsample P2 to stride 2
+         x_up = self.upsample(features[0])
+
+         # Detection heads
+         heatmap = self.heatmap_head(x_up)  # (B, 2, H/2, W/2)
+         offsets = self.offset_head(x_up)   # (B, 2, H/2, W/2)
+
+         return heatmap, offsets
+
+     def freeze_encoder(self):
+         """Freeze entire encoder (Phase 1 training)."""
+         for module in [self.stem, self.layer1, self.layer2, self.layer3, self.layer4]:
+             for param in module.parameters():
+                 param.requires_grad = False
+
+     def unfreeze_deep_layers(self):
+         """Unfreeze layer3 and layer4 (Phase 2 training)."""
+         for module in [self.layer3, self.layer4]:
+             for param in module.parameters():
+                 param.requires_grad = True
+
+     def unfreeze_all(self):
+         """Unfreeze all layers (Phase 3 training)."""
+         for param in self.parameters():
+             param.requires_grad = True
+
+     def get_param_groups(self, phase: int, cfg: dict) -> list:
+         """
+         Get parameter groups with discriminative learning rates per phase.
+
+         Args:
+             phase: 1, 2, or 3
+             cfg: training phase config from config.yaml
+
+         Returns:
+             List of param group dicts for optimizer.
+         """
+         if phase == 1:
+             # Only neck + heads trainable
+             return [
+                 {"params": self.bifpn.parameters(), "lr": cfg["lr"]},
+                 {"params": self.upsample.parameters(), "lr": cfg["lr"]},
+                 {"params": self.heatmap_head.parameters(), "lr": cfg["lr"]},
+                 {"params": self.offset_head.parameters(), "lr": cfg["lr"]},
+             ]
+         elif phase == 2:
+             return [
+                 {"params": self.stem.parameters(), "lr": 0},
+                 {"params": self.layer1.parameters(), "lr": 0},
+                 {"params": self.layer2.parameters(), "lr": 0},
+                 {"params": self.layer3.parameters(), "lr": cfg["lr_layer3"]},
+                 {"params": self.layer4.parameters(), "lr": cfg["lr_layer4"]},
+                 {"params": self.bifpn.parameters(), "lr": cfg["lr_decoder"]},
+                 {"params": self.upsample.parameters(), "lr": cfg["lr_decoder"]},
+                 {"params": self.heatmap_head.parameters(), "lr": cfg["lr_decoder"]},
+                 {"params": self.offset_head.parameters(), "lr": cfg["lr_decoder"]},
+             ]
+         else:  # phase 3
+             return [
+                 {"params": self.stem.parameters(), "lr": cfg["lr_stem"]},
+                 {"params": self.layer1.parameters(), "lr": cfg["lr_layer1"]},
+                 {"params": self.layer2.parameters(), "lr": cfg["lr_layer2"]},
+                 {"params": self.layer3.parameters(), "lr": cfg["lr_layer3"]},
+                 {"params": self.layer4.parameters(), "lr": cfg["lr_layer4"]},
+                 {"params": self.bifpn.parameters(), "lr": cfg["lr_decoder"]},
+                 {"params": self.upsample.parameters(), "lr": cfg["lr_decoder"]},
+                 {"params": self.heatmap_head.parameters(), "lr": cfg["lr_decoder"]},
+                 {"params": self.offset_head.parameters(), "lr": cfg["lr_decoder"]},
+             ]
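`get_param_groups` reads per-stage keys from the phase config. An illustrative set of values (hypothetical numbers; the real ones live in the project's config.yaml, which is not shown here) and a check that each phase supplies the keys the method dereferences:

```python
# Hypothetical phase configs matching the keys used by get_param_groups
phase_cfgs = {
    1: {"lr": 1e-3},
    2: {"lr_layer3": 1e-5, "lr_layer4": 5e-5, "lr_decoder": 5e-4},
    3: {"lr_stem": 1e-6, "lr_layer1": 2e-6, "lr_layer2": 5e-6,
        "lr_layer3": 1e-5, "lr_layer4": 5e-5, "lr_decoder": 1e-4},
}

# Keys each phase must provide, taken from the cfg[...] lookups above
required_keys = {
    1: {"lr"},
    2: {"lr_layer3", "lr_layer4", "lr_decoder"},
    3: {"lr_stem", "lr_layer1", "lr_layer2", "lr_layer3", "lr_layer4", "lr_decoder"},
}
ok = all(required_keys[p] <= set(phase_cfgs[p]) for p in (1, 2, 3))
```

A missing key raises `KeyError` inside `get_param_groups`, so validating configs up front like this is cheap insurance.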
src/postprocess.py ADDED
@@ -0,0 +1,157 @@
+ """
+ Post-processing: structural mask filtering, cross-class NMS, threshold sweep.
+ """
+
+ import numpy as np
+ from scipy.spatial.distance import cdist
+ from skimage.morphology import dilation, disk
+ from typing import Dict, List, Optional
+
+
+ def apply_structural_mask_filter(
+     detections: List[dict],
+     mask: np.ndarray,
+     margin_px: int = 5,
+ ) -> List[dict]:
+     """
+     Remove detections outside biological tissue regions.
+
+     Args:
+         detections: list of {'x', 'y', 'class', 'conf'}
+         mask: boolean array (H, W) where True = tissue region
+         margin_px: dilate mask by this many pixels
+
+     Returns:
+         Filtered detection list.
+     """
+     if mask is None:
+         return detections
+
+     # Dilate mask to allow particles at region boundaries
+     tissue = dilation(mask, disk(margin_px))
+
+     filtered = []
+     for det in detections:
+         xi, yi = int(round(det["x"])), int(round(det["y"]))
+         if (0 <= yi < tissue.shape[0] and
+                 0 <= xi < tissue.shape[1] and
+                 tissue[yi, xi]):
+             filtered.append(det)
+     return filtered
+
+
+ def cross_class_nms(
+     detections: List[dict],
+     distance_threshold: float = 8.0,
+ ) -> List[dict]:
+     """
+     When 6nm and 12nm detections overlap, keep the higher-confidence one.
+
+     This handles cases where both heads fire on the same particle.
+     """
+     if len(detections) <= 1:
+         return detections
+
+     # Sort by confidence descending
+     dets = sorted(detections, key=lambda d: d["conf"], reverse=True)
+     keep = [True] * len(dets)
+
+     coords = np.array([[d["x"], d["y"]] for d in dets])
+
+     for i in range(len(dets)):
+         if not keep[i]:
+             continue
+         for j in range(i + 1, len(dets)):
+             if not keep[j]:
+                 continue
+             # Only suppress across classes
+             if dets[i]["class"] == dets[j]["class"]:
+                 continue
+             dist = np.sqrt(
+                 (coords[i, 0] - coords[j, 0]) ** 2
+                 + (coords[i, 1] - coords[j, 1]) ** 2
+             )
+             if dist < distance_threshold:
+                 keep[j] = False  # Lower confidence suppressed
+
+     return [d for d, k in zip(dets, keep) if k]
+
+
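The suppression rule can be exercised with a minimal stdlib re-implementation (a standalone sketch with slightly simplified bookkeeping, not the module's function): two detections of different classes within the radius collapse to the higher-confidence one, while distant detections survive.

```python
import math

def cross_class_nms_min(dets, dist_thr=8.0):
    """Keep the higher-confidence detection when different classes overlap."""
    dets = sorted(dets, key=lambda d: d["conf"], reverse=True)
    keep = []
    for d in dets:
        clash = any(
            k["class"] != d["class"]
            and math.hypot(k["x"] - d["x"], k["y"] - d["y"]) < dist_thr
            for k in keep
        )
        if not clash:
            keep.append(d)
    return keep

dets = [
    {"x": 100.0, "y": 100.0, "class": "6nm", "conf": 0.4},
    {"x": 103.0, "y": 100.0, "class": "12nm", "conf": 0.9},  # 3 px from the 6nm hit
    {"x": 300.0, "y": 300.0, "class": "6nm", "conf": 0.5},
]
kept = cross_class_nms_min(dets)
```

The 0.4-confidence 6nm detection sits 3 px from a 0.9-confidence 12nm detection, so only the latter survives; the distant 0.5 detection is untouched.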
+ def sweep_confidence_threshold(
+     detections: List[dict],
+     gt_coords: Dict[str, np.ndarray],
+     match_radii: Dict[str, float],
+     start: float = 0.05,
+     stop: float = 0.95,
+     step: float = 0.01,
+ ) -> Dict[str, float]:
+     """
+     Sweep confidence thresholds to find the optimal per-class thresholds.
+
+     Args:
+         detections: all detections (before thresholding)
+         gt_coords: {'6nm': Nx2, '12nm': Mx2} ground truth
+         match_radii: per-class match radii in pixels
+         start, stop, step: sweep range
+
+     Returns:
+         Dict mapping class name to its best (F1-maximizing) threshold.
+     """
+     from src.evaluate import compute_f1
+
+     best_thresholds = {}
+     thresholds = np.arange(start, stop, step)
+
+     for cls in ["6nm", "12nm"]:
+         best_f1 = -1
+         best_thr = 0.3
+
+         for thr in thresholds:
+             cls_dets = [d for d in detections if d["class"] == cls and d["conf"] >= thr]
+             if not cls_dets and len(gt_coords[cls]) == 0:
+                 continue
+
+             pred_coords = np.array([[d["x"], d["y"]] for d in cls_dets]).reshape(-1, 2)
+             gt = gt_coords[cls]
+
+             if len(pred_coords) == 0:
+                 tp, fp, fn = 0, 0, len(gt)
+             elif len(gt) == 0:
+                 tp, fp, fn = 0, len(pred_coords), 0
+             else:
+                 tp, fp, fn = _simple_match(pred_coords, gt, match_radii[cls])
+
+             f1, _, _ = compute_f1(tp, fp, fn)
+             if f1 > best_f1:
+                 best_f1 = f1
+                 best_thr = thr
+
+         best_thresholds[cls] = best_thr
+
+     return best_thresholds
+
+
+ def _simple_match(
+     pred: np.ndarray, gt: np.ndarray, radius: float
+ ) -> tuple:
+     """Quick matching for threshold sweep (greedy, not Hungarian)."""
+     if len(pred) == 0 or len(gt) == 0:
+         return 0, len(pred), len(gt)
+
+     dists = cdist(pred, gt)
+     tp = 0
+     matched_gt = set()
+
+     # Greedy: match each prediction in turn to its closest unmatched GT
+     for i in range(len(pred)):
+         min_j = np.argmin(dists[i])
+         if dists[i, min_j] <= radius and min_j not in matched_gt:
+             tp += 1
+             matched_gt.add(min_j)
+             dists[:, min_j] = np.inf
+
+     fp = len(pred) - tp
+     fn = len(gt) - tp
+     return tp, fp, fn
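The greedy counting is easy to spot-check. A standalone NumPy sketch (distance matrix via broadcasting instead of `cdist`) with three predictions against two ground-truth points:

```python
import numpy as np

def greedy_match(pred, gt, radius):
    """Greedy nearest-neighbor matching; returns (tp, fp, fn)."""
    if len(pred) == 0 or len(gt) == 0:
        return 0, len(pred), len(gt)
    d = np.linalg.norm(pred[:, None, :] - gt[None, :, :], axis=-1)
    tp = 0
    for i in range(len(pred)):
        j = int(np.argmin(d[i]))
        if d[i, j] <= radius:
            tp += 1
            d[:, j] = np.inf  # each GT can be matched at most once
    return tp, len(pred) - tp, len(gt) - tp

pred = np.array([[10.0, 10.0], [11.0, 10.0], [50.0, 50.0]])
gt = np.array([[10.0, 10.0], [80.0, 80.0]])
tp, fp, fn = greedy_match(pred, gt, radius=3.0)
```

The first prediction claims the only nearby GT, so the 1-px-away duplicate becomes a false positive rather than a second true positive, which is exactly the behavior the threshold sweep relies on.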
src/preprocessing.py ADDED
@@ -0,0 +1,284 @@
1
+ """
2
+ Data loading, annotation parsing, and preprocessing for immunogold TEM images.
3
+
4
+ The model receives raw images — the CEM500K backbone was pretrained on raw EM.
5
+ Top-hat preprocessing is only used by LodeStar (Stage 1).
6
+ """
7
+
8
+ from dataclasses import dataclass, field
9
+ from pathlib import Path
10
+ from typing import Dict, List, Optional, Tuple
11
+
12
+ import numpy as np
13
+ import pandas as pd
14
+ import tifffile
15
+
16
+
17
+ # ---------------------------------------------------------------------------
18
+ # Data registry: robust discovery of images, masks, and annotations
19
+ # ---------------------------------------------------------------------------
20
+
21
+ @dataclass
22
+ class SynapseRecord:
23
+ """Metadata for one synapse sample."""
24
+ synapse_id: str
25
+ image_path: Path
26
+ mask_path: Optional[Path]
27
+ csv_6nm_paths: List[Path] = field(default_factory=list)
28
+ csv_12nm_paths: List[Path] = field(default_factory=list)
29
+ has_6nm: bool = False
30
+ has_12nm: bool = False
31
+
32
+
33
+ def discover_synapse_data(root: str, synapse_ids: List[str]) -> List[SynapseRecord]:
34
+ """
35
+ Discover all TIF images, masks, and CSV annotations for each synapse.
36
+
37
+ Handles naming inconsistencies:
38
+ - S22: main image is S22_0003.tif, two Results folders
39
+ - S25: 12nm CSV has no space ("Results12nm")
40
+ - CSV patterns: "Results 6nm XY" vs "Results XY in microns 6nm"
41
+ """
42
+ root = Path(root)
43
+ analyzed = root / "analyzed synapses"
44
+ records = []
45
+
46
+ for sid in synapse_ids:
47
+ folder = analyzed / sid
48
+ if not folder.exists():
49
+ raise FileNotFoundError(f"Synapse folder not found: {folder}")
50
+
51
+ # --- Find main image (TIF without 'mask' or 'color' in name) ---
52
+ all_tifs = list(folder.glob("*.tif"))
53
+ main_tifs = [
54
+ t for t in all_tifs
55
+ if "mask" not in t.stem.lower() and "color" not in t.stem.lower()
56
+ ]
57
+ if not main_tifs:
58
+ raise FileNotFoundError(f"No main image found in {folder}")
59
+ # Prefer the largest file (main EM image) if multiple found
60
+ image_path = max(main_tifs, key=lambda t: t.stat().st_size)
61
+
62
+ # --- Find mask ---
63
+ mask_tifs = [t for t in all_tifs if "mask" in t.stem.lower()]
64
+ mask_path = None
65
+ if mask_tifs:
66
+ # Prefer plain "mask.tif" over "mask 1.tif" / "mask 2.tif"
67
+ plain = [t for t in mask_tifs if t.stem.lower().endswith("mask")]
68
+ mask_path = plain[0] if plain else mask_tifs[0]
69
+
70
+ # --- Find CSVs across all Results* subdirectories ---
71
+ results_dirs = sorted(folder.glob("Results*"))
72
+ # Also check direct subdirs like "Results 1", "Results 2"
73
+ csv_6nm_paths = []
74
+ csv_12nm_paths = []
75
+
76
+ for rdir in results_dirs:
77
+ if rdir.is_dir():
78
+ for csv_file in rdir.glob("*.csv"):
79
+ name_lower = csv_file.name.lower()
80
+ if "6nm" in name_lower:
81
+ csv_6nm_paths.append(csv_file)
82
+ elif "12nm" in name_lower:
83
+ csv_12nm_paths.append(csv_file)
84
+
85
+ record = SynapseRecord(
86
+ synapse_id=sid,
87
+ image_path=image_path,
88
+ mask_path=mask_path,
89
+ csv_6nm_paths=csv_6nm_paths,
90
+ csv_12nm_paths=csv_12nm_paths,
91
+ has_6nm=len(csv_6nm_paths) > 0,
92
+ has_12nm=len(csv_12nm_paths) > 0,
93
+ )
94
+ records.append(record)
95
+
96
+ return records
97
+
98
+
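The discovery rule above (pick the largest TIF whose name contains neither "mask" nor "color") can be exercised on a throwaway folder; the filenames below are illustrative stand-ins for one synapse directory.

```python
import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as tmp:
    folder = Path(tmp)
    # Hypothetical files mimicking one analyzed-synapse folder
    (folder / "S22_0003.tif").write_bytes(b"\x00" * 1000)        # main EM image (largest)
    (folder / "S22 mask.tif").write_bytes(b"\x00" * 10)
    (folder / "S22 color overlay.tif").write_bytes(b"\x00" * 500)

    all_tifs = list(folder.glob("*.tif"))
    main_tifs = [t for t in all_tifs
                 if "mask" not in t.stem.lower() and "color" not in t.stem.lower()]
    # Same tie-break as discover_synapse_data: largest file wins
    image_path = max(main_tifs, key=lambda t: t.stat().st_size)
    picked = image_path.name
```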
99
+ # ---------------------------------------------------------------------------
100
+ # Image I/O
101
+ # ---------------------------------------------------------------------------
102
+
103
+ def load_image(path: Path) -> np.ndarray:
104
+ """
105
+ Load a TIF image as grayscale uint8.
106
+
107
+ Handles:
108
+ - RGB images (take first channel)
109
+ - Palette-mode images
110
+ - Already-grayscale images
111
+ """
112
+ img = tifffile.imread(str(path))
113
+ if img.ndim == 3:
114
+ # RGB or multi-channel — take first channel (all channels identical in these images)
115
+ img = img[:, :, 0] if img.shape[2] <= 4 else img[0]
116
+ return img.astype(np.uint8)
117
+
118
+
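A minimal sketch of the channel heuristic in `load_image`: a small last axis (≤ 4) is treated as channels-last RGB, anything else as a channels-first stack. The arrays here are synthetic.

```python
import numpy as np

def to_gray(img: np.ndarray) -> np.ndarray:
    # Mirror of load_image's rule: small last axis -> channels-last,
    # take the first channel; otherwise take the first plane of a stack.
    if img.ndim == 3:
        img = img[:, :, 0] if img.shape[2] <= 4 else img[0]
    return img.astype(np.uint8)

rgb = np.dstack([np.full((4, 4), 7, np.uint8)] * 3)   # (4, 4, 3) channels-last
stack = np.ones((2, 6, 6), np.uint8)                  # (2, 6, 6) channels-first
```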
119
+ def load_mask(path: Path) -> np.ndarray:
120
+ """
121
+ Load mask TIF as binary array.
122
+
123
+ Mask is RGB where tissue regions have values < 250 in at least one channel.
124
+ Returns boolean array: True = tissue/structural region.
125
+ """
126
+ mask_rgb = tifffile.imread(str(path))
127
+ if mask_rgb.ndim == 2:
128
+ return mask_rgb < 250
129
+ # RGB mask: tissue where any channel is not white
130
+ return np.any(mask_rgb < 250, axis=-1)
131
+
132
+
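The mask threshold can be checked on a tiny synthetic RGB array: background pixels are pure white, and a pixel counts as tissue if any channel drops below 250.

```python
import numpy as np

mask_rgb = np.full((2, 2, 3), 255, np.uint8)   # all-white background
mask_rgb[0, 0] = (120, 255, 255)               # one dark channel -> tissue
tissue = np.any(mask_rgb < 250, axis=-1)       # True where any channel < 250
```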
133
+ # ---------------------------------------------------------------------------
134
+ # Annotation loading and coordinate conversion
135
+ # ---------------------------------------------------------------------------
136
+
137
+ def load_annotations_csv(csv_path: Path) -> pd.DataFrame:
138
+ """
139
+ Load annotation CSV with columns [index, X, Y].
140
+
141
+ CSV headers have leading space: " ,X,Y".
142
+ Coordinates are in microns despite filename variations; convert to
+ pixels with MICRONS_TO_PIXELS (defined below).
143
+ """
144
+ df = pd.read_csv(csv_path)
145
+ # Normalize column names (strip whitespace)
146
+ df.columns = [c.strip() for c in df.columns]
147
+ # Rename unnamed index column
148
+ if "" in df.columns:
149
+ df = df.rename(columns={"": "idx"})
150
+ return df[["X", "Y"]]
151
+
152
+
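The header quirk handled above (a leading-space `" ,X,Y"` header with an unnamed index column) can be reproduced with the standard-library `csv` module on an in-memory string; the values are made up.

```python
import csv
import io

raw = " ,X,Y\n1,0.123,0.456\n2,0.200,0.300\n"   # note the leading-space header
reader = csv.reader(io.StringIO(raw))
header = [c.strip() for c in next(reader)]       # '' for the unnamed index column
xi, yi = header.index("X"), header.index("Y")
coords = [(float(row[xi]), float(row[yi])) for row in reader]
```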
153
+ # Micron-to-pixel scale factor: consistent across all synapses (verified
154
+ # against researcher's color overlay TIFs). The CSV columns labeled "XY in
155
+ # microns" really ARE microns — multiply by this constant to get pixels.
156
+ MICRONS_TO_PIXELS = 1790.0
157
+
158
+
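The conversion and bounds check used in `load_all_annotations` amount to one multiply and one comparison; the image size and micron coordinates below are hypothetical.

```python
import numpy as np

MICRONS_TO_PIXELS = 1790.0                    # constant from the module above
h, w = 2048, 2048                             # hypothetical image size
xy_um = np.array([[0.5, 0.25], [1.0, 1.0]])   # annotation coords in microns
xy_px = xy_um * MICRONS_TO_PIXELS             # now in pixels
in_bounds = (xy_px[:, 0] < w + 10) & (xy_px[:, 1] < h + 10)
```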
159
+ def load_all_annotations(
160
+ record: SynapseRecord, image_shape: Tuple[int, int]
161
+ ) -> Dict[str, np.ndarray]:
162
+ """
163
+ Load and convert annotations for one synapse to pixel coordinates.
164
+
165
+ CSV coordinates are in microns (despite filename suggesting normalization).
166
+ Multiply by MICRONS_TO_PIXELS (1790 px/micron) to convert.
167
+
168
+ Args:
169
+ record: SynapseRecord with CSV paths.
170
+ image_shape: (height, width) of the corresponding image.
171
+
172
+ Returns:
173
+ Dictionary with keys '6nm' and '12nm', each containing
174
+ an Nx2 array of (x, y) pixel coordinates.
175
+ """
176
+ h, w = image_shape[:2]
177
+ result = {"6nm": np.empty((0, 2), dtype=np.float64),
178
+ "12nm": np.empty((0, 2), dtype=np.float64)}
179
+
180
+ for cls, paths in [("6nm", record.csv_6nm_paths),
181
+ ("12nm", record.csv_12nm_paths)]:
182
+ all_coords = []
183
+ for csv_path in paths:
184
+ df = load_annotations_csv(csv_path)
185
+ # Convert microns to pixels
186
+ px_x = df["X"].values * MICRONS_TO_PIXELS
187
+ px_y = df["Y"].values * MICRONS_TO_PIXELS
188
+ if len(px_x) == 0:
+ continue  # empty CSV: nothing to validate or append
+ # Validate: coords must fall within image bounds
189
+ assert px_x.max() < w + 10, \
190
+ f"X coords out of bounds ({px_x.max():.0f} > {w}) in {csv_path}"
191
+ assert px_y.max() < h + 10, \
192
+ f"Y coords out of bounds ({px_y.max():.0f} > {h}) in {csv_path}"
193
+ all_coords.append(np.stack([px_x, px_y], axis=1))
194
+
195
+ if all_coords:
196
+ coords = np.concatenate(all_coords, axis=0)
197
+ # Deduplicate (for S22 merged results): remove within 3px
198
+ if len(coords) > 1:
199
+ coords = _deduplicate_coords(coords, min_dist=3.0)
200
+ result[cls] = coords
201
+
202
+ return result
203
+
204
+
205
+ def _deduplicate_coords(
206
+ coords: np.ndarray, min_dist: float = 3.0
207
+ ) -> np.ndarray:
208
+ """Remove duplicate coordinates within min_dist pixels."""
209
+ from scipy.spatial.distance import cdist
210
+
211
+ if len(coords) <= 1:
212
+ return coords
213
+ dists = cdist(coords, coords)
214
+ np.fill_diagonal(dists, np.inf)
215
+ keep = np.ones(len(coords), dtype=bool)
216
+ for i in range(len(coords)):
217
+ if not keep[i]:
218
+ continue
219
+ # Mark later duplicates
220
+ for j in range(i + 1, len(coords)):
221
+ if keep[j] and dists[i, j] < min_dist:
222
+ keep[j] = False
223
+ return coords[keep]
224
+
225
+
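The greedy pass in `_deduplicate_coords` (keep the earlier point, drop any later point within `min_dist`) can be sketched with NumPy alone; the coordinates are toy values.

```python
import numpy as np

coords = np.array([[10.0, 10.0], [11.0, 10.5], [50.0, 50.0]])
min_dist = 3.0
keep = np.ones(len(coords), dtype=bool)
for i in range(len(coords)):
    if not keep[i]:
        continue
    for j in range(i + 1, len(coords)):
        if keep[j] and np.linalg.norm(coords[i] - coords[j]) < min_dist:
            keep[j] = False   # later point is a duplicate of an earlier one
deduped = coords[keep]
```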
226
+ # ---------------------------------------------------------------------------
227
+ # Preprocessing transforms
228
+ # ---------------------------------------------------------------------------
229
+
230
+ def preprocess_image(img: np.ndarray, bead_class: str,
231
+ tophat_radii: Optional[Dict[str, int]] = None,
232
+ clahe_clip_limit: float = 0.03,
233
+ clahe_kernel_size: int = 64) -> np.ndarray:
234
+ """
235
+ Top-hat + CLAHE preprocessing. Used ONLY by LodeStar (Stage 1).
236
+
237
+ Not used for model training — the CEM500K backbone expects raw EM images.
238
+ """
239
+ from skimage import exposure
240
+ from skimage.morphology import disk, white_tophat
241
+
242
+ if tophat_radii is None:
243
+ tophat_radii = {"6nm": 8, "12nm": 12}
244
+
245
+ img_inv = (255 - img).astype(np.float32)
246
+ radius = tophat_radii[bead_class]
247
+ tophat = white_tophat(img_inv, disk(radius))
248
+
249
+ tophat_max = tophat.max()
250
+ if tophat_max > 0:
251
+ tophat_norm = tophat / tophat_max
252
+ else:
253
+ tophat_norm = tophat
254
+
255
+ enhanced = exposure.equalize_adapthist(
256
+ tophat_norm,
257
+ clip_limit=clahe_clip_limit,
258
+ kernel_size=clahe_kernel_size,
259
+ )
260
+ return (enhanced * 255).astype(np.uint8)
261
+
262
+
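The white top-hat step can be illustrated without scikit-image: an opening (erosion then dilation) removes small bright spots from the background, and subtracting the opening from the image keeps only those spots. This pure-NumPy 3x3 version is a sketch, not the disk-shaped structuring element the pipeline actually uses.

```python
import numpy as np

def erode3(img):
    # 3x3 grayscale erosion via shifted minima (np.roll wraps at edges;
    # acceptable for a flat-background demo)
    out = img.copy()
    for dy in (-1, 0, 1):
        for dx in (-1, 0, 1):
            out = np.minimum(out, np.roll(np.roll(img, dy, 0), dx, 1))
    return out

def dilate3(img):
    # 3x3 grayscale dilation via shifted maxima
    out = img.copy()
    for dy in (-1, 0, 1):
        for dx in (-1, 0, 1):
            out = np.maximum(out, np.roll(np.roll(img, dy, 0), dx, 1))
    return out

img = np.full((9, 9), 10.0)           # flat background
img[4, 4] = 200.0                     # one bright "particle"
tophat = img - dilate3(erode3(img))   # opening deletes the spot; subtraction keeps it
```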
263
+ # ---------------------------------------------------------------------------
264
+ # Convenience: load everything for one synapse
265
+ # ---------------------------------------------------------------------------
266
+
267
+ def load_synapse(record: SynapseRecord) -> dict:
268
+ """
269
+ Load image, mask, and annotations for one synapse.
270
+
271
+ Returns dict with keys: 'image', 'mask', 'annotations',
272
+ 'synapse_id', 'image_shape'
273
+ """
274
+ img = load_image(record.image_path)
275
+ mask = load_mask(record.mask_path) if record.mask_path else None
276
+ annotations = load_all_annotations(record, img.shape)
277
+
278
+ return {
279
+ "synapse_id": record.synapse_id,
280
+ "image": img,
281
+ "mask": mask,
282
+ "annotations": annotations,
283
+ "image_shape": img.shape,
284
+ }
src/visualize.py ADDED
@@ -0,0 +1,244 @@
1
+ """
2
+ Visualization utilities for QC at every pipeline stage.
3
+
4
+ Generates overlay images showing predictions on raw EM images:
5
+ - Cyan circles for 6nm particles
6
+ - Yellow circles for 12nm particles
7
+ """
8
+
9
+ import numpy as np
10
+ import matplotlib
11
+ matplotlib.use("Agg")
12
+ import matplotlib.pyplot as plt
13
+ import matplotlib.patches as mpatches
14
+ from pathlib import Path
15
+ from typing import Dict, List, Optional
16
+
17
+
18
+ # Color scheme
19
+ COLORS = {
20
+ "6nm": (0, 255, 255), # cyan
21
+ "12nm": (255, 255, 0), # yellow
22
+ "6nm_pred": (0, 200, 200),
23
+ "12nm_pred": (200, 200, 0),
24
+ }
25
+
26
+ RADII = {"6nm": 6, "12nm": 12}
27
+
28
+
29
+ def overlay_annotations(
30
+ image: np.ndarray,
31
+ annotations: Dict[str, np.ndarray],
32
+ title: str = "",
33
+ save_path: Optional[Path] = None,
34
+ predictions: Optional[List[dict]] = None,
35
+ figsize: tuple = (12, 12),
36
+ ) -> plt.Figure:
37
+ """
38
+ Overlay ground truth annotations (and optional predictions) on image.
39
+
40
+ Args:
41
+ image: (H, W) grayscale image
42
+ annotations: {'6nm': Nx2, '12nm': Mx2} pixel coordinates
43
+ title: figure title
44
+ save_path: if provided, save figure here
45
+ predictions: optional list of {'x', 'y', 'class', 'conf'}
46
+ figsize: figure size
47
+
48
+ Returns:
49
+ matplotlib Figure
50
+ """
51
+ fig, ax = plt.subplots(1, 1, figsize=figsize)
52
+ ax.imshow(image, cmap="gray")
53
+
54
+ # Ground truth circles (solid)
55
+ for cls, coords in annotations.items():
56
+ if len(coords) == 0:
57
+ continue
58
+ color_rgb = np.array(COLORS[cls]) / 255.0
59
+ radius = RADII[cls]
60
+ for x, y in coords:
61
+ circle = plt.Circle(
62
+ (x, y), radius, fill=False,
63
+ edgecolor=color_rgb, linewidth=1.5,
64
+ )
65
+ ax.add_patch(circle)
66
+
67
+ # Predictions (dashed)
68
+ if predictions:
69
+ for det in predictions:
70
+ cls = det["class"]
71
+ color_rgb = np.array(COLORS.get(f"{cls}_pred", COLORS[cls])) / 255.0
72
+ radius = RADII[cls]
73
+ circle = plt.Circle(
74
+ (det["x"], det["y"]), radius, fill=False,
75
+ edgecolor=color_rgb, linewidth=1.0, linestyle="--",
76
+ )
77
+ ax.add_patch(circle)
78
+ # Confidence label
79
+ ax.text(
80
+ det["x"] + radius + 2, det["y"],
81
+ f'{det["conf"]:.2f}',
82
+ color=color_rgb, fontsize=6,
83
+ )
84
+
85
+ # Legend
86
+ legend_elements = [
87
+ mpatches.Patch(facecolor="none", edgecolor="cyan", label=f'6nm GT ({len(annotations.get("6nm", []))})', linewidth=1.5),
88
+ mpatches.Patch(facecolor="none", edgecolor="yellow", label=f'12nm GT ({len(annotations.get("12nm", []))})', linewidth=1.5),
89
+ ]
90
+ if predictions:
91
+ n_pred_6 = sum(1 for d in predictions if d["class"] == "6nm")
92
+ n_pred_12 = sum(1 for d in predictions if d["class"] == "12nm")
93
+ legend_elements.extend([
94
+ mpatches.Patch(facecolor="none", edgecolor="darkcyan", label=f"6nm pred ({n_pred_6})", linewidth=1.0),
95
+ mpatches.Patch(facecolor="none", edgecolor="goldenrod", label=f"12nm pred ({n_pred_12})", linewidth=1.0),
96
+ ])
97
+ ax.legend(handles=legend_elements, loc="upper right", fontsize=8)
98
+
99
+ ax.set_title(title, fontsize=10)
100
+ ax.axis("off")
101
+
102
+ if save_path:
103
+ save_path = Path(save_path)
104
+ save_path.parent.mkdir(parents=True, exist_ok=True)
105
+ fig.savefig(str(save_path), dpi=150, bbox_inches="tight")
106
+ plt.close(fig)
107
+
108
+ return fig
109
+
110
+
111
+ def plot_heatmap_overlay(
112
+ image: np.ndarray,
113
+ heatmap: np.ndarray,
114
+ title: str = "",
115
+ save_path: Optional[Path] = None,
116
+ ) -> plt.Figure:
117
+ """
118
+ Overlay predicted heatmap on image for QC.
119
+
120
+ Args:
121
+ image: (H, W) grayscale
122
+ heatmap: (2, H/2, W/2) predicted heatmap
123
+ """
124
+ fig, axes = plt.subplots(1, 3, figsize=(18, 6))
125
+
126
+ axes[0].imshow(image, cmap="gray")
127
+ axes[0].set_title("Raw Image")
128
+ axes[0].axis("off")
129
+
130
+ # Upsample heatmap to image size for overlay
131
+ h, w = image.shape[:2]
132
+
133
+ for idx, (cls, color) in enumerate([("6nm", "hot"), ("12nm", "cool")]):
134
+ hm = heatmap[idx]
135
+ # Resize to image dims
136
+ from skimage.transform import resize
137
+ hm_up = resize(hm, (h, w), order=1)
138
+
139
+ axes[idx + 1].imshow(image, cmap="gray")
140
+ axes[idx + 1].imshow(hm_up, cmap=color, alpha=0.5, vmin=0, vmax=1)
141
+ axes[idx + 1].set_title(f"{cls} heatmap")
142
+ axes[idx + 1].axis("off")
143
+
144
+ fig.suptitle(title, fontsize=12)
145
+
146
+ if save_path:
147
+ save_path = Path(save_path)
148
+ save_path.parent.mkdir(parents=True, exist_ok=True)
149
+ fig.savefig(str(save_path), dpi=150, bbox_inches="tight")
150
+ plt.close(fig)
151
+
152
+ return fig
153
+
154
+
155
+ def plot_training_curves(
156
+ metrics: dict,
157
+ save_path: Optional[Path] = None,
158
+ ) -> plt.Figure:
159
+ """Plot training loss and F1 curves."""
160
+ fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
161
+
162
+ epochs = range(1, len(metrics["train_loss"]) + 1)
163
+
164
+ # Loss
165
+ ax1.plot(epochs, metrics["train_loss"], label="Train Loss")
166
+ if "val_loss" in metrics:
167
+ ax1.plot(epochs, metrics["val_loss"], label="Val Loss")
168
+ ax1.set_xlabel("Epoch")
169
+ ax1.set_ylabel("Loss")
170
+ ax1.set_title("Training Loss")
171
+ ax1.legend()
172
+ ax1.grid(True, alpha=0.3)
173
+
174
+ # F1
175
+ if "val_f1_6nm" in metrics:
176
+ ax2.plot(epochs, metrics["val_f1_6nm"], label="6nm F1")
177
+ if "val_f1_12nm" in metrics:
178
+ ax2.plot(epochs, metrics["val_f1_12nm"], label="12nm F1")
179
+ if "val_f1_mean" in metrics:
180
+ ax2.plot(epochs, metrics["val_f1_mean"], label="Mean F1", linewidth=2)
181
+ ax2.set_xlabel("Epoch")
182
+ ax2.set_ylabel("F1 Score")
183
+ ax2.set_title("Validation F1")
184
+ ax2.legend()
185
+ ax2.grid(True, alpha=0.3)
186
+
187
+ if save_path:
188
+ save_path = Path(save_path)
189
+ save_path.parent.mkdir(parents=True, exist_ok=True)
190
+ fig.savefig(str(save_path), dpi=150, bbox_inches="tight")
191
+ plt.close(fig)
192
+
193
+ return fig
194
+
195
+
196
+ def plot_precision_recall_curve(
197
+ detections: List[dict],
198
+ gt_coords: np.ndarray,
199
+ match_radius: float,
200
+ cls_name: str = "",
201
+ save_path: Optional[Path] = None,
202
+ ) -> plt.Figure:
203
+ """Plot precision-recall curve for one class."""
204
+ sorted_dets = sorted(detections, key=lambda d: d["conf"], reverse=True)
205
+
206
+ tp_list = []
207
+ matched_gt = set()
208
+
209
+ for det in sorted_dets:
210
+ det_coord = np.array([det["x"], det["y"]])
211
+ if len(gt_coords) > 0:
212
+ dists = np.sqrt(np.sum((gt_coords - det_coord) ** 2, axis=1))
213
+ min_idx = np.argmin(dists)
214
+ if dists[min_idx] <= match_radius and min_idx not in matched_gt:
215
+ tp_list.append(1)
216
+ matched_gt.add(min_idx)
217
+ else:
218
+ tp_list.append(0)
219
+ else:
220
+ tp_list.append(0)
221
+
222
+ tp_cumsum = np.cumsum(tp_list)
223
+ fp_cumsum = np.cumsum([1 - t for t in tp_list])
224
+ n_gt = max(len(gt_coords), 1)
225
+
226
+ precision = tp_cumsum / (tp_cumsum + fp_cumsum)
227
+ recall = tp_cumsum / n_gt
228
+
229
+ fig, ax = plt.subplots(figsize=(6, 6))
230
+ ax.plot(recall, precision, linewidth=2)
231
+ ax.set_xlabel("Recall")
232
+ ax.set_ylabel("Precision")
233
+ ax.set_title(f"PR Curve — {cls_name}")
234
+ ax.set_xlim(0, 1)
235
+ ax.set_ylim(0, 1)
236
+ ax.grid(True, alpha=0.3)
237
+
238
+ if save_path:
239
+ save_path = Path(save_path)
240
+ save_path.parent.mkdir(parents=True, exist_ok=True)
241
+ fig.savefig(str(save_path), dpi=150, bbox_inches="tight")
242
+ plt.close(fig)
243
+
244
+ return fig
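The greedy matching inside `plot_precision_recall_curve` (each detection, in confidence order, claims the nearest unmatched ground-truth point within `match_radius`) can be sketched with toy numbers:

```python
import numpy as np

gt = np.array([[10.0, 10.0], [50.0, 50.0]])
dets = [                                      # already sorted by confidence
    {"x": 11.0, "y": 10.0, "conf": 0.9},      # within radius of gt[0] -> TP
    {"x": 12.0, "y": 10.0, "conf": 0.8},      # gt[0] already claimed -> FP
    {"x": 90.0, "y": 90.0, "conf": 0.7},      # far from all gt -> FP
]
match_radius = 5.0

tp_list, matched = [], set()
for d in dets:
    dists = np.sqrt(((gt - np.array([d["x"], d["y"]])) ** 2).sum(axis=1))
    i = int(np.argmin(dists))
    hit = dists[i] <= match_radius and i not in matched
    tp_list.append(int(hit))
    if hit:
        matched.add(i)

tp_c = np.cumsum(tp_list)
fp_c = np.cumsum([1 - t for t in tp_list])
precision = tp_c / (tp_c + fp_c)   # [1.0, 0.5, 0.33...]
recall = tp_c / len(gt)            # [0.5, 0.5, 0.5]
```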