skarugu committed on
Commit
7fca4c6
·
1 Parent(s): 8e73848

Starting Streamlit Space structure

Browse files
Files changed (7) hide show
  1. .streamlit/config.toml +9 -0
  2. Dockerfile +17 -6
  3. README.md +2 -1
  4. model_final.pt +3 -0
  5. requirements.txt +10 -2
  6. self_train.py +499 -0
  7. src/streamlit_app.py +791 -33
.streamlit/config.toml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ [server]
2
+ headless = true
3
+ port = 8501
4
+ address = "0.0.0.0"
5
+ enableCORS = false
6
+ enableXsrfProtection = false
7
+
8
+ [browser]
9
+ gatherUsageStats = false
Dockerfile CHANGED
@@ -1,20 +1,31 @@
1
- FROM python:3.13.5-slim
2
 
3
  WORKDIR /app
4
 
5
- RUN apt-get update && apt-get install -y \
6
  build-essential \
7
  curl \
8
  git \
9
- && rm -rf /var/lib/apt/lists/*
 
 
 
 
 
10
 
11
  COPY requirements.txt ./
12
  COPY src/ ./src/
 
13
 
14
- RUN pip3 install -r requirements.txt
15
 
16
  EXPOSE 8501
17
 
18
- HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
 
 
 
 
 
 
19
 
20
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
1
+ FROM python:3.11-slim
2
 
3
  WORKDIR /app
4
 
5
+ RUN apt-get update && apt-get install -y --no-install-recommends \
6
  build-essential \
7
  curl \
8
  git \
9
+ libglib2.0-0 \
10
+ libsm6 \
11
+ libxrender1 \
12
+ libxext6 \
13
+ libtiff6 \
14
+ && rm -rf /var/lib/apt/lists/*
15
 
16
  COPY requirements.txt ./
17
  COPY src/ ./src/
18
+ COPY .streamlit/ ./.streamlit/
19
 
20
+ RUN pip install --no-cache-dir -r requirements.txt
21
 
22
  EXPOSE 8501
23
 
24
+ ENV STREAMLIT_SERVER_HEADLESS=true
25
+ ENV STREAMLIT_BROWSER_GATHER_USAGE_STATS=false
26
+
27
+ HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \
28
+ CMD curl --fail http://localhost:8501/_stcore/health || exit 1
29
+
30
+ ENTRYPOINT ["sh", "-c", "streamlit run src/streamlit_app.py --server.port=${PORT:-8501} --server.address=0.0.0.0"]
31
 
 
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Myosight
3
  emoji: πŸš€
4
  colorFrom: red
5
  colorTo: red
@@ -9,6 +9,7 @@ tags:
9
  - streamlit
10
  pinned: false
11
  short_description: Streamlit template space
 
12
  ---
13
 
14
  # Welcome to Streamlit!
 
1
  ---
2
+ title: myosight
3
  emoji: πŸš€
4
  colorFrom: red
5
  colorTo: red
 
9
  - streamlit
10
  pinned: false
11
  short_description: Streamlit template space
12
+ license: mit
13
  ---
14
 
15
  # Welcome to Streamlit!
model_final.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dae27460af830a53ac184453980f3609c4adc9d0839db2d67e77fe1a41839de9
3
+ size 31130023
requirements.txt CHANGED
@@ -1,3 +1,11 @@
1
- altair
 
 
 
 
 
 
 
 
2
  pandas
3
- streamlit
 
1
+ streamlit
2
+ torch
3
+ torchvision
4
+ numpy
5
+ pillow
6
+ scikit-image
7
+ scipy
8
+ huggingface_hub
9
+ matplotlib
10
  pandas
11
+ apscheduler
self_train.py ADDED
@@ -0,0 +1,499 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ self_train.py
3
+ =============
4
+ Autonomous continual-learning pipeline for MyoSight.
5
+ Place at the ROOT of your Hugging Face Space repo (same level as Dockerfile).
6
+
7
+ IMPORTANT: This file is completely self-contained.
8
+ It does NOT import from train_myotube_nuclei_unet.py.
9
+ The train script is a separate PyCharm tool.
10
+
11
+ Trigger conditions (any one fires a retrain):
12
+ 1. User submitted corrected label pairs via the app β†’ corrections/ folder
13
+ 2. N unlabelled images accumulated in queue β†’ retrain_queue/
14
+ 3. K consecutive low-confidence images β†’ retrain_queue/ (reason=low_confidence)
15
+ 4. Nightly scheduled run β†’ APScheduler cron 02:00 UTC
16
+
17
+ After each retrain:
18
+ β€’ Fine-tunes from current HF Hub weights
19
+ β€’ Validates on held-out 20% split
20
+ β€’ Only pushes to Hub if new Dice > previous best
21
+ β€’ Archives queue β†’ runs/<run_id>/processed_queue/
22
+ β€’ Appends entry to manifest.json
23
+
24
+ Usage:
25
+ python self_train.py # check triggers once
26
+ python self_train.py --manual # force retrain now
27
+ python self_train.py --scheduler # blocking APScheduler loop (for Docker)
28
+
29
+ Environment variables / HF Secrets:
30
+ HF_TOKEN write-access Hugging Face token
31
+ HF_REPO_ID model repo, e.g. "skarugu/myotube-unet"
32
+ HF_FILENAME model filename, e.g. "model_final.pt"
33
+ DATA_ROOT path to base training data/ folder
34
+ BATCH_TRIGGER_N images before batch trigger (default 20)
35
+ CONF_DROP_K consecutive low-conf before trigger (default 5)
36
+ FT_EPOCHS fine-tuning epochs per run (default 10)
37
+ FT_LR fine-tuning learning rate (default 5e-4)
38
+ SCHEDULE_HOUR nightly retrain UTC hour (default 2)
39
+ """
40
+
41
+ import argparse
42
+ import json
43
+ import logging
44
+ import os
45
+ import random
46
+ import shutil
47
+ import tempfile
48
+ from datetime import datetime
49
+ from pathlib import Path
50
+ from typing import Optional
51
+
52
+ import numpy as np
53
+ import scipy.ndimage as ndi
54
+ import torch
55
+ import torch.nn as nn
56
+ from PIL import Image
57
+ from huggingface_hub import HfApi, hf_hub_download
58
+ from skimage import measure
59
+ from skimage.feature import peak_local_max
60
+ from skimage.morphology import disk, opening, remove_small_objects
61
+ from skimage.segmentation import watershed
62
+ from torch.utils.data import DataLoader, Dataset, random_split
63
+
64
+ try:
65
+ from apscheduler.schedulers.blocking import BlockingScheduler
66
+ HAS_SCHEDULER = True
67
+ except ImportError:
68
+ HAS_SCHEDULER = False
69
+
70
+ # ─────────────────────────────────────────────────────────────────────────────
71
+ # Configuration
72
+ # ─────────────────────────────────────────────────────────────────────────────
73
+
74
+ ROOT = Path(__file__).parent
75
+
76
+ HF_REPO_ID = os.environ.get("HF_REPO_ID", "skarugu/myotube-unet")
77
+ HF_FILENAME = os.environ.get("HF_FILENAME", "model_final.pt")
78
+ HF_TOKEN = os.environ.get("HF_TOKEN", None)
79
+ DATA_ROOT = os.environ.get("DATA_ROOT", str(ROOT / "data"))
80
+
81
+ BATCH_TRIGGER_N = int(os.environ.get("BATCH_TRIGGER_N", 20))
82
+ CONF_DROP_K = int(os.environ.get("CONF_DROP_K", 5))
83
+ CONF_FLAG_THR = float(os.environ.get("CONF_FLAG_THR", 0.60))
84
+ SCHEDULE_HOUR = int(os.environ.get("SCHEDULE_HOUR", 2))
85
+ FT_EPOCHS = int(os.environ.get("FT_EPOCHS", 10))
86
+ FT_LR = float(os.environ.get("FT_LR", 5e-4))
87
+ FT_BATCH_SIZE = int(os.environ.get("FT_BATCH_SIZE", 4))
88
+ IMAGE_SIZE = int(os.environ.get("IMAGE_SIZE", 512))
89
+
90
+ QUEUE_DIR = ROOT / "retrain_queue"
91
+ CORRECTIONS_DIR = ROOT / "corrections"
92
+ RUNS_DIR = ROOT / "runs"
93
+ STATE_PATH = ROOT / "self_train_state.json"
94
+ MANIFEST_PATH = ROOT / "manifest.json"
95
+
96
+ logging.basicConfig(
97
+ level=logging.INFO,
98
+ format="%(asctime)s [%(levelname)s] %(message)s",
99
+ datefmt="%Y-%m-%d %H:%M:%S",
100
+ )
101
+ log = logging.getLogger("self_train")
102
+
103
+
104
+ # ─────────────────────────────────────────────────────────────────────────────
105
+ # State helpers
106
+ # ─────────────────────────────────────────────────────────────────────────────
107
+
108
def _load_state() -> dict:
    """Return the persisted self-train state, or fresh defaults if none exists."""
    if not STATE_PATH.exists():
        return {"best_dice": 0.0, "last_retrain_ts": None, "current_hf_sha": None}
    return json.loads(STATE_PATH.read_text())
112
+
113
def _save_state(s: dict):
    """Persist the self-train state dict to disk as pretty-printed JSON."""
    STATE_PATH.write_text(json.dumps(s, indent=2))
114
+
115
def _load_manifest() -> list:
    """Return the retrain-run manifest, or an empty list when none exists yet."""
    if not MANIFEST_PATH.exists():
        return []
    return json.loads(MANIFEST_PATH.read_text())
117
+
118
def _save_manifest(m: list):
    """Persist the manifest; default=str stringifies non-JSON values (e.g. Paths)."""
    MANIFEST_PATH.write_text(json.dumps(m, indent=2, default=str))
119
+
120
+
121
+ # ─────────────────────────────────────────────────────────────────────────────
122
+ # Trigger checks
123
+ # ─────────────────────────────────────────────────────────────────────────────
124
+
125
def should_retrain(force=False):
    """Decide whether a retrain should run now.

    Returns (fire, reason): *fire* is a bool, *reason* a short string used
    in logs and the run manifest.

    Triggers, in priority order:
      1. force=True                          -> "manual"
      2. any corrections/<id>/meta.json      -> user-submitted label pairs
      3. >= BATCH_TRIGGER_N queued JSONs     -> batch trigger
      4. >= CONF_DROP_K low-confidence items -> confidence drop
    """
    if force:
        return True, "manual"

    corrections = list(CORRECTIONS_DIR.glob("*/meta.json")) if CORRECTIONS_DIR.exists() else []
    if corrections:
        return True, f"user_correction ({len(corrections)} pairs)"

    q_jsons = list(QUEUE_DIR.glob("*.json")) if QUEUE_DIR.exists() else []
    if len(q_jsons) >= BATCH_TRIGGER_N:
        return True, f"batch_trigger ({len(q_jsons)} images)"

    # Count low-confidence queue entries. A corrupt or unreadable queue file
    # must not crash the trigger check, so parse errors are logged and skipped
    # (the original unguarded json.loads would raise and abort the whole poll).
    low_conf = 0
    for jf in q_jsons:
        try:
            if json.loads(jf.read_text()).get("reason") == "low_confidence":
                low_conf += 1
        except (json.JSONDecodeError, OSError):
            log.warning("Skipping unreadable queue file %s", jf)
    if low_conf >= CONF_DROP_K:
        return True, f"confidence_drop ({low_conf} low-conf images)"

    return False, "none"
145
+
146
+
147
+ # ─────────────────────────────────────────────────────────────────────────────
148
+ # Model definition (must be identical to the training script)
149
+ # ─────────────────────────────────────────────────────────────────────────────
150
+
151
class DoubleConv(nn.Module):
    """(Conv3x3 -> BatchNorm -> ReLU) x 2 — the standard U-Net building block.

    NOTE: the attribute name ``net`` is part of the checkpoint state_dict
    keys; renaming it would break loading ``model_final.pt``.
    """
    def __init__(self, in_ch: int, out_ch: int):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(in_ch, out_ch, 3, padding=1), nn.BatchNorm2d(out_ch), nn.ReLU(True),
            nn.Conv2d(out_ch, out_ch, 3, padding=1), nn.BatchNorm2d(out_ch), nn.ReLU(True),
        )
    def forward(self, x): return self.net(x)
159
+
160
+
161
class UNet(nn.Module):
    """4-level U-Net: 2 input channels (red, blue) -> 2 output channels
    (nuclei logits, myotube logits).

    Encoder widths: base, 2b, 4b, 8b; bottleneck 16b; symmetric decoder with
    skip connections. Attribute names (d1..d4, bn, u1..u4, du1..du4, out) are
    baked into the checkpoint state_dict — do not rename them, or loading
    model_final.pt from the Hub will fail.
    """
    def __init__(self, in_ch=2, out_ch=2, base=32):
        super().__init__()
        # Encoder: double-conv then 2x2 max-pool at each level.
        self.d1=DoubleConv(in_ch,base); self.p1=nn.MaxPool2d(2)
        self.d2=DoubleConv(base,base*2); self.p2=nn.MaxPool2d(2)
        self.d3=DoubleConv(base*2,base*4); self.p3=nn.MaxPool2d(2)
        self.d4=DoubleConv(base*4,base*8); self.p4=nn.MaxPool2d(2)
        # Bottleneck.
        self.bn=DoubleConv(base*8,base*16)
        # Decoder: transpose-conv upsample, concat skip, double-conv.
        self.u4=nn.ConvTranspose2d(base*16,base*8,2,2); self.du4=DoubleConv(base*16,base*8)
        self.u3=nn.ConvTranspose2d(base*8,base*4,2,2); self.du3=DoubleConv(base*8,base*4)
        self.u2=nn.ConvTranspose2d(base*4,base*2,2,2); self.du2=DoubleConv(base*4,base*2)
        self.u1=nn.ConvTranspose2d(base*2,base,2,2); self.du1=DoubleConv(base*2,base)
        # 1x1 conv to per-class logits.
        self.out=nn.Conv2d(base,out_ch,1)

    def forward(self, x):
        # Input H and W must be divisible by 16 (four 2x pools) for the
        # skip concatenations below to line up.
        d1=self.d1(x); p1=self.p1(d1)
        d2=self.d2(p1); p2=self.p2(d2)
        d3=self.d3(p2); p3=self.p3(d3)
        d4=self.d4(p3); p4=self.p4(d4)
        b=self.bn(p4)
        x=self.u4(b); x=torch.cat([x,d4],1); x=self.du4(x)
        x=self.u3(x); x=torch.cat([x,d3],1); x=self.du3(x)
        x=self.u2(x); x=torch.cat([x,d2],1); x=self.du2(x)
        x=self.u1(x); x=torch.cat([x,d1],1); x=self.du1(x)
        return self.out(x)
186
+
187
+
188
+ # ─────────────────────────────────────────────────────────────────────────────
189
+ # Minimal Dataset for fine-tuning
190
+ # ─────────────────────────────────────────────────────────────────────────────
191
+
192
class _FTDataset(Dataset):
    """Paired (image, nuclei-mask, myotube-mask) dataset for fine-tuning.

    Expected layout under *root*:
        images/<stem>.<ext>
        masks/Nuclei_m/<stem>.(tif|tiff|png)
        masks/Myotubes_m/<stem>.(tif|tiff|png)

    Each item is (x, y, stem) where
        x: float32 tensor (2, size, size) — red and blue channels scaled to [0, 1]
        y: float32 tensor (2, size, size) — binary nuclei / myotube masks
    """
    # Accepted image file extensions (matched case-insensitively on suffix).
    IMG_EXTS = {".jpg", ".jpeg", ".png", ".tif", ".tiff"}

    def __init__(self, root, size=512, augment=True):
        root = Path(root)
        img_dir = root / "images"
        nuc_dir = root / "masks" / "Nuclei_m"
        myo_dir = root / "masks" / "Myotubes_m"

        # Keep only images for which BOTH masks exist (matched by stem).
        imgs = sorted([p for p in img_dir.glob("*") if p.suffix.lower() in self.IMG_EXTS])
        self.samples = []
        for p in imgs:
            nuc = self._mp(nuc_dir, p.stem)
            myo = self._mp(myo_dir, p.stem)
            if nuc and myo:
                self.samples.append((p, nuc, myo))

        if not self.samples:
            raise FileNotFoundError(f"No labelled samples found under {root}")

        self.size = size        # output square side in pixels
        self.augment = augment  # random flips/rot90 applied when True

    @staticmethod
    def _mp(d, stem):
        """Return the mask path in *d* matching *stem*, trying common extensions."""
        for ext in (".tif", ".tiff", ".png"):
            p = d / f"{stem}{ext}"
            if p.exists(): return p
        return None

    def __len__(self): return len(self.samples)

    def __getitem__(self, idx):
        ip, np_, mp = self.samples[idx]
        rgb = np.array(Image.open(ip).convert("RGB"), dtype=np.uint8)
        H = W = self.size

        # _ch: resize a uint8 channel bilinearly and scale to float [0, 1].
        # _mk: resize a mask with nearest-neighbour and binarise (>0).
        def _ch(arr): return np.array(Image.fromarray(arr, "L").resize((W, H), Image.BILINEAR), dtype=np.float32) / 255.0
        def _mk(p): return (np.array(Image.open(p).convert("L").resize((W, H), Image.NEAREST)) > 0).astype(np.uint8)

        red = _ch(rgb[..., 0])
        blue = _ch(rgb[..., 2])
        yn = _mk(np_)
        ym = _mk(mp)

        if self.augment:
            # Stack channels so flips/rotations stay aligned across image and masks.
            f = np.stack([red, blue, np.zeros_like(red)], -1).astype(np.float32)
            for ax in [1, 0]:
                if random.random() < 0.5:
                    f = np.flip(f, ax); yn = np.flip(yn, ax); ym = np.flip(ym, ax)
            k = random.randint(0, 3)
            if k: f = np.rot90(f, k); yn = np.rot90(yn, k); ym = np.rot90(ym, k)
            red, blue = f[..., 0], f[..., 1]

        x = np.stack([red, blue], 0).astype(np.float32)
        y = np.stack([yn, ym], 0).astype(np.float32)
        # .copy() forces positive strides — np.flip/rot90 return views that
        # torch.from_numpy cannot consume directly.
        return torch.from_numpy(x.copy()), torch.from_numpy(y.copy()), ip.stem
249
+
250
+
251
+ # ─────────────────────────────────────────────────────────────────────────────
252
+ # Loss + Dice
253
+ # ─────────────────────────────────────────────────────────────────────────────
254
+
255
+ class _BCEDice(nn.Module):
256
+ def __init__(self):
257
+ super().__init__()
258
+ self.bce = nn.BCEWithLogitsLoss()
259
+ def forward(self, logits, target):
260
+ bce = self.bce(logits, target)
261
+ p = torch.sigmoid(logits)
262
+ inter = (p * target).sum(dim=(2,3))
263
+ union = p.sum(dim=(2,3)) + target.sum(dim=(2,3))
264
+ dice = 1 - (2*inter+1e-6)/(union+1e-6)
265
+ return 0.5*bce + 0.5*dice.mean()
266
+
267
+ @torch.no_grad()
268
+ def _dice(probs, target, thr=0.5):
269
+ pred = (probs > thr).float()
270
+ inter = (pred * target).sum(dim=(2,3))
271
+ union = pred.sum(dim=(2,3)) + target.sum(dim=(2,3))
272
+ return ((2*inter+1e-6)/(union+1e-6)).mean(dim=0)
273
+
274
+
275
+ # ─────────────────────────────────────────────────────────────────────────────
276
+ # Prepare fine-tune data (base + corrections merged into a temp folder)
277
+ # ─────────────────────────────────────────────────────────────────────────────
278
+
279
def _prepare_data(base: str) -> str:
    """Assemble the fine-tune dataset in a fresh temp folder and return its path.

    Copies the base training data (if *base* has an images/ dir), then injects
    every complete correction triple (image + both masks) from corrections/.

    NOTE(review): the mkdtemp folder is never removed, so each retrain run
    leaks one temp tree — acceptable on an ephemeral Space, but worth a
    cleanup if run long-term.
    """
    tmp = Path(tempfile.mkdtemp()) / "ft"
    orig = Path(base)
    if (orig / "images").exists():
        shutil.copytree(str(orig), str(tmp), dirs_exist_ok=True)
    else:
        # No base data: create the expected empty layout so _FTDataset can scan it.
        for sub in ("images", "masks/Nuclei_m", "masks/Myotubes_m"):
            (tmp / sub).mkdir(parents=True, exist_ok=True)
        log.warning("DATA_ROOT %s has no images/ β€” training on corrections only.", orig)

    injected = 0
    if CORRECTIONS_DIR.exists():
        for meta_p in CORRECTIONS_DIR.glob("*/meta.json"):
            folder = meta_p.parent
            img, nuc, myo = folder/"image.png", folder/"nuclei_mask.png", folder/"myotube_mask.png"
            # Skip incomplete submissions — all three files must be present.
            if not (img.exists() and nuc.exists() and myo.exists()):
                continue
            stem = folder.name
            shutil.copy(img, tmp/"images"/f"{stem}.png")
            shutil.copy(nuc, tmp/"masks"/"Nuclei_m"/f"{stem}.png")
            shutil.copy(myo, tmp/"masks"/"Myotubes_m"/f"{stem}.png")
            injected += 1

    log.info("Fine-tune data ready: %d correction(s) injected β†’ %s", injected, tmp)
    return str(tmp)
304
+
305
+
306
+ # ─────────────────────────────────────────────────────────────────────────────
307
+ # HF Hub helpers
308
+ # ─────────────────────────────────────────────────────────────────────────────
309
+
310
def _load_from_hub():
    """Download the current weights from the HF Hub and build a UNet from them.

    force_download=True guarantees fine-tuning starts from the *latest*
    pushed weights rather than a stale local cache. Returns the model on CPU.
    """
    path = hf_hub_download(repo_id=HF_REPO_ID, filename=HF_FILENAME,
                           token=HF_TOKEN, force_download=True)
    # weights_only=True: the checkpoint is a plain tensor/state-dict container,
    # and this prevents unpickling arbitrary objects from a downloaded file.
    ckpt = torch.load(path, map_location="cpu", weights_only=True)
    # Accept both {"model": state_dict} wrappers and bare state_dicts.
    state = ckpt["model"] if isinstance(ckpt, dict) and "model" in ckpt else ckpt
    model = UNet(in_ch=2, out_ch=2, base=32)
    model.load_state_dict(state)
    log.info("Loaded model from Hub (repo=%s, file=%s)", HF_REPO_ID, HF_FILENAME)
    return model
319
+
320
+
321
def _push_to_hub(model_path: Path, metrics: dict, run_id: str) -> bool:
    """Upload new weights plus a metrics JSON to the HF model repo.

    Returns True on success, False when HF_TOKEN is missing (no-op).
    Network/auth failures propagate from HfApi.upload_file.
    """
    if not HF_TOKEN:
        log.warning("HF_TOKEN not set β€” skipping Hub push.")
        return False
    api = HfApi(token=HF_TOKEN)
    # Weights file — the commit message embeds the validation Dice scores.
    api.upload_file(
        path_or_fileobj=str(model_path),
        path_in_repo=HF_FILENAME,
        repo_id=HF_REPO_ID,
        repo_type="model",
        commit_message=(f"Auto-retrain {run_id} | "
                        f"dice_nuc={metrics['dice_nuc']:.3f} "
                        f"dice_myo={metrics['dice_myo']:.3f}"),
    )
    # Companion metrics file, uploaded as in-memory JSON bytes.
    api.upload_file(
        path_or_fileobj=json.dumps({**metrics, "run_id": run_id,
                                    "timestamp": datetime.now().isoformat()},
                                   indent=2).encode(),
        path_in_repo="auto_retrain_metrics.json",
        repo_id=HF_REPO_ID,
        repo_type="model",
        commit_message=f"Metrics for auto-retrain {run_id}",
    )
    log.info("βœ… Pushed new weights to %s/%s", HF_REPO_ID, HF_FILENAME)
    return True
346
+
347
+
348
+ # ─────────────────────────────────────────────────────────────────────────────
349
+ # Core retrain
350
+ # ─────────────────────────────────────────────────────────────────────────────
351
+
352
def run_retrain(reason: str = "scheduled"):
    """Run one fine-tuning cycle and conditionally push new weights.

    Steps:
      1. Merge base data + user corrections into a temp folder.
      2. Seeded 80/20 train/val split.
      3. Fine-tune from the current Hub weights for FT_EPOCHS.
      4. Push to Hub only if the best mean Dice beats the stored best.
      5. Archive queue/corrections into runs/<run_id>/ and update manifest.

    Returns the metrics dict for the run, or None when there is no data.
    """
    random.seed(42); np.random.seed(42); torch.manual_seed(42)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    run_id = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_dir = RUNS_DIR / run_id
    run_dir.mkdir(parents=True, exist_ok=True)

    log.info("══ Retrain run %s | reason=%s | device=%s ══", run_id, reason, device)

    ft_data = _prepare_data(DATA_ROOT)
    try:
        ds = _FTDataset(ft_data, size=IMAGE_SIZE, augment=True)
    except FileNotFoundError as e:
        log.error("No data: %s β€” aborting.", e)
        return None

    n_val = max(1, int(len(ds) * 0.2))
    n_train = len(ds) - n_val
    if n_train < 1:
        log.warning("Only %d samples β€” need β‰₯2. Aborting.", len(ds))
        return None

    train_ds, val_ds = random_split(
        ds, [n_train, n_val], generator=torch.Generator().manual_seed(42)
    )
    # BUGFIX: both Subsets share the same underlying _FTDataset, so setting
    # val_ds.dataset.augment = False here (as before) silently disabled
    # train-time augmentation too. The flag is now toggled per phase below.
    train_dl = DataLoader(train_ds, batch_size=FT_BATCH_SIZE, shuffle=True, num_workers=0)
    val_dl = DataLoader(val_ds, batch_size=1, shuffle=False, num_workers=0)

    model = _load_from_hub().to(device)
    loss_fn = _BCEDice()
    opt = torch.optim.Adam(model.parameters(), lr=FT_LR)
    sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=FT_EPOCHS, eta_min=1e-5)

    state = _load_state()
    prev_best = state.get("best_dice", 0.0)
    best_run_dice = -1.0
    best_nuc = best_myo = 0.0  # per-channel Dice of the BEST epoch (matches checkpoint)
    best_path = run_dir / "model_best.pt"

    for ep in range(1, FT_EPOCHS + 1):
        model.train()
        ds.augment = True   # augmentation on for the training pass
        for x, y, _ in train_dl:
            x, y = x.to(device), y.to(device)
            opt.zero_grad(); loss_fn(model(x), y).backward(); opt.step()
        sched.step()

        model.eval()
        ds.augment = False  # deterministic samples for validation
        dices = []
        with torch.no_grad():
            for x, y, _ in val_dl:
                probs = torch.sigmoid(model(x.to(device))).cpu()
                dices.append(_dice(probs, y).numpy())
        d = np.array(dices)
        d_nuc, d_myo = float(d[:, 0].mean()), float(d[:, 1].mean())
        score = (d_nuc + d_myo) / 2.0
        log.info(" Ep %02d | dice_nuc=%.3f | dice_myo=%.3f | mean=%.3f", ep, d_nuc, d_myo, score)

        if score > best_run_dice:
            best_run_dice = score
            best_nuc, best_myo = d_nuc, d_myo
            torch.save({"model": model.state_dict()}, best_path)

    # BUGFIX: report the best epoch's per-channel Dice, not the last epoch's,
    # so metrics describe the checkpoint that may be pushed.
    metrics = {
        "dice_nuc": round(best_nuc, 4),
        "dice_myo": round(best_myo, 4),
        "mean_dice": round(best_run_dice, 4),
        "reason": reason,
        "n_train": n_train,
        "n_val": n_val,
    }

    pushed = False
    log.info("Best this run: %.4f | Previous best: %.4f", best_run_dice, prev_best)
    if best_run_dice > prev_best:
        pushed = _push_to_hub(best_path, metrics, run_id)
        state["best_dice"] = best_run_dice
        state["current_hf_sha"] = str(best_path)
    else:
        log.info("New model did not beat previous best β€” NOT pushing.")

    # Archive queue + corrections so they are not retrained on twice.
    archive = run_dir / "processed_queue"
    archive.mkdir(parents=True, exist_ok=True)
    if QUEUE_DIR.exists():
        for p in list(QUEUE_DIR.glob("*")):
            shutil.move(str(p), str(archive / p.name))
    if CORRECTIONS_DIR.exists():
        for folder in list(CORRECTIONS_DIR.glob("*")):
            if folder.is_dir():
                shutil.move(str(folder), str(archive / folder.name))

    state["last_retrain_ts"] = datetime.now().isoformat()
    _save_state(state)

    manifest = _load_manifest()
    manifest.append({"run_id": run_id, "timestamp": state["last_retrain_ts"],
                     "reason": reason, "metrics": metrics, "pushed": pushed})
    _save_manifest(manifest)

    log.info("══ Run %s complete | pushed=%s ══", run_id, pushed)
    return metrics
450
+
451
+
452
+ # ─────────────────────────────────────────────────────────────────────────────
453
+ # Trigger check entry point
454
+ # ─────────────────────────────────────────────────────────────────────────────
455
+
456
def check_and_retrain(force=False):
    """Evaluate the retrain triggers and kick off a run when one fires."""
    fire, reason = should_retrain(force=force)
    if not fire:
        log.info("No trigger met β€” skipping.")
        return
    log.info("Trigger met: %s β†’ retraining…", reason)
    run_retrain(reason=reason)
463
+
464
+
465
+ # ─────────────────────────────────────────────────────────────────────────────
466
+ # Scheduler
467
+ # ─────────────────────────────────────────────────────────────────────────────
468
+
469
def start_scheduler():
    """Blocking APScheduler loop: forced nightly retrain plus 30-minute trigger polls."""
    if not HAS_SCHEDULER:
        log.error("APScheduler not installed. pip install apscheduler")
        return
    scheduler = BlockingScheduler(timezone="UTC")
    # Nightly job always retrains; the poll job only fires on a trigger.
    scheduler.add_job(lambda: check_and_retrain(force=True),
                      "cron", hour=SCHEDULE_HOUR, minute=0, id="nightly")
    scheduler.add_job(check_and_retrain, "interval", minutes=30, id="poll")
    log.info("Scheduler running. Nightly at %02d:00 UTC. Polling every 30 min.", SCHEDULE_HOUR)
    try:
        scheduler.start()
    except (KeyboardInterrupt, SystemExit):
        log.info("Scheduler stopped.")
482
+
483
+
484
+ # ─────────────────────────────────────────────────────────────────────────────
485
+ # CLI
486
+ # ─────────────────────────────────────────────────────────────────────────────
487
+
488
if __name__ == "__main__":
    ap = argparse.ArgumentParser()
    ap.add_argument("--manual", action="store_true", help="Force retrain now")
    ap.add_argument("--scheduler", action="store_true", help="Start blocking scheduler")
    ap.add_argument("--data_root", default=None, help="Override DATA_ROOT env var")
    a = ap.parse_args()
    # Rebinds the module-level DATA_ROOT, which run_retrain reads at call time.
    if a.data_root:
        DATA_ROOT = a.data_root
    if a.scheduler:
        # Blocking loop — this is the mode the Docker service runs in.
        start_scheduler()
    else:
        # One-shot: check triggers once (or force a run with --manual).
        check_and_retrain(force=a.manual)
src/streamlit_app.py CHANGED
@@ -1,40 +1,798 @@
1
- import altair as alt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import numpy as np
3
  import pandas as pd
 
 
4
  import streamlit as st
 
 
 
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
 
 
 
8
 
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
 
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
1
+ # src/streamlit_app.py
2
+ """
3
+ MyoSight β€” Myotube & Nuclei Analyser
4
+ ========================================
5
+ Drop-in replacement for streamlit_app.py on Hugging Face Spaces.
6
+
7
+ New features vs the original Myotube Analyzer V2:
8
+ ✦ Animated count-up metrics (9 counters)
9
+ ✦ Instance overlay β€” nucleus IDs (1,2,3…) + myotube IDs (M1,M2…)
10
+ ✦ Watershed nuclei splitting for accurate counts
11
+ ✦ Myotube surface area (total, mean, max ¡m²) + per-tube bar chart
12
+ ✦ Active learning β€” upload corrected masks β†’ saved to corrections/
13
+ ✦ Low-confidence auto-flagging β†’ image queued for retraining
14
+ ✦ Retraining queue status panel
15
+ ✦ All original sidebar controls preserved
16
+ """
17
+
18
+ import io
19
+ import os
20
+ import json
21
+ import time
22
+ import zipfile
23
+ import hashlib
24
+ from datetime import datetime
25
+ from pathlib import Path
26
+
27
  import numpy as np
28
  import pandas as pd
29
+ from PIL import Image
30
+
31
  import streamlit as st
32
+ import torch
33
+ import torch.nn as nn
34
+ import matplotlib
35
+ matplotlib.use("Agg")
36
+ import matplotlib.pyplot as plt
37
+ import matplotlib.patches as mpatches
38
+ from huggingface_hub import hf_hub_download
39
 
40
+ import scipy.ndimage as ndi
41
+ from skimage.morphology import remove_small_objects, disk, closing, opening
42
+ from skimage import measure
43
+ from skimage.segmentation import watershed
44
+ from skimage.feature import peak_local_max
45
 
 
 
 
46
 
47
+ # ─────────────────────────────────────────────────────────────────────────────
48
+ # CONFIG ← edit these two lines to match your HF model repo
49
+ # ─────────────────────────────────────────────────────────────────────────────
50
+ MODEL_REPO_ID = "skarugu/myotube-unet"
51
+ MODEL_FILENAME = "model_final.pt"
52
+
53
+ CONF_FLAG_THR = 0.60 # images below this confidence are queued for retraining
54
+ QUEUE_DIR = Path("retrain_queue")
55
+ CORRECTIONS_DIR = Path("corrections")
56
+
57
+
58
+ # ─────────────────────────────────────────────────────────────────────────────
59
+ # Helpers (identical to originals so nothing breaks)
60
+ # ─────────────────────────────────────────────────────────────────────────────
61
+
62
def sha256_file(path: str) -> str:
    """Return the SHA-256 hex digest of the file at *path*, streamed in 1 MiB chunks."""
    digest = hashlib.sha256()
    with open(path, "rb") as fh:
        while chunk := fh.read(1024 * 1024):
            digest.update(chunk)
    return digest.hexdigest()
68
+
69
+
70
def png_bytes(arr_u8: np.ndarray) -> bytes:
    """Encode a uint8 image array as PNG and return the raw bytes."""
    with io.BytesIO() as out:
        Image.fromarray(arr_u8).save(out, format="PNG")
        return out.getvalue()
74
+
75
+
76
def resize_u8_to_float01(ch_u8: np.ndarray, W: int, H: int,
                         resample=Image.BILINEAR) -> np.ndarray:
    """Resize a single-channel uint8 array to (W, H) and scale to float32 in [0, 1]."""
    resized = Image.fromarray(ch_u8, mode="L").resize((W, H), resample=resample)
    return np.asarray(resized, dtype=np.float32) / 255.0
80
+
81
+
82
def get_channel(rgb_u8: np.ndarray, source: str) -> np.ndarray:
    """Extract one channel ('Red'/'Green'/'Blue') from an HxWx3 uint8 image,
    or the Rec.601 luma (as uint8) for any other *source* value."""
    channel_idx = {"Red": 0, "Green": 1, "Blue": 2}.get(source)
    if channel_idx is not None:
        return rgb_u8[..., channel_idx]
    luma = 0.299 * rgb_u8[..., 0] + 0.587 * rgb_u8[..., 1] + 0.114 * rgb_u8[..., 2]
    return luma.astype(np.uint8)
87
+
88
+
89
def hex_to_rgb(h: str):
    """Convert '#rrggbb' (leading '#' optional) to an (r, g, b) tuple of ints."""
    digits = h.lstrip("#")
    return tuple(int(digits[off:off + 2], 16) for off in (0, 2, 4))
92
+
93
+
94
+ # ─────────────────────────────────────────────────────────────────────────────
95
+ # Postprocessing
96
+ # ─────────────────────────────────────────────────────────────────────────────
97
+
98
def postprocess_masks(nuc_mask, myo_mask,
                      min_nuc_area=20, min_myo_area=500,
                      myo_close_radius=3):
    """Clean raw binary masks: drop tiny nuclei; morphologically close then
    open the myotube mask with a disk and remove small objects.

    Same closing-based postprocess as Myotube Analyzer V2. Returns a pair of
    uint8 masks (nuclei, myotubes).
    """
    cleaned_nuclei = remove_small_objects(
        nuc_mask.astype(bool), min_size=int(min_nuc_area)
    ).astype(np.uint8)

    footprint = disk(int(myo_close_radius))
    smoothed = opening(closing(myo_mask.astype(bool), footprint), footprint)
    cleaned_myotubes = remove_small_objects(smoothed, min_size=int(min_myo_area)).astype(np.uint8)

    return cleaned_nuclei, cleaned_myotubes
112
+
113
+
114
def label_cc(mask: np.ndarray) -> np.ndarray:
    """Label connected components of a binary mask (background stays 0;
    scipy's default cross-shaped structure, i.e. 4-connectivity in 2D)."""
    labelled, _count = ndi.label(mask.astype(np.uint8))
    return labelled
117
+
118
+
119
def label_nuclei_watershed(nuc_bin: np.ndarray,
                           min_distance: int = 3,
                           min_nuc_area: int = 6) -> np.ndarray:
    """Split touching nuclei via distance-transform watershed.

    Returns an int32 label image (0 = background). Falls back to plain
    connected-component labelling when no distance peaks are found.
    """
    nuc_bin = remove_small_objects(nuc_bin.astype(bool), min_size=min_nuc_area)
    if nuc_bin.sum() == 0:
        # Nothing survives size filtering — return an all-background label image.
        return np.zeros_like(nuc_bin, dtype=np.int32)

    dist = ndi.distance_transform_edt(nuc_bin)
    # One seed per local distance maximum — approximately one per nucleus centre.
    coords = peak_local_max(dist, labels=nuc_bin,
                            min_distance=min_distance, exclude_border=False)
    markers = np.zeros_like(nuc_bin, dtype=np.int32)
    for i, (r, c) in enumerate(coords, start=1):
        markers[r, c] = i

    if markers.max() == 0:
        # No peaks found (degenerate shapes): plain connected components instead.
        return ndi.label(nuc_bin.astype(np.uint8))[0].astype(np.int32)

    # Flood from the seeds over the inverted distance map, confined to the mask.
    return watershed(-dist, markers, mask=nuc_bin).astype(np.int32)
138
+
139
+
140
+ # ─────────────────────────────────────────────────────────────────────────────
141
+ # Surface area (new)
142
+ # ─────────────────────────────────────────────────────────────────────────────
143
+
144
def compute_surface_area(myo_mask: np.ndarray, px_um: float = 1.0) -> dict:
    """Per-myotube and aggregate surface areas in µm².

    *px_um* is the physical side length of one pixel in µm, so each region's
    pixel count is scaled by px_um². Empty masks yield zeros and an empty list.
    """
    px_area = px_um ** 2
    regions = measure.regionprops(label_cc(myo_mask))
    areas = [round(r.area * px_area, 2) for r in regions]
    return {
        "total_area_um2" : round(sum(areas), 2),
        "mean_area_um2" : round(float(np.mean(areas)) if areas else 0.0, 2),
        "max_area_um2" : round(float(np.max(areas)) if areas else 0.0, 2),
        "per_myotube_areas" : areas,
    }
154
+
155
+
156
+ # ─────────────────────────────────────────────────────────────────────────────
157
+ # Biological metrics (counting + fusion + surface area)
158
+ # ─────────────────────────────────────────────────────────────────────────────
159
+
160
def compute_bio_metrics(nuc_mask, myo_mask,
                        min_overlap_frac=0.1,
                        nuc_ws_min_distance=3,
                        nuc_ws_min_area=6,
                        px_um=1.0) -> dict:
    """Compute nuclei/myotube counting, fusion and surface-area metrics.

    Parameters
    ----------
    nuc_mask, myo_mask : binary masks (assumed same shape — the nucleus
        coordinates index directly into ``myo_lab``; TODO confirm callers
        always pass matching resolutions).
    min_overlap_frac : a nucleus counts as MyHC-positive when at least this
        fraction of its pixels lies inside its dominant myotube.
    nuc_ws_min_distance, nuc_ws_min_area : forwarded to the nuclei watershed.
    px_um : pixel edge length in micrometres for area conversion.

    Returns
    -------
    dict of scalar metrics; the "_per_myotube_areas" entry (underscore
    prefix) is a list that downstream code strips before CSV export.
    """
    # Instance-label nuclei (watershed splits touching ones) and myotubes
    # (plain connected components).
    nuc_lab = label_nuclei_watershed(nuc_mask,
                                     min_distance=nuc_ws_min_distance,
                                     min_nuc_area=nuc_ws_min_area)
    myo_lab = label_cc(myo_mask)
    total = int(nuc_lab.max())  # labels are 1..N, so max == nucleus count

    # For each nucleus, find the myotube covering most of its pixels; the
    # nucleus is MyHC-positive when that dominant overlap is large enough.
    pos, nm = 0, {}  # nm: myotube id -> list of nucleus labels inside it
    for prop in measure.regionprops(nuc_lab):
        coords = prop.coords
        ids = myo_lab[coords[:, 0], coords[:, 1]]
        ids = ids[ids > 0]  # ignore background pixels under the nucleus
        if ids.size == 0:
            continue
        unique, counts = np.unique(ids, return_counts=True)
        mt = int(unique[np.argmax(counts)])  # dominant myotube id
        frac = counts.max() / len(coords)    # fraction of nucleus inside it
        if frac >= min_overlap_frac:
            pos += 1
            nm.setdefault(mt, []).append(prop.label)

    # Fusion index: share of all nuclei that sit in myotubes containing at
    # least two nuclei. "per" counts nuclei per *occupied* myotube only.
    per = [len(v) for v in nm.values()]
    fused = sum(n for n in per if n >= 2)
    fi = 100.0 * fused / total if total else 0.0
    pct = 100.0 * pos / total if total else 0.0
    avg = float(np.mean(per)) if per else 0.0

    sa = compute_surface_area(myo_mask, px_um=px_um)

    return {
        "total_nuclei"              : total,
        "myHC_positive_nuclei"      : int(pos),
        "myHC_positive_percentage"  : round(pct, 2),
        "nuclei_fused"              : int(fused),
        # NOTE: counts only myotubes that contain >=1 accepted nucleus.
        "myotube_count"             : int(len(per)),
        "avg_nuclei_per_myotube"    : round(avg, 2),
        "fusion_index"              : round(fi, 2),
        "total_area_um2"            : sa["total_area_um2"],
        "mean_area_um2"             : sa["mean_area_um2"],
        "max_area_um2"              : sa["max_area_um2"],
        "_per_myotube_areas"        : sa["per_myotube_areas"],  # _ prefix = kept out of CSV
    }
206
+
207
+
208
+ # ─────────────────────────────────────────────────────────────────────────────
209
+ # Overlay helpers
210
+ # ─────────────────────────────────────────────────────────────────────────────
211
+
212
def make_simple_overlay(rgb_u8, nuc_mask, myo_mask, nuc_color, myo_color, alpha):
    """Blend flat nuclei / myotube colours onto the image.

    Masks are nearest-neighbour upsampled to the image resolution; the
    myotube layer is painted first so nuclei remain visible on top.
    Used for the ZIP export (fast, no matplotlib).
    """
    height, width = rgb_u8.shape[:2]

    def _upsample(mask):
        # Nearest-neighbour so the binary mask stays binary.
        img = Image.fromarray((mask * 255).astype(np.uint8))
        return np.array(img.resize((width, height), Image.NEAREST)) > 0

    blended = rgb_u8.astype(np.float32)
    layers = ((_upsample(myo_mask), myo_color), (_upsample(nuc_mask), nuc_color))
    for sel, colour in layers:
        tint = np.asarray(colour, dtype=np.float32)
        blended[sel] = (1 - alpha) * blended[sel] + alpha * tint
    return np.clip(blended, 0, 255).astype(np.uint8)
225
+
226
+
227
def make_instance_overlay(rgb_u8: np.ndarray,
                          nuc_lab: np.ndarray,
                          myo_lab: np.ndarray,
                          alpha: float = 0.45,
                          label_nuclei: bool = True,
                          label_myotubes: bool = True) -> np.ndarray:
    """
    Per-instance coloured overlay rendered with matplotlib.
    Nuclei → cool colourmap with white numeric IDs.
    Myotubes → autumn colourmap with M1, M2… IDs.
    Returns RGB uint8 array at original image resolution.

    Parameters
    ----------
    rgb_u8 : original image, uint8 RGB.
    nuc_lab, myo_lab : integer instance-label images (0 = background),
        possibly at a lower (model-input) resolution than the image.
    alpha : blend weight of the colour layers.
    label_nuclei / label_myotubes : draw per-instance ID badges.
    """
    orig_h, orig_w = rgb_u8.shape[:2]
    # Fix: plt.cm.get_cmap() was deprecated in matplotlib 3.7 and removed
    # in 3.9; plain attribute access on plt.cm works on every version.
    nuc_cmap = plt.cm.cool
    myo_cmap = plt.cm.autumn

    def _resize_lab(lab, h, w):
        # Nearest-neighbour keeps integer label ids intact while upsampling.
        return np.array(Image.fromarray(lab.astype(np.int32)).resize((w, h), Image.NEAREST))

    nuc_disp = _resize_lab(nuc_lab, orig_h, orig_w)
    myo_disp = _resize_lab(myo_lab, orig_h, orig_w)
    base = rgb_u8.astype(np.float32).copy()
    n_myo = int(myo_disp.max())
    n_nuc = int(nuc_disp.max())

    # Blend myotubes first, nuclei second, so nuclei stay visible on top.
    if n_myo > 0:
        myo_norm = (myo_disp / max(n_myo, 1)).astype(np.float32)
        myo_rgba = (myo_cmap(myo_norm)[:, :, :3] * 255).astype(np.float32)
        mask = myo_disp > 0
        base[mask] = (1 - alpha) * base[mask] + alpha * myo_rgba[mask]

    if n_nuc > 0:
        nuc_norm = (nuc_disp / max(n_nuc, 1)).astype(np.float32)
        nuc_rgba = (nuc_cmap(nuc_norm)[:, :, :3] * 255).astype(np.float32)
        mask = nuc_disp > 0
        base[mask] = (1 - alpha) * base[mask] + alpha * nuc_rgba[mask]

    overlay = np.clip(base, 0, 255).astype(np.uint8)

    # Render ID badges + legend on top of the blended image via matplotlib,
    # then rasterise the figure back into a numpy array.
    dpi = 100
    fig, ax = plt.subplots(figsize=(orig_w / dpi, orig_h / dpi), dpi=dpi)
    ax.imshow(overlay)
    ax.axis("off")

    # Centroids are in label-image coordinates; scale to display pixels.
    scale_x = orig_w / nuc_lab.shape[1]
    scale_y = orig_h / nuc_lab.shape[0]
    font_nuc = max(3, min(6, orig_w // 200))
    font_myo = max(4, min(8, orig_w // 150))

    if label_nuclei:
        for prop in measure.regionprops(nuc_lab):
            r, c = prop.centroid
            ax.text(c * scale_x, r * scale_y, str(prop.label),
                    fontsize=font_nuc, color="white", ha="center", va="center",
                    fontweight="bold",
                    bbox=dict(boxstyle="round,pad=0.1", fc="steelblue", alpha=0.6, lw=0))

    if label_myotubes:
        for prop in measure.regionprops(myo_lab):
            r, c = prop.centroid
            ax.text(c * scale_x, r * scale_y, f"M{prop.label}",
                    fontsize=font_myo, color="white", ha="center", va="center",
                    fontweight="bold",
                    bbox=dict(boxstyle="round,pad=0.1", fc="darkred", alpha=0.6, lw=0))

    patches = [
        mpatches.Patch(color=nuc_cmap(0.7), label=f"Nuclei (n={n_nuc})"),
        mpatches.Patch(color=myo_cmap(0.7), label=f"Myotubes (n={n_myo})"),
    ]
    ax.legend(handles=patches, loc="upper right", fontsize=max(5, orig_w // 200),
              framealpha=0.75, facecolor="#111", labelcolor="white")

    fig.tight_layout(pad=0)
    buf = io.BytesIO()
    fig.savefig(buf, format="png", bbox_inches="tight", pad_inches=0, dpi=dpi)
    plt.close(fig)
    buf.seek(0)
    return np.array(Image.open(buf).convert("RGB"))
305
+
306
+
307
+ # ─────────────────────────────────────────────────────────────────────────────
308
+ # Animated counter
309
+ # ─────────────────────────────────────────────────────────────────────────────
310
+
311
def animated_metric(placeholder, label: str, final_val,
                    color: str = "#4fc3f7", steps: int = 20, delay: float = 0.025):
    """Count a metric card up from 0 to ``final_val`` inside ``placeholder``.

    Renders ``steps`` intermediate HTML frames, sleeping ``delay`` seconds
    between frames. Floats show one decimal place; ints show as integers.
    """
    show_decimal = isinstance(final_val, float)
    for frame in range(1, steps + 1):
        current = final_val * frame / steps
        text = f"{current:.1f}" if show_decimal else str(int(current))
        placeholder.markdown(
            f"""
            <div style='text-align:center;padding:12px 6px;border-radius:12px;
                        background:#1a1a2e;border:1px solid #2a2a4e;margin:4px 0;'>
              <div style='font-size:2rem;font-weight:800;color:{color};
                          line-height:1.1;'>{text}</div>
              <div style='font-size:0.75rem;color:#9e9e9e;margin-top:4px;'>{label}</div>
            </div>
            """,
            unsafe_allow_html=True,
        )
        time.sleep(delay)
329
+
330
+
331
+ # ─────────────────────────────────────────────────────────────────────────────
332
+ # Active-learning queue helpers
333
+ # ─────────────────────────────────────────────────────────────────────────────
334
+
335
def _ensure_dirs():
    """Create the retraining queue and corrections folders if missing."""
    for folder in (QUEUE_DIR, CORRECTIONS_DIR):
        folder.mkdir(parents=True, exist_ok=True)
338
+
339
+
340
def add_to_queue(image_array: np.ndarray, reason: str = "batch",
                 nuc_mask=None, myo_mask=None, metadata: dict = None):
    """Persist an image (optionally with corrected masks) for retraining.

    With both masks: writes image + binarised masks + meta.json into a
    timestamped folder under CORRECTIONS_DIR. Without both masks: drops
    the raw image plus a JSON sidecar into QUEUE_DIR.
    """
    _ensure_dirs()
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    # reason/timestamp deliberately override same-named metadata keys.
    info = {**(metadata or {}), "reason": reason, "timestamp": stamp}

    if nuc_mask is None or myo_mask is None:
        # Unlabelled sample -> flat queue: image + JSON sidecar.
        Image.fromarray(image_array).save(QUEUE_DIR / f"{stamp}.png")
        (QUEUE_DIR / f"{stamp}.json").write_text(
            json.dumps({**info, "has_masks": False}, indent=2))
        return

    # Labelled correction -> its own folder with image, masks and metadata.
    target = CORRECTIONS_DIR / stamp
    target.mkdir(parents=True, exist_ok=True)
    Image.fromarray(image_array).save(target / "image.png")
    Image.fromarray((nuc_mask > 0).astype(np.uint8) * 255).save(target / "nuclei_mask.png")
    Image.fromarray((myo_mask > 0).astype(np.uint8) * 255).save(target / "myotube_mask.png")
    (target / "meta.json").write_text(json.dumps({**info, "has_masks": True}, indent=2))
356
+
357
+
358
+ # ─────────────────────────────────────────────────────────────────────────────
359
+ # Model (architecture identical to training script)
360
+ # ─────────────────────────────────────────────────────────────────────────────
361
+
362
class DoubleConv(nn.Module):
    """Two 3x3 Conv -> BatchNorm -> ReLU stages (the classic U-Net unit).

    Built as one ``nn.Sequential`` named ``net`` so state-dict keys
    (net.0 .. net.5) match existing checkpoints.
    """

    def __init__(self, in_ch, out_ch):
        super().__init__()
        stages = []
        for src, dst in ((in_ch, out_ch), (out_ch, out_ch)):
            stages += [
                nn.Conv2d(src, dst, 3, padding=1),
                nn.BatchNorm2d(dst),
                nn.ReLU(True),
            ]
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        return self.net(x)
370
+
371
+
372
class UNet(nn.Module):
    """4-level encoder/decoder U-Net with skip connections.

    Defaults: 2 input channels (MyHC + DAPI) and 2 output logit maps
    (nuclei, myotubes). Attribute names (d1..d4, p1..p4, bn, u*/du*, out)
    must stay as-is so saved checkpoints keep loading.
    """

    def __init__(self, in_ch=2, out_ch=2, base=32):
        super().__init__()
        # Encoder: channel width doubles at each of the four levels.
        self.d1 = DoubleConv(in_ch, base)
        self.p1 = nn.MaxPool2d(2)
        self.d2 = DoubleConv(base, base * 2)
        self.p2 = nn.MaxPool2d(2)
        self.d3 = DoubleConv(base * 2, base * 4)
        self.p3 = nn.MaxPool2d(2)
        self.d4 = DoubleConv(base * 4, base * 8)
        self.p4 = nn.MaxPool2d(2)
        # Bottleneck
        self.bn = DoubleConv(base * 8, base * 16)
        # Decoder: transpose-conv upsampling, then conv over the skip concat.
        self.u4 = nn.ConvTranspose2d(base * 16, base * 8, 2, 2)
        self.du4 = DoubleConv(base * 16, base * 8)
        self.u3 = nn.ConvTranspose2d(base * 8, base * 4, 2, 2)
        self.du3 = DoubleConv(base * 8, base * 4)
        self.u2 = nn.ConvTranspose2d(base * 4, base * 2, 2, 2)
        self.du2 = DoubleConv(base * 4, base * 2)
        self.u1 = nn.ConvTranspose2d(base * 2, base, 2, 2)
        self.du1 = DoubleConv(base * 2, base)
        # 1x1 head producing the per-class logit maps.
        self.out = nn.Conv2d(base, out_ch, 1)

    def forward(self, x):
        enc1 = self.d1(x)
        enc2 = self.d2(self.p1(enc1))
        enc3 = self.d3(self.p2(enc2))
        enc4 = self.d4(self.p3(enc3))
        mid = self.bn(self.p4(enc4))
        y = self.du4(torch.cat([self.u4(mid), enc4], 1))
        y = self.du3(torch.cat([self.u3(y), enc3], 1))
        y = self.du2(torch.cat([self.u2(y), enc2], 1))
        y = self.du1(torch.cat([self.u1(y), enc1], 1))
        return self.out(y)
397
+
398
+
399
@st.cache_resource
def load_model(device: str):
    """Download the U-Net checkpoint from the Hub and prepare it for inference.

    Cached by Streamlit, so download + load happen once per process. Also
    prints checkpoint provenance (size, mtime, SHA256) to the sidebar so
    the running model version can be verified at a glance.

    Parameters
    ----------
    device : "cuda" or "cpu"; the model is moved there and set to eval().
    """
    # force_download=True re-fetches on every cold start so a re-uploaded
    # checkpoint is picked up immediately; within a process the
    # @st.cache_resource wrapper prevents repeated downloads.
    local = hf_hub_download(repo_id=MODEL_REPO_ID, filename=MODEL_FILENAME,
                            force_download=True)
    file_sha = sha256_file(local)
    mtime = time.ctime(os.path.getmtime(local))
    size_mb = os.path.getsize(local) / 1e6

    # Provenance panel: lets users confirm exactly which weights are live.
    st.sidebar.markdown("### 🔍 Model debug")
    st.sidebar.caption(f"Repo: `{MODEL_REPO_ID}`")
    st.sidebar.caption(f"File: `{MODEL_FILENAME}`")
    st.sidebar.caption(f"Size: {size_mb:.2f} MB")
    st.sidebar.caption(f"Modified: {mtime}")
    st.sidebar.caption(f"SHA256: `{file_sha[:20]}…`")

    # NOTE(review): torch.load unpickles arbitrary objects — acceptable only
    # because the checkpoint comes from our own pinned Hub repo; consider
    # weights_only=True once the minimum torch version allows it.
    ckpt = torch.load(local, map_location=device)
    # Checkpoints may be a bare state_dict or {"model": state_dict, ...}.
    state = ckpt["model"] if isinstance(ckpt, dict) and "model" in ckpt else ckpt
    model = UNet(in_ch=2, out_ch=2, base=32)
    model.load_state_dict(state)
    model.to(device).eval()
    return model
420
+
421
+
422
+ # ────────────────────────────────────────────────────��────────────────────────
423
+ # PAGE CONFIG + CSS
424
+ # ─────────────────────────────────────────────────────────────────────────────
425
+
426
# Page configuration and global dark-theme CSS for the whole app.
st.set_page_config(page_title="MyoSight — Myotube Analyser",
                   layout="wide", page_icon="🔬")

st.markdown("""
<style>
body, .stApp { background:#0e0e1a; color:#e0e0e0; }
.block-container { max-width:1200px; padding-top:1.25rem; }
h1,h2,h3,h4 { color:#90caf9; }
.flag-box {
    background:#3e1a1a; border-left:4px solid #ef5350;
    padding:10px 16px; border-radius:8px; margin:8px 0;
}
</style>
""", unsafe_allow_html=True)

st.title("🔬 MyoSight — Myotube & Nuclei Analyser")
# Prefer GPU when available; this device string is reused across the app.
device = "cuda" if torch.cuda.is_available() else "cpu"
443
+
444
+ # ─────────────────────────────────────────────────────────────────────────────
445
+ # SIDEBAR
446
+ # ─────────────────────────────────────────────────────────────────────────────
447
# Sidebar: every tunable parameter — channel mapping, thresholds,
# post-processing, overlay styling, calibration and the AL toggle.
with st.sidebar:
    st.caption(f"Device: **{device}**")

    st.header("Input mapping")
    # Which RGB channel feeds each of the model's two input planes.
    src1 = st.selectbox("Model channel 1 (MyHC / myotubes)",
                        ["Red", "Green", "Blue", "Grayscale"], index=0)
    inv1 = st.checkbox("Invert channel 1", value=False)
    src2 = st.selectbox("Model channel 2 (DAPI / nuclei)",
                        ["Red", "Green", "Blue", "Grayscale"], index=2)
    inv2 = st.checkbox("Invert channel 2", value=False)

    st.header("Preprocessing")
    # Square side length the image is resized to before inference.
    image_size = st.select_slider("Model input size",
                                  options=[256, 384, 512, 640, 768, 1024], value=512)

    st.header("Thresholds")
    # Probability cut-offs applied to the sigmoid output maps.
    thr_nuc = st.slider("Nuclei threshold", 0.05, 0.95, 0.50, 0.01)
    thr_myo = st.slider("Myotube threshold", 0.05, 0.95, 0.50, 0.01)

    st.header("Postprocessing")
    min_nuc_area = st.number_input("Min nucleus area (px)", 0, 10000, 20, 1)
    min_myo_area = st.number_input("Min myotube area (px)", 0, 200000, 500, 10)
    myo_close_radius = st.number_input("Myotube close radius", 0, 50, 3, 1)

    st.header("Watershed (nuclei splitting)")
    nuc_ws_min_dist = st.number_input("Min watershed distance", 1, 30, 3, 1)
    nuc_ws_min_area = st.number_input("Min watershed area (px)", 1, 500, 6, 1)

    st.header("Overlay")
    nuc_hex = st.color_picker("Nuclei colour", "#00FFFF")
    myo_hex = st.color_picker("Myotube colour", "#FF0000")
    alpha = st.slider("Overlay alpha", 0.0, 1.0, 0.45, 0.01)
    nuc_rgb = hex_to_rgb(nuc_hex)
    myo_rgb = hex_to_rgb(myo_hex)
    label_nuc = st.checkbox("Show nucleus IDs on overlay", value=True)
    label_myo = st.checkbox("Show myotube IDs on overlay", value=True)

    st.header("Surface area")
    # With the default 1.0 the "µm²" figures are effectively pixel counts.
    px_um = st.number_input("Pixel size (µm) — set for real µm²",
                            value=1.0, min_value=0.01, step=0.01)

    st.header("Active learning")
    enable_al = st.toggle("Enable correction upload", value=True)

    st.header("Metric definitions")
    with st.expander("Fusion Index"):
        st.write("100 × (nuclei in myotubes with ≥2 nuclei) / total nuclei")
    with st.expander("MyHC-positive nucleus"):
        st.write("Counted if ≥10% of nucleus pixels overlap a myotube.")
    with st.expander("Surface area"):
        st.write("Pixel count × px_um². Set pixel size for real µm² values.")
498
+
499
+
500
+ # ─────────────────────────────────────────────────────────────────────────────
501
+ # FILE UPLOADER
502
+ # ─────────────────────────────────────────────────────────────────────────────
503
# File upload + session-state bootstrap; stop early until images exist.
uploads = st.file_uploader(
    "Upload 1+ images (png / jpg / tif). Public Space — don't upload sensitive data.",
    type=["png", "jpg", "jpeg", "tif", "tiff"],
    accept_multiple_files=True,
)

# Analysis outputs live in session_state so widget interactions (tab
# switches, selectboxes) don't force a full recompute.
for key in ("df", "artifacts", "zip_bytes", "bio_metrics"):
    if key not in st.session_state:
        st.session_state[key] = None

if not uploads:
    st.info("👆 Upload one or more fluorescence images to get started.")
    st.stop()

# Cached via @st.cache_resource, so the checkpoint loads once per process.
model = load_model(device=device)
518
+
519
+ # ─────────────────────────────────────────────────────────────────────────────
520
+ # RUN ANALYSIS
521
+ # ─────────────────���───────────────────────────────────────────────────────────
522
# Run analysis: segment every uploaded image, build overlays and metrics,
# assemble a downloadable ZIP, and cache everything in session_state.
with st.form("run_form"):
    run = st.form_submit_button("▶ Run / Rerun analysis", type="primary")

if run:
    results = []          # one flat metrics dict per image (CSV rows)
    artifacts = {}        # per-image PNG bytes for the preview tabs
    all_bio_metrics = {}  # metrics incl. per-myotube areas, keyed by image
    low_conf_flags = []   # (name, confidence) pairs auto-queued for retraining
    zip_buf = io.BytesIO()

    with st.spinner("Analysing images…"):
        with zipfile.ZipFile(zip_buf, "w", compression=zipfile.ZIP_DEFLATED) as zf:
            prog = st.progress(0.0)

            for i, up in enumerate(uploads):
                name = Path(up.name).stem
                rgb_u8 = np.array(
                    Image.open(io.BytesIO(up.getvalue())).convert("RGB"),
                    dtype=np.uint8
                )

                # Map the selected RGB channels onto the model's 2 inputs.
                ch1 = get_channel(rgb_u8, src1)
                ch2 = get_channel(rgb_u8, src2)
                if inv1: ch1 = 255 - ch1
                if inv2: ch2 = 255 - ch2

                # Resize to the square model input; values scaled to [0, 1].
                H = W = int(image_size)
                x1 = resize_u8_to_float01(ch1, W, H, Image.BILINEAR)
                x2 = resize_u8_to_float01(ch2, W, H, Image.BILINEAR)
                x = np.stack([x1, x2], 0).astype(np.float32)

                x_t = torch.from_numpy(x).unsqueeze(0).to(device)
                with torch.no_grad():
                    probs = torch.sigmoid(model(x_t)).cpu().numpy()[0]

                # Confidence check — NOTE(review): this averages the two
                # per-channel *maxima*, which saturates near 1.0 on most
                # images; a mean/entropy-based score may flag more reliably.
                conf = float(np.mean([probs[0].max(), probs[1].max()]))
                if conf < CONF_FLAG_THR:
                    low_conf_flags.append((name, conf))
                    add_to_queue(rgb_u8, reason="low_confidence",
                                 metadata={"confidence": conf, "filename": up.name})

                # Binarise probability maps at the user thresholds.
                nuc_raw = (probs[0] > float(thr_nuc)).astype(np.uint8)
                myo_raw = (probs[1] > float(thr_myo)).astype(np.uint8)

                nuc_pp, myo_pp = postprocess_masks(
                    nuc_raw, myo_raw,
                    min_nuc_area=int(min_nuc_area),
                    min_myo_area=int(min_myo_area),
                    myo_close_radius=int(myo_close_radius),
                )

                # Flat overlay for ZIP
                simple_ov = make_simple_overlay(
                    rgb_u8, nuc_pp, myo_pp, nuc_rgb, myo_rgb, float(alpha)
                )

                # Instance overlay for display
                nuc_lab = label_nuclei_watershed(nuc_pp,
                                                 min_distance=int(nuc_ws_min_dist),
                                                 min_nuc_area=int(nuc_ws_min_area))
                myo_lab = label_cc(myo_pp)
                inst_ov = make_instance_overlay(rgb_u8, nuc_lab, myo_lab,
                                                alpha=float(alpha),
                                                label_nuclei=label_nuc,
                                                label_myotubes=label_myo)

                bio = compute_bio_metrics(
                    nuc_pp, myo_pp,
                    nuc_ws_min_distance=int(nuc_ws_min_dist),
                    nuc_ws_min_area=int(nuc_ws_min_area),
                    px_um=float(px_um),
                )
                # Per-myotube areas go to a side channel so the CSV stays flat.
                per_areas = bio.pop("_per_myotube_areas", [])
                bio["image"] = name
                results.append(bio)
                all_bio_metrics[name] = {**bio, "_per_myotube_areas": per_areas}

                artifacts[name] = {
                    "original" : png_bytes(rgb_u8),
                    "overlay"  : png_bytes(inst_ov),
                    "nuc_pp"   : png_bytes((nuc_pp * 255).astype(np.uint8)),
                    "myo_pp"   : png_bytes((myo_pp * 255).astype(np.uint8)),
                }

                # ZIP contents
                zf.writestr(f"{name}/overlay.png", png_bytes(simple_ov))
                zf.writestr(f"{name}/instance_overlay.png", png_bytes(inst_ov))
                zf.writestr(f"{name}/nuclei_pp.png", artifacts[name]["nuc_pp"])
                zf.writestr(f"{name}/myotube_pp.png", artifacts[name]["myo_pp"])
                zf.writestr(f"{name}/nuclei_raw.png", png_bytes((nuc_raw*255).astype(np.uint8)))
                zf.writestr(f"{name}/myotube_raw.png", png_bytes((myo_raw*255).astype(np.uint8)))

                prog.progress((i + 1) / len(uploads))

            df = pd.DataFrame(results).sort_values("image")
            zf.writestr("metrics.csv", df.to_csv(index=False).encode("utf-8"))

    # Persist everything so later widget interactions don't recompute.
    st.session_state.df = df
    st.session_state.artifacts = artifacts
    st.session_state.zip_bytes = zip_buf.getvalue()
    st.session_state.bio_metrics = all_bio_metrics

    if low_conf_flags:
        names_str = ", ".join(f"{n} (conf={c:.2f})" for n, c in low_conf_flags)
        st.markdown(
            f"<div class='flag-box'>⚠️ <b>Low-confidence images auto-queued for retraining:</b> "
            f"{names_str}</div>",
            unsafe_allow_html=True,
        )

# Nothing analysed yet in this session: prompt and halt the script here.
if st.session_state.df is None:
    st.info("Click **▶ Run / Rerun analysis** to generate results.")
    st.stop()
636
+
637
+ # ─────────────────────────────────────────────────────────────────────────────
638
+ # RESULTS TABLE + DOWNLOADS
639
+ # ─────────────────────────────────────────────────────────────────────────────
640
# Results table plus CSV / ZIP downloads.
st.subheader("📋 Results")
# Columns with a leading underscore are internal and hidden from display.
display_cols = [c for c in st.session_state.df.columns if not c.startswith("_")]
st.dataframe(st.session_state.df[display_cols], use_container_width=True, height=320)

c1, c2 = st.columns(2)
with c1:
    st.download_button("⬇️ Download metrics.csv",
                       st.session_state.df[display_cols].to_csv(index=False).encode(),
                       file_name="metrics.csv", mime="text/csv")
with c2:
    st.download_button("⬇️ Download results.zip",
                       st.session_state.zip_bytes,
                       file_name="results.zip", mime="application/zip")

st.divider()
655
+
656
+ # ─────────────────────────────────────────────────────────────────────────────
657
+ # PER-IMAGE PREVIEW + ANIMATED METRICS
658
+ # ─────────────────────────────────────────────────────────────────────────────
659
# Per-image preview with tabbed views and animated metric cards.
st.subheader("🖼️ Image preview & live metrics")
names = list(st.session_state.artifacts.keys())
pick = st.selectbox("Select image", names)

col_img, col_metrics = st.columns([3, 2], gap="large")

with col_img:
    tabs = st.tabs(["Instance overlay", "Original", "Nuclei mask", "Myotube mask"])
    art = st.session_state.artifacts[pick]
    FIXED_W = 700  # fixed display width so all tabs render at the same size
    with tabs[0]: st.image(art["overlay"], width=FIXED_W)
    with tabs[1]: st.image(art["original"], width=FIXED_W)
    with tabs[2]: st.image(art["nuc_pp"], width=FIXED_W)
    with tabs[3]: st.image(art["myo_pp"], width=FIXED_W)

with col_metrics:
    st.markdown("#### 📊 Live metrics")
    bio = st.session_state.bio_metrics.get(pick, {})
    per_areas = bio.get("_per_myotube_areas", [])

    # 3x3 grid of animated metric cards.
    r1c1, r1c2, r1c3 = st.columns(3)
    r2c1, r2c2, r2c3 = st.columns(3)
    r3c1, r3c2, r3c3 = st.columns(3)

    placeholders = {
        "total_nuclei"            : r1c1.empty(),
        "myotube_count"           : r1c2.empty(),
        "myHC_positive_nuclei"    : r1c3.empty(),
        "myHC_positive_percentage": r2c1.empty(),
        "fusion_index"            : r2c2.empty(),
        "avg_nuclei_per_myotube"  : r2c3.empty(),
        "total_area_um2"          : r3c1.empty(),
        "mean_area_um2"           : r3c2.empty(),
        "max_area_um2"            : r3c3.empty(),
    }

    # (metric key, card label, accent colour, render as float?)
    specs = [
        ("total_nuclei", "Total nuclei", "#4fc3f7", False),
        ("myotube_count", "Myotubes", "#ff8a65", False),
        ("myHC_positive_nuclei", "MyHC⁺ nuclei", "#a5d6a7", False),
        ("myHC_positive_percentage", "MyHC⁺ %", "#ce93d8", True),
        ("fusion_index", "Fusion index %", "#80cbc4", True),
        ("avg_nuclei_per_myotube", "Avg nuc/myotube", "#80deea", True),
        ("total_area_um2", f"Total area (µm²)", "#fff176", True),
        ("mean_area_um2", f"Mean area (µm²)", "#ffcc80", True),
        ("max_area_um2", f"Max area (µm²)", "#ef9a9a", True),
    ]

    for key, label, color, is_float in specs:
        val = bio.get(key, 0)
        animated_metric(placeholders[key], label,
                        float(val) if is_float else int(val),
                        color=color)

    if per_areas:
        st.markdown("#### 📏 Per-myotube area")
        area_df = pd.DataFrame({
            "Myotube"    : [f"M{i+1}" for i in range(len(per_areas))],
            f"Area (µm²)": per_areas,
        }).set_index("Myotube")
        st.bar_chart(area_df, height=220)

st.divider()
722
+
723
+ # ─────────────────────────────────────────────────────────────────────────────
724
+ # ACTIVE LEARNING β€” CORRECTION UPLOAD
725
+ # ─────────────────────────────────────────────────────────────────────────────
726
# Active learning: users upload corrected masks that feed self_train.py.
if enable_al:
    st.subheader("🧠 Submit corrected labels (Active Learning)")
    st.caption(
        "Upload corrected binary masks for any image. "
        "Corrections are saved to corrections/ and picked up "
        "automatically by self_train.py at the next trigger check."
    )

    al_pick = st.selectbox("Correct masks for image", names, key="al_pick")
    acol1, acol2 = st.columns(2)
    with acol1:
        corr_nuc = st.file_uploader("Corrected NUCLEI mask (PNG/TIF, binary 0/255)",
                                    type=["png", "tif", "tiff"], key="nuc_corr")
    with acol2:
        corr_myo = st.file_uploader("Corrected MYOTUBE mask (PNG/TIF, binary 0/255)",
                                    type=["png", "tif", "tiff"], key="myo_corr")

    if st.button("✅ Submit corrections", type="primary"):
        if corr_nuc is None or corr_myo is None:
            st.error("Please upload BOTH a nuclei mask and a myotube mask.")
        else:
            # Recover the original RGB from the cached PNG bytes, binarise
            # the uploaded masks, and queue the triple for retraining.
            # NOTE(review): uploaded masks are not validated against the
            # image dimensions here — confirm self_train.py handles that.
            orig_bytes = st.session_state.artifacts[al_pick]["original"]
            orig_rgb = np.array(Image.open(io.BytesIO(orig_bytes)).convert("RGB"))
            nuc_arr = (np.array(Image.open(corr_nuc).convert("L")) > 0).astype(np.uint8)
            myo_arr = (np.array(Image.open(corr_myo).convert("L")) > 0).astype(np.uint8)
            add_to_queue(orig_rgb, nuc_mask=nuc_arr, myo_mask=myo_arr,
                         reason="user_correction",
                         metadata={"source_image": al_pick,
                                   "timestamp": datetime.now().isoformat()})
            st.success(
                f"✅ Corrections for **{al_pick}** saved to `corrections/`. "
                "The model will retrain at the next scheduled cycle."
            )

    st.divider()
761
+
762
+ # ─────────────────────────────────────────────────────────────────────────────
763
+ # RETRAINING QUEUE STATUS
764
+ # ─────────────────────────────────────────────────────────────────────────────
765
# Self-training queue status: counts, reason breakdown, retraining history.
with st.expander("🔧 Self-training queue status"):
    _ensure_dirs()
    q_items = list(QUEUE_DIR.glob("*.json"))
    c_items = list(CORRECTIONS_DIR.glob("*/meta.json"))

    sq1, sq2 = st.columns(2)
    sq1.metric("Images in retraining queue", len(q_items))
    sq2.metric("Corrected label pairs", len(c_items))

    if q_items:
        # Tally queue entries by their recorded reason; unreadable JSON
        # sidecars are skipped silently (best-effort display only).
        reasons = {}
        for p in q_items:
            try:
                r = json.loads(p.read_text()).get("reason", "unknown")
                reasons[r] = reasons.get(r, 0) + 1
            except Exception:
                pass
        st.write("Queue breakdown:", reasons)

    # manifest.json is presumably written by self_train.py after each
    # retraining run — TODO confirm against that script.
    manifest = Path("manifest.json")
    if manifest.exists():
        try:
            history = json.loads(manifest.read_text())
            if history:
                st.markdown("**Last 5 retraining runs:**")
                hist_df = pd.DataFrame(history[-5:])
                st.dataframe(hist_df, use_container_width=True)
        except Exception:
            pass
794
 
795
# Manual retraining trigger: fire-and-forget background process.
if st.button("🔄 Trigger retraining now"):
    import subprocess
    import sys

    # Fix: use the current interpreter instead of a bare "python", which
    # may be missing from PATH or resolve to a different environment
    # inside the slim container image.
    subprocess.Popen([sys.executable, "self_train.py", "--manual"])
    st.info("Retraining started in the background. Check terminal / logs for progress.")