skarugu committed on
Commit
7fca4c6
·
1 Parent(s): 8e73848

Starting Streamlit Space structure

Browse files
Files changed (7) hide show
  1. .streamlit/config.toml +9 -0
  2. Dockerfile +17 -6
  3. README.md +2 -1
  4. model_final.pt +3 -0
  5. requirements.txt +10 -2
  6. self_train.py +499 -0
  7. src/streamlit_app.py +791 -33
.streamlit/config.toml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ [server]
2
+ headless = true
3
+ port = 8501
4
+ address = "0.0.0.0"
5
+ enableCORS = false
6
+ enableXsrfProtection = false
7
+
8
+ [browser]
9
+ gatherUsageStats = false
Dockerfile CHANGED
@@ -1,20 +1,31 @@
1
- FROM python:3.13.5-slim
2
 
3
  WORKDIR /app
4
 
5
- RUN apt-get update && apt-get install -y \
6
  build-essential \
7
  curl \
8
  git \
9
- && rm -rf /var/lib/apt/lists/*
 
 
 
 
 
10
 
11
  COPY requirements.txt ./
12
  COPY src/ ./src/
 
13
 
14
- RUN pip3 install -r requirements.txt
15
 
16
  EXPOSE 8501
17
 
18
- HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
 
 
 
 
 
 
19
 
20
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
1
+ FROM python:3.11-slim
2
 
3
  WORKDIR /app
4
 
5
+ RUN apt-get update && apt-get install -y --no-install-recommends \
6
  build-essential \
7
  curl \
8
  git \
9
+ libglib2.0-0 \
10
+ libsm6 \
11
+ libxrender1 \
12
+ libxext6 \
13
+ libtiff6 \
14
+ && rm -rf /var/lib/apt/lists/*
15
 
16
  COPY requirements.txt ./
17
  COPY src/ ./src/
18
+ COPY .streamlit/ ./.streamlit/
19
 
20
+ RUN pip install --no-cache-dir -r requirements.txt
21
 
22
  EXPOSE 8501
23
 
24
+ ENV STREAMLIT_SERVER_HEADLESS=true
25
+ ENV STREAMLIT_BROWSER_GATHER_USAGE_STATS=false
26
+
27
+ HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \
28
+ CMD curl --fail http://localhost:8501/_stcore/health || exit 1
29
+
30
+ ENTRYPOINT ["sh", "-c", "streamlit run src/streamlit_app.py --server.port=${PORT:-8501} --server.address=0.0.0.0"]
31
 
 
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Myosight
3
  emoji: πŸš€
4
  colorFrom: red
5
  colorTo: red
@@ -9,6 +9,7 @@ tags:
9
  - streamlit
10
  pinned: false
11
  short_description: Streamlit template space
 
12
  ---
13
 
14
  # Welcome to Streamlit!
 
1
  ---
2
+ title: myosight
3
  emoji: πŸš€
4
  colorFrom: red
5
  colorTo: red
 
9
  - streamlit
10
  pinned: false
11
  short_description: Streamlit template space
12
+ license: mit
13
  ---
14
 
15
  # Welcome to Streamlit!
model_final.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dae27460af830a53ac184453980f3609c4adc9d0839db2d67e77fe1a41839de9
3
+ size 31130023
requirements.txt CHANGED
@@ -1,3 +1,11 @@
1
- altair
 
 
 
 
 
 
 
 
2
  pandas
3
- streamlit
 
1
+ streamlit
2
+ torch
3
+ torchvision
4
+ numpy
5
+ pillow
6
+ scikit-image
7
+ scipy
8
+ huggingface_hub
9
+ matplotlib
10
  pandas
11
+ apscheduler
self_train.py ADDED
@@ -0,0 +1,499 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ self_train.py
3
+ =============
4
+ Autonomous continual-learning pipeline for MyoSight.
5
+ Place at the ROOT of your Hugging Face Space repo (same level as Dockerfile).
6
+
7
+ IMPORTANT: This file is completely self-contained.
8
+ It does NOT import from train_myotube_nuclei_unet.py.
9
+ The train script is a separate PyCharm tool.
10
+
11
+ Trigger conditions (any one fires a retrain):
12
+ 1. User submitted corrected label pairs via the app β†’ corrections/ folder
13
+ 2. N unlabelled images accumulated in queue β†’ retrain_queue/
14
+ 3. K consecutive low-confidence images β†’ retrain_queue/ (reason=low_confidence)
15
+ 4. Nightly scheduled run β†’ APScheduler cron 02:00 UTC
16
+
17
+ After each retrain:
18
+ β€’ Fine-tunes from current HF Hub weights
19
+ β€’ Validates on held-out 20% split
20
+ β€’ Only pushes to Hub if new Dice > previous best
21
+ β€’ Archives queue β†’ runs/<run_id>/processed_queue/
22
+ β€’ Appends entry to manifest.json
23
+
24
+ Usage:
25
+ python self_train.py # check triggers once
26
+ python self_train.py --manual # force retrain now
27
+ python self_train.py --scheduler # blocking APScheduler loop (for Docker)
28
+
29
+ Environment variables / HF Secrets:
30
+ HF_TOKEN write-access Hugging Face token
31
+ HF_REPO_ID model repo, e.g. "skarugu/myotube-unet"
32
+ HF_FILENAME model filename, e.g. "model_final.pt"
33
+ DATA_ROOT path to base training data/ folder
34
+ BATCH_TRIGGER_N images before batch trigger (default 20)
35
+ CONF_DROP_K consecutive low-conf before trigger (default 5)
36
+ FT_EPOCHS fine-tuning epochs per run (default 10)
37
+ FT_LR fine-tuning learning rate (default 5e-4)
38
+ SCHEDULE_HOUR nightly retrain UTC hour (default 2)
39
+ """
40
+
41
+ import argparse
42
+ import json
43
+ import logging
44
+ import os
45
+ import random
46
+ import shutil
47
+ import tempfile
48
+ from datetime import datetime
49
+ from pathlib import Path
50
+ from typing import Optional
51
+
52
+ import numpy as np
53
+ import scipy.ndimage as ndi
54
+ import torch
55
+ import torch.nn as nn
56
+ from PIL import Image
57
+ from huggingface_hub import HfApi, hf_hub_download
58
+ from skimage import measure
59
+ from skimage.feature import peak_local_max
60
+ from skimage.morphology import disk, opening, remove_small_objects
61
+ from skimage.segmentation import watershed
62
+ from torch.utils.data import DataLoader, Dataset, random_split
63
+
64
+ try:
65
+ from apscheduler.schedulers.blocking import BlockingScheduler
66
+ HAS_SCHEDULER = True
67
+ except ImportError:
68
+ HAS_SCHEDULER = False
69
+
70
+ # ─────────────────────────────────────────────────────────────────────────────
71
+ # Configuration
72
+ # ─────────────────────────────────────────────────────────────────────────────
73
+
74
+ ROOT = Path(__file__).parent
75
+
76
+ HF_REPO_ID = os.environ.get("HF_REPO_ID", "skarugu/myotube-unet")
77
+ HF_FILENAME = os.environ.get("HF_FILENAME", "model_final.pt")
78
+ HF_TOKEN = os.environ.get("HF_TOKEN", None)
79
+ DATA_ROOT = os.environ.get("DATA_ROOT", str(ROOT / "data"))
80
+
81
+ BATCH_TRIGGER_N = int(os.environ.get("BATCH_TRIGGER_N", 20))
82
+ CONF_DROP_K = int(os.environ.get("CONF_DROP_K", 5))
83
+ CONF_FLAG_THR = float(os.environ.get("CONF_FLAG_THR", 0.60))
84
+ SCHEDULE_HOUR = int(os.environ.get("SCHEDULE_HOUR", 2))
85
+ FT_EPOCHS = int(os.environ.get("FT_EPOCHS", 10))
86
+ FT_LR = float(os.environ.get("FT_LR", 5e-4))
87
+ FT_BATCH_SIZE = int(os.environ.get("FT_BATCH_SIZE", 4))
88
+ IMAGE_SIZE = int(os.environ.get("IMAGE_SIZE", 512))
89
+
90
+ QUEUE_DIR = ROOT / "retrain_queue"
91
+ CORRECTIONS_DIR = ROOT / "corrections"
92
+ RUNS_DIR = ROOT / "runs"
93
+ STATE_PATH = ROOT / "self_train_state.json"
94
+ MANIFEST_PATH = ROOT / "manifest.json"
95
+
96
+ logging.basicConfig(
97
+ level=logging.INFO,
98
+ format="%(asctime)s [%(levelname)s] %(message)s",
99
+ datefmt="%Y-%m-%d %H:%M:%S",
100
+ )
101
+ log = logging.getLogger("self_train")
102
+
103
+
104
+ # ─────────────────────────────────────────────────────────────────────────────
105
+ # State helpers
106
+ # ─────────────────────────────────────────────────────────────────────────────
107
+
108
def _load_state() -> dict:
    """Return the persisted self-train state, or fresh defaults if none exists."""
    if not STATE_PATH.exists():
        return {"best_dice": 0.0, "last_retrain_ts": None, "current_hf_sha": None}
    return json.loads(STATE_PATH.read_text())
112
+
113
def _save_state(s: dict):
    """Persist the self-train state dict to disk as pretty-printed JSON."""
    STATE_PATH.write_text(json.dumps(s, indent=2))
114
+
115
def _load_manifest() -> list:
    """Return the retrain-run manifest, or an empty list when none exists yet."""
    if not MANIFEST_PATH.exists():
        return []
    return json.loads(MANIFEST_PATH.read_text())
117
+
118
def _save_manifest(m: list):
    """Persist the manifest; default=str stringifies non-JSON values (e.g. Paths)."""
    MANIFEST_PATH.write_text(json.dumps(m, indent=2, default=str))
119
+
120
+
121
+ # ─────────────────────────────────────────────────────────────────────────────
122
+ # Trigger checks
123
+ # ─────────────────────────────────────────────────────────────────────────────
124
+
125
def should_retrain(force=False):
    """Decide whether a retrain should run now.

    Returns (fire, reason): *fire* is a bool, *reason* a short string used
    in logs and the run manifest.

    Triggers, in priority order:
      1. force=True                          -> "manual"
      2. any corrections/<id>/meta.json      -> user-submitted label pairs
      3. >= BATCH_TRIGGER_N queued JSONs     -> batch trigger
      4. >= CONF_DROP_K low-confidence items -> confidence drop
    """
    if force:
        return True, "manual"

    corrections = list(CORRECTIONS_DIR.glob("*/meta.json")) if CORRECTIONS_DIR.exists() else []
    if corrections:
        return True, f"user_correction ({len(corrections)} pairs)"

    q_jsons = list(QUEUE_DIR.glob("*.json")) if QUEUE_DIR.exists() else []
    if len(q_jsons) >= BATCH_TRIGGER_N:
        return True, f"batch_trigger ({len(q_jsons)} images)"

    # Count low-confidence queue entries. A corrupt or unreadable queue file
    # must not crash the trigger check, so parse errors are logged and skipped
    # (the original unguarded json.loads would raise and abort the whole poll).
    low_conf = 0
    for jf in q_jsons:
        try:
            if json.loads(jf.read_text()).get("reason") == "low_confidence":
                low_conf += 1
        except (json.JSONDecodeError, OSError):
            log.warning("Skipping unreadable queue file %s", jf)
    if low_conf >= CONF_DROP_K:
        return True, f"confidence_drop ({low_conf} low-conf images)"

    return False, "none"
145
+
146
+
147
+ # ─────────────────────────────────────────────────────────────────────────────
148
+ # Model definition (must be identical to the training script)
149
+ # ─────────────────────────────────────────────────────────────────────────────
150
+
151
class DoubleConv(nn.Module):
    """(Conv3x3 -> BatchNorm -> ReLU) x 2 — the standard U-Net building block.

    NOTE: the attribute name ``net`` is part of the checkpoint state_dict
    keys; renaming it would break loading ``model_final.pt``.
    """
    def __init__(self, in_ch: int, out_ch: int):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(in_ch, out_ch, 3, padding=1), nn.BatchNorm2d(out_ch), nn.ReLU(True),
            nn.Conv2d(out_ch, out_ch, 3, padding=1), nn.BatchNorm2d(out_ch), nn.ReLU(True),
        )
    def forward(self, x): return self.net(x)
159
+
160
+
161
class UNet(nn.Module):
    """4-level U-Net: 2 input channels (red, blue) -> 2 output channels
    (nuclei logits, myotube logits).

    Encoder widths: base, 2b, 4b, 8b; bottleneck 16b; symmetric decoder with
    skip connections. Attribute names (d1..d4, bn, u1..u4, du1..du4, out) are
    baked into the checkpoint state_dict — do not rename them, or loading
    model_final.pt from the Hub will fail.
    """
    def __init__(self, in_ch=2, out_ch=2, base=32):
        super().__init__()
        # Encoder: double-conv then 2x2 max-pool at each level.
        self.d1=DoubleConv(in_ch,base); self.p1=nn.MaxPool2d(2)
        self.d2=DoubleConv(base,base*2); self.p2=nn.MaxPool2d(2)
        self.d3=DoubleConv(base*2,base*4); self.p3=nn.MaxPool2d(2)
        self.d4=DoubleConv(base*4,base*8); self.p4=nn.MaxPool2d(2)
        # Bottleneck.
        self.bn=DoubleConv(base*8,base*16)
        # Decoder: transpose-conv upsample, concat skip, double-conv.
        self.u4=nn.ConvTranspose2d(base*16,base*8,2,2); self.du4=DoubleConv(base*16,base*8)
        self.u3=nn.ConvTranspose2d(base*8,base*4,2,2); self.du3=DoubleConv(base*8,base*4)
        self.u2=nn.ConvTranspose2d(base*4,base*2,2,2); self.du2=DoubleConv(base*4,base*2)
        self.u1=nn.ConvTranspose2d(base*2,base,2,2); self.du1=DoubleConv(base*2,base)
        # 1x1 conv to per-class logits.
        self.out=nn.Conv2d(base,out_ch,1)

    def forward(self, x):
        # Input H and W must be divisible by 16 (four 2x pools) for the
        # skip concatenations below to line up.
        d1=self.d1(x); p1=self.p1(d1)
        d2=self.d2(p1); p2=self.p2(d2)
        d3=self.d3(p2); p3=self.p3(d3)
        d4=self.d4(p3); p4=self.p4(d4)
        b=self.bn(p4)
        x=self.u4(b); x=torch.cat([x,d4],1); x=self.du4(x)
        x=self.u3(x); x=torch.cat([x,d3],1); x=self.du3(x)
        x=self.u2(x); x=torch.cat([x,d2],1); x=self.du2(x)
        x=self.u1(x); x=torch.cat([x,d1],1); x=self.du1(x)
        return self.out(x)
186
+
187
+
188
+ # ─────────────────────────────────────────────────────────────────────────────
189
+ # Minimal Dataset for fine-tuning
190
+ # ─────────────────────────────────────────────────────────────────────────────
191
+
192
class _FTDataset(Dataset):
    """Paired (image, nuclei-mask, myotube-mask) dataset for fine-tuning.

    Expected layout under *root*:
        images/<stem>.<ext>
        masks/Nuclei_m/<stem>.(tif|tiff|png)
        masks/Myotubes_m/<stem>.(tif|tiff|png)

    Each item is (x, y, stem) where
        x: float32 tensor (2, size, size) — red and blue channels scaled to [0, 1]
        y: float32 tensor (2, size, size) — binary nuclei / myotube masks
    """
    # Accepted image file extensions (matched case-insensitively on suffix).
    IMG_EXTS = {".jpg", ".jpeg", ".png", ".tif", ".tiff"}

    def __init__(self, root, size=512, augment=True):
        root = Path(root)
        img_dir = root / "images"
        nuc_dir = root / "masks" / "Nuclei_m"
        myo_dir = root / "masks" / "Myotubes_m"

        # Keep only images for which BOTH masks exist (matched by stem).
        imgs = sorted([p for p in img_dir.glob("*") if p.suffix.lower() in self.IMG_EXTS])
        self.samples = []
        for p in imgs:
            nuc = self._mp(nuc_dir, p.stem)
            myo = self._mp(myo_dir, p.stem)
            if nuc and myo:
                self.samples.append((p, nuc, myo))

        if not self.samples:
            raise FileNotFoundError(f"No labelled samples found under {root}")

        self.size = size        # output square side in pixels
        self.augment = augment  # random flips/rot90 applied when True

    @staticmethod
    def _mp(d, stem):
        """Return the mask path in *d* matching *stem*, trying common extensions."""
        for ext in (".tif", ".tiff", ".png"):
            p = d / f"{stem}{ext}"
            if p.exists(): return p
        return None

    def __len__(self): return len(self.samples)

    def __getitem__(self, idx):
        ip, np_, mp = self.samples[idx]
        rgb = np.array(Image.open(ip).convert("RGB"), dtype=np.uint8)
        H = W = self.size

        # _ch: resize a uint8 channel bilinearly and scale to float [0, 1].
        # _mk: resize a mask with nearest-neighbour and binarise (>0).
        def _ch(arr): return np.array(Image.fromarray(arr, "L").resize((W, H), Image.BILINEAR), dtype=np.float32) / 255.0
        def _mk(p): return (np.array(Image.open(p).convert("L").resize((W, H), Image.NEAREST)) > 0).astype(np.uint8)

        red = _ch(rgb[..., 0])
        blue = _ch(rgb[..., 2])
        yn = _mk(np_)
        ym = _mk(mp)

        if self.augment:
            # Stack channels so flips/rotations stay aligned across image and masks.
            f = np.stack([red, blue, np.zeros_like(red)], -1).astype(np.float32)
            for ax in [1, 0]:
                if random.random() < 0.5:
                    f = np.flip(f, ax); yn = np.flip(yn, ax); ym = np.flip(ym, ax)
            k = random.randint(0, 3)
            if k: f = np.rot90(f, k); yn = np.rot90(yn, k); ym = np.rot90(ym, k)
            red, blue = f[..., 0], f[..., 1]

        x = np.stack([red, blue], 0).astype(np.float32)
        y = np.stack([yn, ym], 0).astype(np.float32)
        # .copy() forces positive strides — np.flip/rot90 return views that
        # torch.from_numpy cannot consume directly.
        return torch.from_numpy(x.copy()), torch.from_numpy(y.copy()), ip.stem
249
+
250
+
251
+ # ─────────────────────────────────────────────────────────────────────────────
252
+ # Loss + Dice
253
+ # ─────────────────────────────────────────────────────────────────────────────
254
+
255
+ class _BCEDice(nn.Module):
256
+ def __init__(self):
257
+ super().__init__()
258
+ self.bce = nn.BCEWithLogitsLoss()
259
+ def forward(self, logits, target):
260
+ bce = self.bce(logits, target)
261
+ p = torch.sigmoid(logits)
262
+ inter = (p * target).sum(dim=(2,3))
263
+ union = p.sum(dim=(2,3)) + target.sum(dim=(2,3))
264
+ dice = 1 - (2*inter+1e-6)/(union+1e-6)
265
+ return 0.5*bce + 0.5*dice.mean()
266
+
267
+ @torch.no_grad()
268
+ def _dice(probs, target, thr=0.5):
269
+ pred = (probs > thr).float()
270
+ inter = (pred * target).sum(dim=(2,3))
271
+ union = pred.sum(dim=(2,3)) + target.sum(dim=(2,3))
272
+ return ((2*inter+1e-6)/(union+1e-6)).mean(dim=0)
273
+
274
+
275
+ # ─────────────────────────────────────────────────────────────────────────────
276
+ # Prepare fine-tune data (base + corrections merged into a temp folder)
277
+ # ─────────────────────────────────────────────────────────────────────────────
278
+
279
def _prepare_data(base: str) -> str:
    """Assemble the fine-tune dataset in a fresh temp folder and return its path.

    Copies the base training data (if *base* has an images/ dir), then injects
    every complete correction triple (image + both masks) from corrections/.

    NOTE(review): the mkdtemp folder is never removed, so each retrain run
    leaks one temp tree — acceptable on an ephemeral Space, but worth a
    cleanup if run long-term.
    """
    tmp = Path(tempfile.mkdtemp()) / "ft"
    orig = Path(base)
    if (orig / "images").exists():
        shutil.copytree(str(orig), str(tmp), dirs_exist_ok=True)
    else:
        # No base data: create the expected empty layout so _FTDataset can scan it.
        for sub in ("images", "masks/Nuclei_m", "masks/Myotubes_m"):
            (tmp / sub).mkdir(parents=True, exist_ok=True)
        log.warning("DATA_ROOT %s has no images/ β€” training on corrections only.", orig)

    injected = 0
    if CORRECTIONS_DIR.exists():
        for meta_p in CORRECTIONS_DIR.glob("*/meta.json"):
            folder = meta_p.parent
            img, nuc, myo = folder/"image.png", folder/"nuclei_mask.png", folder/"myotube_mask.png"
            # Skip incomplete submissions — all three files must be present.
            if not (img.exists() and nuc.exists() and myo.exists()):
                continue
            stem = folder.name
            shutil.copy(img, tmp/"images"/f"{stem}.png")
            shutil.copy(nuc, tmp/"masks"/"Nuclei_m"/f"{stem}.png")
            shutil.copy(myo, tmp/"masks"/"Myotubes_m"/f"{stem}.png")
            injected += 1

    log.info("Fine-tune data ready: %d correction(s) injected β†’ %s", injected, tmp)
    return str(tmp)
304
+
305
+
306
+ # ─────────────────────────────────────────────────────────────────────────────
307
+ # HF Hub helpers
308
+ # ─────────────────────────────────────────────────────────────────────────────
309
+
310
def _load_from_hub():
    """Download the current weights from the HF Hub and build a UNet from them.

    force_download=True guarantees fine-tuning starts from the *latest*
    pushed weights rather than a stale local cache. Returns the model on CPU.
    """
    path = hf_hub_download(repo_id=HF_REPO_ID, filename=HF_FILENAME,
                           token=HF_TOKEN, force_download=True)
    # weights_only=True: the checkpoint is a plain tensor/state-dict container,
    # and this prevents unpickling arbitrary objects from a downloaded file.
    ckpt = torch.load(path, map_location="cpu", weights_only=True)
    # Accept both {"model": state_dict} wrappers and bare state_dicts.
    state = ckpt["model"] if isinstance(ckpt, dict) and "model" in ckpt else ckpt
    model = UNet(in_ch=2, out_ch=2, base=32)
    model.load_state_dict(state)
    log.info("Loaded model from Hub (repo=%s, file=%s)", HF_REPO_ID, HF_FILENAME)
    return model
319
+
320
+
321
def _push_to_hub(model_path: Path, metrics: dict, run_id: str) -> bool:
    """Upload new weights plus a metrics JSON to the HF model repo.

    Returns True on success, False when HF_TOKEN is missing (no-op).
    Network/auth failures propagate from HfApi.upload_file.
    """
    if not HF_TOKEN:
        log.warning("HF_TOKEN not set β€” skipping Hub push.")
        return False
    api = HfApi(token=HF_TOKEN)
    # Weights file — the commit message embeds the validation Dice scores.
    api.upload_file(
        path_or_fileobj=str(model_path),
        path_in_repo=HF_FILENAME,
        repo_id=HF_REPO_ID,
        repo_type="model",
        commit_message=(f"Auto-retrain {run_id} | "
                        f"dice_nuc={metrics['dice_nuc']:.3f} "
                        f"dice_myo={metrics['dice_myo']:.3f}"),
    )
    # Companion metrics file, uploaded as in-memory JSON bytes.
    api.upload_file(
        path_or_fileobj=json.dumps({**metrics, "run_id": run_id,
                                    "timestamp": datetime.now().isoformat()},
                                   indent=2).encode(),
        path_in_repo="auto_retrain_metrics.json",
        repo_id=HF_REPO_ID,
        repo_type="model",
        commit_message=f"Metrics for auto-retrain {run_id}",
    )
    log.info("βœ… Pushed new weights to %s/%s", HF_REPO_ID, HF_FILENAME)
    return True
346
+
347
+
348
+ # ─────────────────────────────────────────────────────────────────────────────
349
+ # Core retrain
350
+ # ─────────────────────────────────────────────────────────────────────────────
351
+
352
def run_retrain(reason: str = "scheduled"):
    """Run one fine-tuning cycle and conditionally push new weights.

    Steps:
      1. Merge base data + user corrections into a temp folder.
      2. Seeded 80/20 train/val split.
      3. Fine-tune from the current Hub weights for FT_EPOCHS.
      4. Push to Hub only if the best mean Dice beats the stored best.
      5. Archive queue/corrections into runs/<run_id>/ and update manifest.

    Returns the metrics dict for the run, or None when there is no data.
    """
    random.seed(42); np.random.seed(42); torch.manual_seed(42)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    run_id = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_dir = RUNS_DIR / run_id
    run_dir.mkdir(parents=True, exist_ok=True)

    log.info("══ Retrain run %s | reason=%s | device=%s ══", run_id, reason, device)

    ft_data = _prepare_data(DATA_ROOT)
    try:
        ds = _FTDataset(ft_data, size=IMAGE_SIZE, augment=True)
    except FileNotFoundError as e:
        log.error("No data: %s β€” aborting.", e)
        return None

    n_val = max(1, int(len(ds) * 0.2))
    n_train = len(ds) - n_val
    if n_train < 1:
        log.warning("Only %d samples β€” need β‰₯2. Aborting.", len(ds))
        return None

    train_ds, val_ds = random_split(
        ds, [n_train, n_val], generator=torch.Generator().manual_seed(42)
    )
    # BUGFIX: both Subsets share the same underlying _FTDataset, so setting
    # val_ds.dataset.augment = False here (as before) silently disabled
    # train-time augmentation too. The flag is now toggled per phase below.
    train_dl = DataLoader(train_ds, batch_size=FT_BATCH_SIZE, shuffle=True, num_workers=0)
    val_dl = DataLoader(val_ds, batch_size=1, shuffle=False, num_workers=0)

    model = _load_from_hub().to(device)
    loss_fn = _BCEDice()
    opt = torch.optim.Adam(model.parameters(), lr=FT_LR)
    sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=FT_EPOCHS, eta_min=1e-5)

    state = _load_state()
    prev_best = state.get("best_dice", 0.0)
    best_run_dice = -1.0
    best_nuc = best_myo = 0.0  # per-channel Dice of the BEST epoch (matches checkpoint)
    best_path = run_dir / "model_best.pt"

    for ep in range(1, FT_EPOCHS + 1):
        model.train()
        ds.augment = True   # augmentation on for the training pass
        for x, y, _ in train_dl:
            x, y = x.to(device), y.to(device)
            opt.zero_grad(); loss_fn(model(x), y).backward(); opt.step()
        sched.step()

        model.eval()
        ds.augment = False  # deterministic samples for validation
        dices = []
        with torch.no_grad():
            for x, y, _ in val_dl:
                probs = torch.sigmoid(model(x.to(device))).cpu()
                dices.append(_dice(probs, y).numpy())
        d = np.array(dices)
        d_nuc, d_myo = float(d[:, 0].mean()), float(d[:, 1].mean())
        score = (d_nuc + d_myo) / 2.0
        log.info(" Ep %02d | dice_nuc=%.3f | dice_myo=%.3f | mean=%.3f", ep, d_nuc, d_myo, score)

        if score > best_run_dice:
            best_run_dice = score
            best_nuc, best_myo = d_nuc, d_myo
            torch.save({"model": model.state_dict()}, best_path)

    # BUGFIX: report the best epoch's per-channel Dice, not the last epoch's,
    # so metrics describe the checkpoint that may be pushed.
    metrics = {
        "dice_nuc": round(best_nuc, 4),
        "dice_myo": round(best_myo, 4),
        "mean_dice": round(best_run_dice, 4),
        "reason": reason,
        "n_train": n_train,
        "n_val": n_val,
    }

    pushed = False
    log.info("Best this run: %.4f | Previous best: %.4f", best_run_dice, prev_best)
    if best_run_dice > prev_best:
        pushed = _push_to_hub(best_path, metrics, run_id)
        state["best_dice"] = best_run_dice
        state["current_hf_sha"] = str(best_path)
    else:
        log.info("New model did not beat previous best β€” NOT pushing.")

    # Archive queue + corrections so they are not retrained on twice.
    archive = run_dir / "processed_queue"
    archive.mkdir(parents=True, exist_ok=True)
    if QUEUE_DIR.exists():
        for p in list(QUEUE_DIR.glob("*")):
            shutil.move(str(p), str(archive / p.name))
    if CORRECTIONS_DIR.exists():
        for folder in list(CORRECTIONS_DIR.glob("*")):
            if folder.is_dir():
                shutil.move(str(folder), str(archive / folder.name))

    state["last_retrain_ts"] = datetime.now().isoformat()
    _save_state(state)

    manifest = _load_manifest()
    manifest.append({"run_id": run_id, "timestamp": state["last_retrain_ts"],
                     "reason": reason, "metrics": metrics, "pushed": pushed})
    _save_manifest(manifest)

    log.info("══ Run %s complete | pushed=%s ══", run_id, pushed)
    return metrics
450
+
451
+
452
+ # ─────────────────────────────────────────────────────────────────────────────
453
+ # Trigger check entry point
454
+ # ─────────────────────────────────────────────────────────────────────────────
455
+
456
def check_and_retrain(force=False):
    """Evaluate the retrain triggers and kick off a run when one fires."""
    fire, reason = should_retrain(force=force)
    if not fire:
        log.info("No trigger met β€” skipping.")
        return
    log.info("Trigger met: %s β†’ retraining…", reason)
    run_retrain(reason=reason)
463
+
464
+
465
+ # ─────────────────────────────────────────────────────────────────────────────
466
+ # Scheduler
467
+ # ─────────────────────────────────────────────────────────────────────────────
468
+
469
def start_scheduler():
    """Blocking APScheduler loop: forced nightly retrain plus 30-minute trigger polls."""
    if not HAS_SCHEDULER:
        log.error("APScheduler not installed. pip install apscheduler")
        return
    scheduler = BlockingScheduler(timezone="UTC")
    # Nightly job always retrains; the poll job only fires on a trigger.
    scheduler.add_job(lambda: check_and_retrain(force=True),
                      "cron", hour=SCHEDULE_HOUR, minute=0, id="nightly")
    scheduler.add_job(check_and_retrain, "interval", minutes=30, id="poll")
    log.info("Scheduler running. Nightly at %02d:00 UTC. Polling every 30 min.", SCHEDULE_HOUR)
    try:
        scheduler.start()
    except (KeyboardInterrupt, SystemExit):
        log.info("Scheduler stopped.")
482
+
483
+
484
+ # ─────────────────────────────────────────────────────────────────────────────
485
+ # CLI
486
+ # ─────────────────────────────────────────────────────────────────────────────
487
+
488
if __name__ == "__main__":
    ap = argparse.ArgumentParser()
    ap.add_argument("--manual", action="store_true", help="Force retrain now")
    ap.add_argument("--scheduler", action="store_true", help="Start blocking scheduler")
    ap.add_argument("--data_root", default=None, help="Override DATA_ROOT env var")
    a = ap.parse_args()
    # Rebinds the module-level DATA_ROOT, which run_retrain reads at call time.
    if a.data_root:
        DATA_ROOT = a.data_root
    if a.scheduler:
        # Blocking loop — this is the mode the Docker service runs in.
        start_scheduler()
    else:
        # One-shot: check triggers once (or force a run with --manual).
        check_and_retrain(force=a.manual)
src/streamlit_app.py CHANGED
@@ -1,40 +1,798 @@
1
- import altair as alt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import numpy as np
3
  import pandas as pd
 
 
4
  import streamlit as st
 
 
 
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
 
 
 
8
 
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
 
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
1
+ # src/streamlit_app.py
2
+ """
3
+ MyoSight β€” Myotube & Nuclei Analyser
4
+ ========================================
5
+ Drop-in replacement for streamlit_app.py on Hugging Face Spaces.
6
+
7
+ New features vs the original Myotube Analyzer V2:
8
+ ✦ Animated count-up metrics (9 counters)
9
+ ✦ Instance overlay β€” nucleus IDs (1,2,3…) + myotube IDs (M1,M2…)
10
+ ✦ Watershed nuclei splitting for accurate counts
11
+ ✦ Myotube surface area (total, mean, max ¡m²) + per-tube bar chart
12
+ ✦ Active learning β€” upload corrected masks β†’ saved to corrections/
13
+ ✦ Low-confidence auto-flagging β†’ image queued for retraining
14
+ ✦ Retraining queue status panel
15
+ ✦ All original sidebar controls preserved
16
+ """
17
+
18
+ import io
19
+ import os
20
+ import json
21
+ import time
22
+ import zipfile
23
+ import hashlib
24
+ from datetime import datetime
25
+ from pathlib import Path
26
+
27
  import numpy as np
28
  import pandas as pd
29
+ from PIL import Image
30
+
31
  import streamlit as st
32
+ import torch
33
+ import torch.nn as nn
34
+ import matplotlib
35
+ matplotlib.use("Agg")
36
+ import matplotlib.pyplot as plt
37
+ import matplotlib.patches as mpatches
38
+ from huggingface_hub import hf_hub_download
39
 
40
+ import scipy.ndimage as ndi
41
+ from skimage.morphology import remove_small_objects, disk, closing, opening
42
+ from skimage import measure
43
+ from skimage.segmentation import watershed
44
+ from skimage.feature import peak_local_max
45
 
 
 
 
46
 
47
+ # ─────────────────────────────────────────────────────────────────────────────
48
+ # CONFIG ← edit these two lines to match your HF model repo
49
+ # ─────────────────────────────────────────────────────────────────────────────
50
+ MODEL_REPO_ID = "skarugu/myotube-unet"
51
+ MODEL_FILENAME = "model_final.pt"
52
+
53
+ CONF_FLAG_THR = 0.60 # images below this confidence are queued for retraining
54
+ QUEUE_DIR = Path("retrain_queue")
55
+ CORRECTIONS_DIR = Path("corrections")
56
+
57
+
58
+ # ─────────────────────────────────────────────────────────────────────────────
59
+ # Helpers (identical to originals so nothing breaks)
60
+ # ─────────────────────────────────────────────────────────────────────────────
61
+
62
def sha256_file(path: str) -> str:
    """Return the SHA-256 hex digest of the file at *path*, streamed in 1 MiB chunks."""
    digest = hashlib.sha256()
    with open(path, "rb") as fh:
        while chunk := fh.read(1024 * 1024):
            digest.update(chunk)
    return digest.hexdigest()
68
+
69
+
70
def png_bytes(arr_u8: np.ndarray) -> bytes:
    """Encode a uint8 image array as PNG and return the raw bytes."""
    with io.BytesIO() as out:
        Image.fromarray(arr_u8).save(out, format="PNG")
        return out.getvalue()
74
+
75
+
76
def resize_u8_to_float01(ch_u8: np.ndarray, W: int, H: int,
                         resample=Image.BILINEAR) -> np.ndarray:
    """Resize a single-channel uint8 array to (W, H) and scale to float32 in [0, 1]."""
    resized = Image.fromarray(ch_u8, mode="L").resize((W, H), resample=resample)
    return np.asarray(resized, dtype=np.float32) / 255.0
80
+
81
+
82
def get_channel(rgb_u8: np.ndarray, source: str) -> np.ndarray:
    """Extract one channel ('Red'/'Green'/'Blue') from an HxWx3 uint8 image,
    or the Rec.601 luma (as uint8) for any other *source* value."""
    channel_idx = {"Red": 0, "Green": 1, "Blue": 2}.get(source)
    if channel_idx is not None:
        return rgb_u8[..., channel_idx]
    luma = 0.299 * rgb_u8[..., 0] + 0.587 * rgb_u8[..., 1] + 0.114 * rgb_u8[..., 2]
    return luma.astype(np.uint8)
87
+
88
+
89
def hex_to_rgb(h: str):
    """Convert '#rrggbb' (leading '#' optional) to an (r, g, b) tuple of ints."""
    digits = h.lstrip("#")
    return tuple(int(digits[off:off + 2], 16) for off in (0, 2, 4))
92
+
93
+
94
+ # ─────────────────────────────────────────────────────────────────────────────
95
+ # Postprocessing
96
+ # ─────────────────────────────────────────────────────────────────────────────
97
+
98
def postprocess_masks(nuc_mask, myo_mask,
                      min_nuc_area=20, min_myo_area=500,
                      myo_close_radius=3):
    """Clean raw binary masks: drop tiny nuclei; morphologically close then
    open the myotube mask with a disk and remove small objects.

    Same closing-based postprocess as Myotube Analyzer V2. Returns a pair of
    uint8 masks (nuclei, myotubes).
    """
    cleaned_nuclei = remove_small_objects(
        nuc_mask.astype(bool), min_size=int(min_nuc_area)
    ).astype(np.uint8)

    footprint = disk(int(myo_close_radius))
    smoothed = opening(closing(myo_mask.astype(bool), footprint), footprint)
    cleaned_myotubes = remove_small_objects(smoothed, min_size=int(min_myo_area)).astype(np.uint8)

    return cleaned_nuclei, cleaned_myotubes
112
+
113
+
114
def label_cc(mask: np.ndarray) -> np.ndarray:
    """Label connected components of a binary mask (background stays 0;
    scipy's default cross-shaped structure, i.e. 4-connectivity in 2D)."""
    labelled, _count = ndi.label(mask.astype(np.uint8))
    return labelled
117
+
118
+
119
def label_nuclei_watershed(nuc_bin: np.ndarray,
                           min_distance: int = 3,
                           min_nuc_area: int = 6) -> np.ndarray:
    """Split touching nuclei via distance-transform watershed.

    Returns an int32 label image (0 = background). Falls back to plain
    connected-component labelling when no distance peaks are found.
    """
    nuc_bin = remove_small_objects(nuc_bin.astype(bool), min_size=min_nuc_area)
    if nuc_bin.sum() == 0:
        # Nothing survives size filtering — return an all-background label image.
        return np.zeros_like(nuc_bin, dtype=np.int32)

    dist = ndi.distance_transform_edt(nuc_bin)
    # One seed per local distance maximum — approximately one per nucleus centre.
    coords = peak_local_max(dist, labels=nuc_bin,
                            min_distance=min_distance, exclude_border=False)
    markers = np.zeros_like(nuc_bin, dtype=np.int32)
    for i, (r, c) in enumerate(coords, start=1):
        markers[r, c] = i

    if markers.max() == 0:
        # No peaks found (degenerate shapes): plain connected components instead.
        return ndi.label(nuc_bin.astype(np.uint8))[0].astype(np.int32)

    # Flood from the seeds over the inverted distance map, confined to the mask.
    return watershed(-dist, markers, mask=nuc_bin).astype(np.int32)
138
+
139
+
140
+ # ─────────────────────────────────────────────────────────────────────────────
141
+ # Surface area (new)
142
+ # ─────────────────────────────────────────────────────────────────────────────
143
+
144
def compute_surface_area(myo_mask: np.ndarray, px_um: float = 1.0) -> dict:
    """Per-myotube and aggregate surface areas in µm².

    *px_um* is the physical side length of one pixel in µm, so each region's
    pixel count is scaled by px_um². Empty masks yield zeros and an empty list.
    """
    px_area = px_um ** 2
    regions = measure.regionprops(label_cc(myo_mask))
    areas = [round(r.area * px_area, 2) for r in regions]
    return {
        "total_area_um2" : round(sum(areas), 2),
        "mean_area_um2" : round(float(np.mean(areas)) if areas else 0.0, 2),
        "max_area_um2" : round(float(np.max(areas)) if areas else 0.0, 2),
        "per_myotube_areas" : areas,
    }
154
+
155
+
156
+ # ─────────────────────────────────────────────────────────────────────────────
157
+ # Biological metrics (counting + fusion + surface area)
158
+ # ─────────────────────────────────────────────────────────────────────────────
159
+
160
def compute_bio_metrics(nuc_mask, myo_mask,
                        min_overlap_frac=0.1,
                        nuc_ws_min_distance=3,
                        nuc_ws_min_area=6,
                        px_um=1.0) -> dict:
    """Compute nuclei/myotube counting, fusion and surface-area metrics.

    Parameters
    ----------
    nuc_mask, myo_mask : binary masks (assumed same shape — the nucleus
        coordinates index directly into ``myo_lab``; TODO confirm callers
        always pass matching resolutions).
    min_overlap_frac : a nucleus counts as MyHC-positive when at least this
        fraction of its pixels lies inside its dominant myotube.
    nuc_ws_min_distance, nuc_ws_min_area : forwarded to the nuclei watershed.
    px_um : pixel edge length in micrometres for area conversion.

    Returns
    -------
    dict of scalar metrics; the "_per_myotube_areas" entry (underscore
    prefix) is a list that downstream code strips before CSV export.
    """
    # Instance-label nuclei (watershed splits touching ones) and myotubes
    # (plain connected components).
    nuc_lab = label_nuclei_watershed(nuc_mask,
                                     min_distance=nuc_ws_min_distance,
                                     min_nuc_area=nuc_ws_min_area)
    myo_lab = label_cc(myo_mask)
    total = int(nuc_lab.max())  # labels are 1..N, so max == nucleus count

    # For each nucleus, find the myotube covering most of its pixels; the
    # nucleus is MyHC-positive when that dominant overlap is large enough.
    pos, nm = 0, {}  # nm: myotube id -> list of nucleus labels inside it
    for prop in measure.regionprops(nuc_lab):
        coords = prop.coords
        ids = myo_lab[coords[:, 0], coords[:, 1]]
        ids = ids[ids > 0]  # ignore background pixels under the nucleus
        if ids.size == 0:
            continue
        unique, counts = np.unique(ids, return_counts=True)
        mt = int(unique[np.argmax(counts)])  # dominant myotube id
        frac = counts.max() / len(coords)    # fraction of nucleus inside it
        if frac >= min_overlap_frac:
            pos += 1
            nm.setdefault(mt, []).append(prop.label)

    # Fusion index: share of all nuclei that sit in myotubes containing at
    # least two nuclei. "per" counts nuclei per *occupied* myotube only.
    per = [len(v) for v in nm.values()]
    fused = sum(n for n in per if n >= 2)
    fi = 100.0 * fused / total if total else 0.0
    pct = 100.0 * pos / total if total else 0.0
    avg = float(np.mean(per)) if per else 0.0

    sa = compute_surface_area(myo_mask, px_um=px_um)

    return {
        "total_nuclei"              : total,
        "myHC_positive_nuclei"      : int(pos),
        "myHC_positive_percentage"  : round(pct, 2),
        "nuclei_fused"              : int(fused),
        # NOTE: counts only myotubes that contain >=1 accepted nucleus.
        "myotube_count"             : int(len(per)),
        "avg_nuclei_per_myotube"    : round(avg, 2),
        "fusion_index"              : round(fi, 2),
        "total_area_um2"            : sa["total_area_um2"],
        "mean_area_um2"             : sa["mean_area_um2"],
        "max_area_um2"              : sa["max_area_um2"],
        "_per_myotube_areas"        : sa["per_myotube_areas"],  # _ prefix = kept out of CSV
    }
206
+
207
+
208
+ # ─────────────────────────────────────────────────────────────────────────────
209
+ # Overlay helpers
210
+ # ─────────────────────────────────────────────────────────────────────────────
211
+
212
def make_simple_overlay(rgb_u8, nuc_mask, myo_mask, nuc_color, myo_color, alpha):
    """Blend flat nuclei / myotube colours onto the image.

    Masks are nearest-neighbour upsampled to the image resolution; the
    myotube layer is painted first so nuclei remain visible on top.
    Used for the ZIP export (fast, no matplotlib).
    """
    height, width = rgb_u8.shape[:2]

    def _upsample(mask):
        # Nearest-neighbour so the binary mask stays binary.
        img = Image.fromarray((mask * 255).astype(np.uint8))
        return np.array(img.resize((width, height), Image.NEAREST)) > 0

    blended = rgb_u8.astype(np.float32)
    layers = ((_upsample(myo_mask), myo_color), (_upsample(nuc_mask), nuc_color))
    for sel, colour in layers:
        tint = np.asarray(colour, dtype=np.float32)
        blended[sel] = (1 - alpha) * blended[sel] + alpha * tint
    return np.clip(blended, 0, 255).astype(np.uint8)
225
+
226
+
227
def make_instance_overlay(rgb_u8: np.ndarray,
                          nuc_lab: np.ndarray,
                          myo_lab: np.ndarray,
                          alpha: float = 0.45,
                          label_nuclei: bool = True,
                          label_myotubes: bool = True) -> np.ndarray:
    """
    Per-instance coloured overlay rendered with matplotlib.
    Nuclei → cool colourmap with white numeric IDs.
    Myotubes → autumn colourmap with M1, M2… IDs.
    Returns RGB uint8 array at original image resolution.

    Parameters
    ----------
    rgb_u8 : original image, uint8 RGB.
    nuc_lab, myo_lab : integer instance-label images (0 = background),
        possibly at a lower (model-input) resolution than the image.
    alpha : blend weight of the colour layers.
    label_nuclei / label_myotubes : draw per-instance ID badges.
    """
    orig_h, orig_w = rgb_u8.shape[:2]
    # Fix: plt.cm.get_cmap() was deprecated in matplotlib 3.7 and removed
    # in 3.9; plain attribute access on plt.cm works on every version.
    nuc_cmap = plt.cm.cool
    myo_cmap = plt.cm.autumn

    def _resize_lab(lab, h, w):
        # Nearest-neighbour keeps integer label ids intact while upsampling.
        return np.array(Image.fromarray(lab.astype(np.int32)).resize((w, h), Image.NEAREST))

    nuc_disp = _resize_lab(nuc_lab, orig_h, orig_w)
    myo_disp = _resize_lab(myo_lab, orig_h, orig_w)
    base = rgb_u8.astype(np.float32).copy()
    n_myo = int(myo_disp.max())
    n_nuc = int(nuc_disp.max())

    # Blend myotubes first, nuclei second, so nuclei stay visible on top.
    if n_myo > 0:
        myo_norm = (myo_disp / max(n_myo, 1)).astype(np.float32)
        myo_rgba = (myo_cmap(myo_norm)[:, :, :3] * 255).astype(np.float32)
        mask = myo_disp > 0
        base[mask] = (1 - alpha) * base[mask] + alpha * myo_rgba[mask]

    if n_nuc > 0:
        nuc_norm = (nuc_disp / max(n_nuc, 1)).astype(np.float32)
        nuc_rgba = (nuc_cmap(nuc_norm)[:, :, :3] * 255).astype(np.float32)
        mask = nuc_disp > 0
        base[mask] = (1 - alpha) * base[mask] + alpha * nuc_rgba[mask]

    overlay = np.clip(base, 0, 255).astype(np.uint8)

    # Render ID badges + legend on top of the blended image via matplotlib,
    # then rasterise the figure back into a numpy array.
    dpi = 100
    fig, ax = plt.subplots(figsize=(orig_w / dpi, orig_h / dpi), dpi=dpi)
    ax.imshow(overlay)
    ax.axis("off")

    # Centroids are in label-image coordinates; scale to display pixels.
    scale_x = orig_w / nuc_lab.shape[1]
    scale_y = orig_h / nuc_lab.shape[0]
    font_nuc = max(3, min(6, orig_w // 200))
    font_myo = max(4, min(8, orig_w // 150))

    if label_nuclei:
        for prop in measure.regionprops(nuc_lab):
            r, c = prop.centroid
            ax.text(c * scale_x, r * scale_y, str(prop.label),
                    fontsize=font_nuc, color="white", ha="center", va="center",
                    fontweight="bold",
                    bbox=dict(boxstyle="round,pad=0.1", fc="steelblue", alpha=0.6, lw=0))

    if label_myotubes:
        for prop in measure.regionprops(myo_lab):
            r, c = prop.centroid
            ax.text(c * scale_x, r * scale_y, f"M{prop.label}",
                    fontsize=font_myo, color="white", ha="center", va="center",
                    fontweight="bold",
                    bbox=dict(boxstyle="round,pad=0.1", fc="darkred", alpha=0.6, lw=0))

    patches = [
        mpatches.Patch(color=nuc_cmap(0.7), label=f"Nuclei (n={n_nuc})"),
        mpatches.Patch(color=myo_cmap(0.7), label=f"Myotubes (n={n_myo})"),
    ]
    ax.legend(handles=patches, loc="upper right", fontsize=max(5, orig_w // 200),
              framealpha=0.75, facecolor="#111", labelcolor="white")

    fig.tight_layout(pad=0)
    buf = io.BytesIO()
    fig.savefig(buf, format="png", bbox_inches="tight", pad_inches=0, dpi=dpi)
    plt.close(fig)
    buf.seek(0)
    return np.array(Image.open(buf).convert("RGB"))
305
+
306
+
307
+ # ─────────────────────────────────────────────────────────────────────────────
308
+ # Animated counter
309
+ # ─────────────────────────────────────────────────────────────────────────────
310
+
311
def animated_metric(placeholder, label: str, final_val,
                    color: str = "#4fc3f7", steps: int = 20, delay: float = 0.025):
    """Count a metric card up from 0 to ``final_val`` inside ``placeholder``.

    Renders ``steps`` intermediate HTML frames, sleeping ``delay`` seconds
    between frames. Floats show one decimal place; ints show as integers.
    """
    show_decimal = isinstance(final_val, float)
    for frame in range(1, steps + 1):
        current = final_val * frame / steps
        text = f"{current:.1f}" if show_decimal else str(int(current))
        placeholder.markdown(
            f"""
            <div style='text-align:center;padding:12px 6px;border-radius:12px;
                        background:#1a1a2e;border:1px solid #2a2a4e;margin:4px 0;'>
              <div style='font-size:2rem;font-weight:800;color:{color};
                          line-height:1.1;'>{text}</div>
              <div style='font-size:0.75rem;color:#9e9e9e;margin-top:4px;'>{label}</div>
            </div>
            """,
            unsafe_allow_html=True,
        )
        time.sleep(delay)
329
+
330
+
331
+ # ─────────────────────────────────────────────────────────────────────────────
332
+ # Active-learning queue helpers
333
+ # ─────────────────────────────────────────────────────────────────────────────
334
+
335
def _ensure_dirs():
    """Create the retraining queue and corrections folders if missing."""
    for folder in (QUEUE_DIR, CORRECTIONS_DIR):
        folder.mkdir(parents=True, exist_ok=True)
338
+
339
+
340
def add_to_queue(image_array: np.ndarray, reason: str = "batch",
                 nuc_mask=None, myo_mask=None, metadata: dict = None):
    """Persist an image (optionally with corrected masks) for retraining.

    With both masks: writes image + binarised masks + meta.json into a
    timestamped folder under CORRECTIONS_DIR. Without both masks: drops
    the raw image plus a JSON sidecar into QUEUE_DIR.
    """
    _ensure_dirs()
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    # reason/timestamp deliberately override same-named metadata keys.
    info = {**(metadata or {}), "reason": reason, "timestamp": stamp}

    if nuc_mask is None or myo_mask is None:
        # Unlabelled sample -> flat queue: image + JSON sidecar.
        Image.fromarray(image_array).save(QUEUE_DIR / f"{stamp}.png")
        (QUEUE_DIR / f"{stamp}.json").write_text(
            json.dumps({**info, "has_masks": False}, indent=2))
        return

    # Labelled correction -> its own folder with image, masks and metadata.
    target = CORRECTIONS_DIR / stamp
    target.mkdir(parents=True, exist_ok=True)
    Image.fromarray(image_array).save(target / "image.png")
    Image.fromarray((nuc_mask > 0).astype(np.uint8) * 255).save(target / "nuclei_mask.png")
    Image.fromarray((myo_mask > 0).astype(np.uint8) * 255).save(target / "myotube_mask.png")
    (target / "meta.json").write_text(json.dumps({**info, "has_masks": True}, indent=2))
356
+
357
+
358
+ # ─────────────────────────────────────────────────────────────────────────────
359
+ # Model (architecture identical to training script)
360
+ # ─────────────────────────────────────────────────────────────────────────────
361
+
362
class DoubleConv(nn.Module):
    """Two 3x3 Conv -> BatchNorm -> ReLU stages (the classic U-Net unit).

    Built as one ``nn.Sequential`` named ``net`` so state-dict keys
    (net.0 .. net.5) match existing checkpoints.
    """

    def __init__(self, in_ch, out_ch):
        super().__init__()
        stages = []
        for src, dst in ((in_ch, out_ch), (out_ch, out_ch)):
            stages += [
                nn.Conv2d(src, dst, 3, padding=1),
                nn.BatchNorm2d(dst),
                nn.ReLU(True),
            ]
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        return self.net(x)
370
+
371
+
372
class UNet(nn.Module):
    """4-level encoder/decoder U-Net with skip connections.

    Defaults: 2 input channels (MyHC + DAPI) and 2 output logit maps
    (nuclei, myotubes). Attribute names (d1..d4, p1..p4, bn, u*/du*, out)
    must stay as-is so saved checkpoints keep loading.
    """

    def __init__(self, in_ch=2, out_ch=2, base=32):
        super().__init__()
        # Encoder: channel width doubles at each of the four levels.
        self.d1 = DoubleConv(in_ch, base)
        self.p1 = nn.MaxPool2d(2)
        self.d2 = DoubleConv(base, base * 2)
        self.p2 = nn.MaxPool2d(2)
        self.d3 = DoubleConv(base * 2, base * 4)
        self.p3 = nn.MaxPool2d(2)
        self.d4 = DoubleConv(base * 4, base * 8)
        self.p4 = nn.MaxPool2d(2)
        # Bottleneck
        self.bn = DoubleConv(base * 8, base * 16)
        # Decoder: transpose-conv upsampling, then conv over the skip concat.
        self.u4 = nn.ConvTranspose2d(base * 16, base * 8, 2, 2)
        self.du4 = DoubleConv(base * 16, base * 8)
        self.u3 = nn.ConvTranspose2d(base * 8, base * 4, 2, 2)
        self.du3 = DoubleConv(base * 8, base * 4)
        self.u2 = nn.ConvTranspose2d(base * 4, base * 2, 2, 2)
        self.du2 = DoubleConv(base * 4, base * 2)
        self.u1 = nn.ConvTranspose2d(base * 2, base, 2, 2)
        self.du1 = DoubleConv(base * 2, base)
        # 1x1 head producing the per-class logit maps.
        self.out = nn.Conv2d(base, out_ch, 1)

    def forward(self, x):
        enc1 = self.d1(x)
        enc2 = self.d2(self.p1(enc1))
        enc3 = self.d3(self.p2(enc2))
        enc4 = self.d4(self.p3(enc3))
        mid = self.bn(self.p4(enc4))
        y = self.du4(torch.cat([self.u4(mid), enc4], 1))
        y = self.du3(torch.cat([self.u3(y), enc3], 1))
        y = self.du2(torch.cat([self.u2(y), enc2], 1))
        y = self.du1(torch.cat([self.u1(y), enc1], 1))
        return self.out(y)
397
+
398
+
399
@st.cache_resource
def load_model(device: str):
    """Download the U-Net checkpoint from the Hub and prepare it for inference.

    Cached by Streamlit, so download + load happen once per process. Also
    prints checkpoint provenance (size, mtime, SHA256) to the sidebar so
    the running model version can be verified at a glance.

    Parameters
    ----------
    device : "cuda" or "cpu"; the model is moved there and set to eval().
    """
    # force_download=True re-fetches on every cold start so a re-uploaded
    # checkpoint is picked up immediately; within a process the
    # @st.cache_resource wrapper prevents repeated downloads.
    local = hf_hub_download(repo_id=MODEL_REPO_ID, filename=MODEL_FILENAME,
                            force_download=True)
    file_sha = sha256_file(local)
    mtime = time.ctime(os.path.getmtime(local))
    size_mb = os.path.getsize(local) / 1e6

    # Provenance panel: lets users confirm exactly which weights are live.
    st.sidebar.markdown("### 🔍 Model debug")
    st.sidebar.caption(f"Repo: `{MODEL_REPO_ID}`")
    st.sidebar.caption(f"File: `{MODEL_FILENAME}`")
    st.sidebar.caption(f"Size: {size_mb:.2f} MB")
    st.sidebar.caption(f"Modified: {mtime}")
    st.sidebar.caption(f"SHA256: `{file_sha[:20]}…`")

    # NOTE(review): torch.load unpickles arbitrary objects — acceptable only
    # because the checkpoint comes from our own pinned Hub repo; consider
    # weights_only=True once the minimum torch version allows it.
    ckpt = torch.load(local, map_location=device)
    # Checkpoints may be a bare state_dict or {"model": state_dict, ...}.
    state = ckpt["model"] if isinstance(ckpt, dict) and "model" in ckpt else ckpt
    model = UNet(in_ch=2, out_ch=2, base=32)
    model.load_state_dict(state)
    model.to(device).eval()
    return model
420
+
421
+
422
+ # ────────────────────────────────────────────────────��────────────────────────
423
+ # PAGE CONFIG + CSS
424
+ # ─────────────────────────────────────────────────────────────────────────────
425
+
426
# Page configuration and global dark-theme CSS for the whole app.
st.set_page_config(page_title="MyoSight — Myotube Analyser",
                   layout="wide", page_icon="🔬")

st.markdown("""
<style>
body, .stApp { background:#0e0e1a; color:#e0e0e0; }
.block-container { max-width:1200px; padding-top:1.25rem; }
h1,h2,h3,h4 { color:#90caf9; }
.flag-box {
    background:#3e1a1a; border-left:4px solid #ef5350;
    padding:10px 16px; border-radius:8px; margin:8px 0;
}
</style>
""", unsafe_allow_html=True)

st.title("🔬 MyoSight — Myotube & Nuclei Analyser")
# Prefer GPU when available; this device string is reused across the app.
device = "cuda" if torch.cuda.is_available() else "cpu"
443
+
444
+ # ─────────────────────────────────────────────────────────────────────────────
445
+ # SIDEBAR
446
+ # ─────────────────────────────────────────────────────────────────────────────
447
# Sidebar: every tunable parameter — channel mapping, thresholds,
# post-processing, overlay styling, calibration and the AL toggle.
with st.sidebar:
    st.caption(f"Device: **{device}**")

    st.header("Input mapping")
    # Which RGB channel feeds each of the model's two input planes.
    src1 = st.selectbox("Model channel 1 (MyHC / myotubes)",
                        ["Red", "Green", "Blue", "Grayscale"], index=0)
    inv1 = st.checkbox("Invert channel 1", value=False)
    src2 = st.selectbox("Model channel 2 (DAPI / nuclei)",
                        ["Red", "Green", "Blue", "Grayscale"], index=2)
    inv2 = st.checkbox("Invert channel 2", value=False)

    st.header("Preprocessing")
    # Square side length the image is resized to before inference.
    image_size = st.select_slider("Model input size",
                                  options=[256, 384, 512, 640, 768, 1024], value=512)

    st.header("Thresholds")
    # Probability cut-offs applied to the sigmoid output maps.
    thr_nuc = st.slider("Nuclei threshold", 0.05, 0.95, 0.50, 0.01)
    thr_myo = st.slider("Myotube threshold", 0.05, 0.95, 0.50, 0.01)

    st.header("Postprocessing")
    min_nuc_area = st.number_input("Min nucleus area (px)", 0, 10000, 20, 1)
    min_myo_area = st.number_input("Min myotube area (px)", 0, 200000, 500, 10)
    myo_close_radius = st.number_input("Myotube close radius", 0, 50, 3, 1)

    st.header("Watershed (nuclei splitting)")
    nuc_ws_min_dist = st.number_input("Min watershed distance", 1, 30, 3, 1)
    nuc_ws_min_area = st.number_input("Min watershed area (px)", 1, 500, 6, 1)

    st.header("Overlay")
    nuc_hex = st.color_picker("Nuclei colour", "#00FFFF")
    myo_hex = st.color_picker("Myotube colour", "#FF0000")
    alpha = st.slider("Overlay alpha", 0.0, 1.0, 0.45, 0.01)
    nuc_rgb = hex_to_rgb(nuc_hex)
    myo_rgb = hex_to_rgb(myo_hex)
    label_nuc = st.checkbox("Show nucleus IDs on overlay", value=True)
    label_myo = st.checkbox("Show myotube IDs on overlay", value=True)

    st.header("Surface area")
    # With the default 1.0 the "µm²" figures are effectively pixel counts.
    px_um = st.number_input("Pixel size (µm) — set for real µm²",
                            value=1.0, min_value=0.01, step=0.01)

    st.header("Active learning")
    enable_al = st.toggle("Enable correction upload", value=True)

    st.header("Metric definitions")
    with st.expander("Fusion Index"):
        st.write("100 × (nuclei in myotubes with ≥2 nuclei) / total nuclei")
    with st.expander("MyHC-positive nucleus"):
        st.write("Counted if ≥10% of nucleus pixels overlap a myotube.")
    with st.expander("Surface area"):
        st.write("Pixel count × px_um². Set pixel size for real µm² values.")
498
+
499
+
500
+ # ─────────────────────────────────────────────────────────────────────────────
501
+ # FILE UPLOADER
502
+ # ─────────────────────────────────────────────────────────────────────────────
503
# File upload + session-state bootstrap; stop early until images exist.
uploads = st.file_uploader(
    "Upload 1+ images (png / jpg / tif). Public Space — don't upload sensitive data.",
    type=["png", "jpg", "jpeg", "tif", "tiff"],
    accept_multiple_files=True,
)

# Analysis outputs live in session_state so widget interactions (tab
# switches, selectboxes) don't force a full recompute.
for key in ("df", "artifacts", "zip_bytes", "bio_metrics"):
    if key not in st.session_state:
        st.session_state[key] = None

if not uploads:
    st.info("👆 Upload one or more fluorescence images to get started.")
    st.stop()

# Cached via @st.cache_resource, so the checkpoint loads once per process.
model = load_model(device=device)
518
+
519
+ # ─────────────────────────────────────────────────────────────────────────────
520
+ # RUN ANALYSIS
521
+ # ─────────────────���───────────────────────────────────────────────────────────
522
# Run analysis: segment every uploaded image, build overlays and metrics,
# assemble a downloadable ZIP, and cache everything in session_state.
with st.form("run_form"):
    run = st.form_submit_button("▶ Run / Rerun analysis", type="primary")

if run:
    results = []          # one flat metrics dict per image (CSV rows)
    artifacts = {}        # per-image PNG bytes for the preview tabs
    all_bio_metrics = {}  # metrics incl. per-myotube areas, keyed by image
    low_conf_flags = []   # (name, confidence) pairs auto-queued for retraining
    zip_buf = io.BytesIO()

    with st.spinner("Analysing images…"):
        with zipfile.ZipFile(zip_buf, "w", compression=zipfile.ZIP_DEFLATED) as zf:
            prog = st.progress(0.0)

            for i, up in enumerate(uploads):
                name = Path(up.name).stem
                rgb_u8 = np.array(
                    Image.open(io.BytesIO(up.getvalue())).convert("RGB"),
                    dtype=np.uint8
                )

                # Map the selected RGB channels onto the model's 2 inputs.
                ch1 = get_channel(rgb_u8, src1)
                ch2 = get_channel(rgb_u8, src2)
                if inv1: ch1 = 255 - ch1
                if inv2: ch2 = 255 - ch2

                # Resize to the square model input; values scaled to [0, 1].
                H = W = int(image_size)
                x1 = resize_u8_to_float01(ch1, W, H, Image.BILINEAR)
                x2 = resize_u8_to_float01(ch2, W, H, Image.BILINEAR)
                x = np.stack([x1, x2], 0).astype(np.float32)

                x_t = torch.from_numpy(x).unsqueeze(0).to(device)
                with torch.no_grad():
                    probs = torch.sigmoid(model(x_t)).cpu().numpy()[0]

                # Confidence check — NOTE(review): this averages the two
                # per-channel *maxima*, which saturates near 1.0 on most
                # images; a mean/entropy-based score may flag more reliably.
                conf = float(np.mean([probs[0].max(), probs[1].max()]))
                if conf < CONF_FLAG_THR:
                    low_conf_flags.append((name, conf))
                    add_to_queue(rgb_u8, reason="low_confidence",
                                 metadata={"confidence": conf, "filename": up.name})

                # Binarise probability maps at the user thresholds.
                nuc_raw = (probs[0] > float(thr_nuc)).astype(np.uint8)
                myo_raw = (probs[1] > float(thr_myo)).astype(np.uint8)

                nuc_pp, myo_pp = postprocess_masks(
                    nuc_raw, myo_raw,
                    min_nuc_area=int(min_nuc_area),
                    min_myo_area=int(min_myo_area),
                    myo_close_radius=int(myo_close_radius),
                )

                # Flat overlay for ZIP
                simple_ov = make_simple_overlay(
                    rgb_u8, nuc_pp, myo_pp, nuc_rgb, myo_rgb, float(alpha)
                )

                # Instance overlay for display
                nuc_lab = label_nuclei_watershed(nuc_pp,
                                                 min_distance=int(nuc_ws_min_dist),
                                                 min_nuc_area=int(nuc_ws_min_area))
                myo_lab = label_cc(myo_pp)
                inst_ov = make_instance_overlay(rgb_u8, nuc_lab, myo_lab,
                                                alpha=float(alpha),
                                                label_nuclei=label_nuc,
                                                label_myotubes=label_myo)

                bio = compute_bio_metrics(
                    nuc_pp, myo_pp,
                    nuc_ws_min_distance=int(nuc_ws_min_dist),
                    nuc_ws_min_area=int(nuc_ws_min_area),
                    px_um=float(px_um),
                )
                # Per-myotube areas go to a side channel so the CSV stays flat.
                per_areas = bio.pop("_per_myotube_areas", [])
                bio["image"] = name
                results.append(bio)
                all_bio_metrics[name] = {**bio, "_per_myotube_areas": per_areas}

                artifacts[name] = {
                    "original" : png_bytes(rgb_u8),
                    "overlay"  : png_bytes(inst_ov),
                    "nuc_pp"   : png_bytes((nuc_pp * 255).astype(np.uint8)),
                    "myo_pp"   : png_bytes((myo_pp * 255).astype(np.uint8)),
                }

                # ZIP contents
                zf.writestr(f"{name}/overlay.png", png_bytes(simple_ov))
                zf.writestr(f"{name}/instance_overlay.png", png_bytes(inst_ov))
                zf.writestr(f"{name}/nuclei_pp.png", artifacts[name]["nuc_pp"])
                zf.writestr(f"{name}/myotube_pp.png", artifacts[name]["myo_pp"])
                zf.writestr(f"{name}/nuclei_raw.png", png_bytes((nuc_raw*255).astype(np.uint8)))
                zf.writestr(f"{name}/myotube_raw.png", png_bytes((myo_raw*255).astype(np.uint8)))

                prog.progress((i + 1) / len(uploads))

            df = pd.DataFrame(results).sort_values("image")
            zf.writestr("metrics.csv", df.to_csv(index=False).encode("utf-8"))

    # Persist everything so later widget interactions don't recompute.
    st.session_state.df = df
    st.session_state.artifacts = artifacts
    st.session_state.zip_bytes = zip_buf.getvalue()
    st.session_state.bio_metrics = all_bio_metrics

    if low_conf_flags:
        names_str = ", ".join(f"{n} (conf={c:.2f})" for n, c in low_conf_flags)
        st.markdown(
            f"<div class='flag-box'>⚠️ <b>Low-confidence images auto-queued for retraining:</b> "
            f"{names_str}</div>",
            unsafe_allow_html=True,
        )

# Nothing analysed yet in this session: prompt and halt the script here.
if st.session_state.df is None:
    st.info("Click **▶ Run / Rerun analysis** to generate results.")
    st.stop()
636
+
637
+ # ─────────────────────────────────────────────────────────────────────────────
638
+ # RESULTS TABLE + DOWNLOADS
639
+ # ─────────────────────────────────────────────────────────────────────────────
640
# Results table plus CSV / ZIP downloads.
st.subheader("📋 Results")
# Columns with a leading underscore are internal and hidden from display.
display_cols = [c for c in st.session_state.df.columns if not c.startswith("_")]
st.dataframe(st.session_state.df[display_cols], use_container_width=True, height=320)

c1, c2 = st.columns(2)
with c1:
    st.download_button("⬇️ Download metrics.csv",
                       st.session_state.df[display_cols].to_csv(index=False).encode(),
                       file_name="metrics.csv", mime="text/csv")
with c2:
    st.download_button("⬇️ Download results.zip",
                       st.session_state.zip_bytes,
                       file_name="results.zip", mime="application/zip")

st.divider()
655
+
656
+ # ─────────────────────────────────────────────────────────────────────────────
657
+ # PER-IMAGE PREVIEW + ANIMATED METRICS
658
+ # ─────────────────────────────────────────────────────────────────────────────
659
# Per-image preview with tabbed views and animated metric cards.
st.subheader("🖼️ Image preview & live metrics")
names = list(st.session_state.artifacts.keys())
pick = st.selectbox("Select image", names)

col_img, col_metrics = st.columns([3, 2], gap="large")

with col_img:
    tabs = st.tabs(["Instance overlay", "Original", "Nuclei mask", "Myotube mask"])
    art = st.session_state.artifacts[pick]
    FIXED_W = 700  # fixed display width so all tabs render at the same size
    with tabs[0]: st.image(art["overlay"], width=FIXED_W)
    with tabs[1]: st.image(art["original"], width=FIXED_W)
    with tabs[2]: st.image(art["nuc_pp"], width=FIXED_W)
    with tabs[3]: st.image(art["myo_pp"], width=FIXED_W)

with col_metrics:
    st.markdown("#### 📊 Live metrics")
    bio = st.session_state.bio_metrics.get(pick, {})
    per_areas = bio.get("_per_myotube_areas", [])

    # 3x3 grid of animated metric cards.
    r1c1, r1c2, r1c3 = st.columns(3)
    r2c1, r2c2, r2c3 = st.columns(3)
    r3c1, r3c2, r3c3 = st.columns(3)

    placeholders = {
        "total_nuclei"            : r1c1.empty(),
        "myotube_count"           : r1c2.empty(),
        "myHC_positive_nuclei"    : r1c3.empty(),
        "myHC_positive_percentage": r2c1.empty(),
        "fusion_index"            : r2c2.empty(),
        "avg_nuclei_per_myotube"  : r2c3.empty(),
        "total_area_um2"          : r3c1.empty(),
        "mean_area_um2"           : r3c2.empty(),
        "max_area_um2"            : r3c3.empty(),
    }

    # (metric key, card label, accent colour, render as float?)
    specs = [
        ("total_nuclei", "Total nuclei", "#4fc3f7", False),
        ("myotube_count", "Myotubes", "#ff8a65", False),
        ("myHC_positive_nuclei", "MyHC⁺ nuclei", "#a5d6a7", False),
        ("myHC_positive_percentage", "MyHC⁺ %", "#ce93d8", True),
        ("fusion_index", "Fusion index %", "#80cbc4", True),
        ("avg_nuclei_per_myotube", "Avg nuc/myotube", "#80deea", True),
        ("total_area_um2", f"Total area (µm²)", "#fff176", True),
        ("mean_area_um2", f"Mean area (µm²)", "#ffcc80", True),
        ("max_area_um2", f"Max area (µm²)", "#ef9a9a", True),
    ]

    for key, label, color, is_float in specs:
        val = bio.get(key, 0)
        animated_metric(placeholders[key], label,
                        float(val) if is_float else int(val),
                        color=color)

    if per_areas:
        st.markdown("#### 📏 Per-myotube area")
        area_df = pd.DataFrame({
            "Myotube"    : [f"M{i+1}" for i in range(len(per_areas))],
            f"Area (µm²)": per_areas,
        }).set_index("Myotube")
        st.bar_chart(area_df, height=220)

st.divider()
722
+
723
+ # ─────────────────────────────────────────────────────────────────────────────
724
+ # ACTIVE LEARNING β€” CORRECTION UPLOAD
725
+ # ─────────────────────────────────────────────────────────────────────────────
726
# Active learning: users upload corrected masks that feed self_train.py.
if enable_al:
    st.subheader("🧠 Submit corrected labels (Active Learning)")
    st.caption(
        "Upload corrected binary masks for any image. "
        "Corrections are saved to corrections/ and picked up "
        "automatically by self_train.py at the next trigger check."
    )

    al_pick = st.selectbox("Correct masks for image", names, key="al_pick")
    acol1, acol2 = st.columns(2)
    with acol1:
        corr_nuc = st.file_uploader("Corrected NUCLEI mask (PNG/TIF, binary 0/255)",
                                    type=["png", "tif", "tiff"], key="nuc_corr")
    with acol2:
        corr_myo = st.file_uploader("Corrected MYOTUBE mask (PNG/TIF, binary 0/255)",
                                    type=["png", "tif", "tiff"], key="myo_corr")

    if st.button("✅ Submit corrections", type="primary"):
        if corr_nuc is None or corr_myo is None:
            st.error("Please upload BOTH a nuclei mask and a myotube mask.")
        else:
            # Recover the original RGB from the cached PNG bytes, binarise
            # the uploaded masks, and queue the triple for retraining.
            # NOTE(review): uploaded masks are not validated against the
            # image dimensions here — confirm self_train.py handles that.
            orig_bytes = st.session_state.artifacts[al_pick]["original"]
            orig_rgb = np.array(Image.open(io.BytesIO(orig_bytes)).convert("RGB"))
            nuc_arr = (np.array(Image.open(corr_nuc).convert("L")) > 0).astype(np.uint8)
            myo_arr = (np.array(Image.open(corr_myo).convert("L")) > 0).astype(np.uint8)
            add_to_queue(orig_rgb, nuc_mask=nuc_arr, myo_mask=myo_arr,
                         reason="user_correction",
                         metadata={"source_image": al_pick,
                                   "timestamp": datetime.now().isoformat()})
            st.success(
                f"✅ Corrections for **{al_pick}** saved to `corrections/`. "
                "The model will retrain at the next scheduled cycle."
            )

    st.divider()
761
+
762
+ # ─────────────────────────────────────────────────────────────────────────────
763
+ # RETRAINING QUEUE STATUS
764
+ # ─────────────────────────────────────────────────────────────────────────────
765
# Self-training queue status: counts, reason breakdown, retraining history.
with st.expander("🔧 Self-training queue status"):
    _ensure_dirs()
    q_items = list(QUEUE_DIR.glob("*.json"))
    c_items = list(CORRECTIONS_DIR.glob("*/meta.json"))

    sq1, sq2 = st.columns(2)
    sq1.metric("Images in retraining queue", len(q_items))
    sq2.metric("Corrected label pairs", len(c_items))

    if q_items:
        # Tally queue entries by their recorded reason; unreadable JSON
        # sidecars are skipped silently (best-effort display only).
        reasons = {}
        for p in q_items:
            try:
                r = json.loads(p.read_text()).get("reason", "unknown")
                reasons[r] = reasons.get(r, 0) + 1
            except Exception:
                pass
        st.write("Queue breakdown:", reasons)

    # manifest.json is presumably written by self_train.py after each
    # retraining run — TODO confirm against that script.
    manifest = Path("manifest.json")
    if manifest.exists():
        try:
            history = json.loads(manifest.read_text())
            if history:
                st.markdown("**Last 5 retraining runs:**")
                hist_df = pd.DataFrame(history[-5:])
                st.dataframe(hist_df, use_container_width=True)
        except Exception:
            pass
794
 
795
# Manual retraining trigger: fire-and-forget background process.
if st.button("🔄 Trigger retraining now"):
    import subprocess
    import sys

    # Fix: use the current interpreter instead of a bare "python", which
    # may be missing from PATH or resolve to a different environment
    # inside the slim container image.
    subprocess.Popen([sys.executable, "self_train.py", "--manual"])
    st.info("Retraining started in the background. Check terminal / logs for progress.")