code: complete eval pipeline (7 metrics + per-class + Wilcoxon) + Swin-UNet/TransUNet networks; remove backups/obsolete
"""Fill SwinUNet/TransUNet to the FULL 3-seed set @512 on the a100 host (their per-seed best.pth files live here).

For every results/baselines/<cell>/{swinunet,transunet}/seed<s>/best.pth, copy it into the
unified512 tree and run eval_at_res.py --eval_size 512 --exp_name unified512. GPUs 4/5 only
(A100 80G; PCI_BUS_ID). 64 evals. The resulting metrics.json files are then transferred to h800 and re-aggregated.
"""
| import os, glob, shutil, subprocess, time | |
# --- Fixed locations on this host -------------------------------------------
CODE = "/home/wzhang/LSC/Code/NPJ"
DATA = "/home/wzhang/LSC/Dataset/Segmentation/processed_unified"
PY = "/opt/anaconda3/envs/seggen/bin/python"

# Tree holding the per-seed source weights.
BASE = f"{CODE}/results/baselines"
# Tree the checkpoints are staged into; eval_at_res is expected to write its
# outputs here as well (out_root=results, relative to CODE).
UNI = f"{CODE}/results/unified512"

# One log file per launched eval goes into this directory.
LOGD = "/tmp/sw_tr_3seed_logs"
os.makedirs(LOGD, exist_ok=True)

# One entry per concurrent eval slot: GPUs 4 and 5, three co-located evals each.
SLOTS = [gpu for gpu in (4, 5) for _ in range(3)]
# Protocol suffixes recognised at the end of a "<dataset>_<protocol>" cell name.
_PROTOCOLS = ("official", "holdout", "fold01", "fold02", "fold03")

# Build one eval job per per-seed checkpoint found under BASE.
jobs = []
for arch in ("swinunet", "transunet"):
    for w in sorted(glob.glob(f"{BASE}/*/{arch}/seed*/best.pth")):
        parts = w.split("/")
        cell, seed = parts[-4], parts[-2]  # <cell>, seedN

        # Split the cell name into dataset and protocol (first matching suffix wins).
        proto = next((p for p in _PROTOCOLS if cell.endswith("_" + p)), None)
        try:
            sd = int(seed.replace("seed", ""))
        except ValueError:
            sd = None

        if proto is None or sd is None:
            # Queuing such a checkpoint would stage its weights and then launch
            # the eval with "--dataset None" / a bogus seed, so report it and
            # move on instead of wasting a GPU slot (or crashing the launcher).
            print(f"[skip] cannot parse cell/seed from {w}", flush=True)
            continue

        ds = cell[:-(len(proto) + 1)]
        out = f"{UNI}/{cell}/{arch}/{seed}"
        jobs.append({
            "ds": ds,
            "proto": proto,
            "arch": arch,
            "seed": sd,
            "w": w,                        # source checkpoint
            "out": out,                    # staging/eval directory in the unified tree
            "mj": out + "/metrics.json",   # its presence marks the job as done
            "tag": f"{cell}_{arch}_s{sd}",
        })

# Jobs that already have a metrics.json are treated as finished and not re-run.
pending = [j for j in jobs if not os.path.isfile(j["mj"])]
print(f"[3seed] total={len(jobs)} done={len(jobs)-len(pending)} pending={len(pending)}", flush=True)
def make_cmd(j, gpu):
    """Stage a job's checkpoint and return the shell command that evaluates it.

    Side effects: creates ``j["out"]`` if it does not exist and copies the
    source weights ``j["w"]`` to ``<out>/best.pth``.

    Args:
        j: Job dict; the keys "ds", "proto", "arch", "seed", "w" and "out"
            are read.
        gpu: GPU index exported as CUDA_VISIBLE_DEVICES for the eval process.

    Returns:
        A single shell command line (intended for ``bash -lc``) that sets the
        environment, changes into CODE and runs framework/eval_at_res.py at
        eval size 512 under the "unified512" experiment name.
    """
    import shlex  # local import: only this function needs it

    def q(value):
        # Quote for the shell; values made of ordinary path/name characters
        # come back unchanged, so the usual command line is identical.
        return shlex.quote(str(value))

    # TransUNet uses its R50-ViT encoder name; every other arch gets resnet50.
    enc = "R50-ViT-B_16" if j["arch"] == "transunet" else "resnet50"

    os.makedirs(j["out"], exist_ok=True)
    shutil.copy(j["w"], os.path.join(j["out"], "best.pth"))

    env = (
        f"export CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES={q(gpu)} "
        "OMP_NUM_THREADS=8 MKL_NUM_THREADS=8 OPENBLAS_NUM_THREADS=8"
    )
    eval_cmd = (
        f"{q(PY)} framework/eval_at_res.py --data_root {q(DATA)} "
        f"--dataset {q(j['ds'])} --protocol {q(j['proto'])} "
        f"--arch {q(j['arch'])} --seed {q(j['seed'])} --eval_size 512 "
        f"--exp_name unified512 --encoder {q(enc)}"
    )
    return f"{env} && cd {q(CODE)} && {eval_cmd}"
# ---------------------------------------------------------------------------
# Scheduler: keep every GPU slot busy until all pending evals have finished.
# ---------------------------------------------------------------------------
free = list(SLOTS)  # GPU ids with an idle slot (one entry per slot)
running = []        # (process, job, log handle, gpu) for each live eval
i = 0               # index of the next job in `pending` to launch
ok = fail = 0

while running or i < len(pending):
    # Hand the next queued jobs to whatever slots are idle.
    while i < len(pending) and free:
        gpu = free.pop(0)
        job = pending[i]
        i += 1
        log_file = open(f"{LOGD}/{job['tag']}.log", "w")
        proc = subprocess.Popen(
            ["bash", "-lc", make_cmd(job, gpu)],
            stdout=log_file,
            stderr=subprocess.STDOUT,
        )
        running.append((proc, job, log_file, gpu))
        print(f"[launch] gpu{gpu} {job['tag']}", flush=True)

    # Poll interval between checks on the child processes.
    time.sleep(6)

    # Reap finished evals and return their slots to the idle pool.
    still_running = []
    for entry in running:
        proc, job, log_file, gpu = entry
        if proc.poll() is None:
            still_running.append(entry)
            continue
        log_file.close()
        # Success is judged by the metrics file existing, not by the exit code.
        produced = os.path.isfile(job["mj"])
        if produced:
            ok += 1
        else:
            fail += 1
        print(f"[finish] gpu{gpu} {job['tag']} rc={proc.returncode} ok={produced}", flush=True)
        free.append(gpu)
    running = still_running

print(f"[3seed] ALL DONE ok={ok} fail={fail}", flush=True)
print("SWTR_3SEED_DONE", flush=True)