GenSeg-Baselines / code /scripts /gen_pannuke_fw_manifest.py
MaybeRichard's picture
Upload folder using huggingface_hub
057ec4b verified
Raw
History Blame Contribute Delete
2.1 kB
"""Generate the framework PanNuke fold02/fold03 manifest (12 jobs = 6 archs x 2
folds, seed0) by reusing the EXACT fold01 commands (preserves per-arch encoder /
pretrained ckpt / epochs), swapping only the protocol. Writes to a run dir."""
import glob
import json
import os
import re
import sys
# Manifests of the earlier runs whose fold01 commands are reused verbatim.
FOLD01_MANIFEST_GLOB = "~/.aris_queue/runs/20260605T13*/manifest.json"
# Architectures and target protocols: 6 archs x 2 folds = 12 jobs.
ARCHS = ["unet", "unetpp", "deeplabv3plus", "attention_unet", "transunet", "swinunet"]
PROTOCOLS = ["fold02", "fold03"]
# GPU export as written in the fold01 commands, and its PCI-ordered replacement.
_GPU_EXPORT = "export CUDA_VISIBLE_DEVICES=${GPU}"
_GPU_EXPORT_PCI = "export CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=${GPU}"


def _manifest_jobs(doc):
    """Return a manifest's job list: top-level "jobs", else the first phase's jobs."""
    jobs = doc.get("jobs")
    if jobs:
        return jobs
    # Tolerate a missing or empty "phases" list instead of raising IndexError.
    phases = doc.get("phases") or [{}]
    return phases[0].get("jobs") or []


def _collect_base_cmds(pattern):
    """Map arch name -> first PanNuke fold01/seed0 command found under *pattern*.

    Manifests are visited in sorted path order and the first command seen for
    an arch wins, so the result is deterministic.
    """
    base = {}
    for path in sorted(glob.glob(os.path.expanduser(pattern))):
        with open(path, encoding="utf-8") as fh:
            doc = json.load(fh)
        for job in _manifest_jobs(doc):
            cmd = job.get("cmd", "")
            if "pannuke" in cmd and "--protocol fold01" in cmd and "--seed 0" in cmd:
                found = re.search(r"--arch (\S+)", cmd)
                if found:
                    base.setdefault(found.group(1), cmd)
    return base


def _retarget_cmd(cmd, proto, job_id):
    """Return *cmd* switched to protocol *proto* with PCI-bus GPU ordering forced."""
    cmd = cmd.replace("--protocol fold01", "--protocol " + proto)
    # CRITICAL: without PCI_BUS_ID, CVD=4/5 map to L40s (44GB, other users) not
    # the A100s -> ECC/OOM. Force PCI bus order so 4/5 == nvidia-smi A100 #4/#5.
    cmd = cmd.replace(_GPU_EXPORT, _GPU_EXPORT_PCI)
    if "CUDA_DEVICE_ORDER=PCI_BUS_ID" not in cmd:
        # The replace above is a no-op when the base command spells the export
        # differently; surface that instead of silently queueing the job.
        print("WARNING: %s: could not force CUDA_DEVICE_ORDER=PCI_BUS_ID "
              "(no '%s' in base cmd)" % (job_id, _GPU_EXPORT), file=sys.stderr)
    return cmd


def main():
    """Build the fold02/fold03 manifest and write it to a new run directory.

    Reads the fold01 commands from FOLD01_MANIFEST_GLOB, rewrites them for each
    protocol in PROTOCOLS, and writes ~/.aris_queue/runs/pannuke_fw_$TS/manifest.json
    (creating a logs/ directory next to it).

    Raises:
        SystemExit: if a base command is missing for any arch, or if the TS
            environment variable is unset or empty.
    """
    base = _collect_base_cmds(FOLD01_MANIFEST_GLOB)
    missing = [a for a in ARCHS if a not in base]
    if missing:
        # Explicit check rather than `assert`, which is stripped under -O.
        raise SystemExit("missing base cmd: %s" % missing)

    jobs = []
    for proto in PROTOCOLS:
        for arch in ARCHS:
            job_id = "fw_%s_%s" % (proto, arch)
            out = "results/baselines/pannuke_semantic_%s/%s/seed0/metrics.json" % (proto, arch)
            jobs.append({"id": job_id,
                         "cmd": _retarget_cmd(base[arch], proto, job_id),
                         "expected_output": out})

    manifest = {"project": "pannuke_fw_cv", "cwd": "/home/wzhang/LSC/Code/NPJ", "conda": "seggen",
                "ssh": "a100", "gpus": [4, 5], "jobs_per_gpu": 3, "max_parallel": 6,
                "gpu_free_threshold_mib": 60000, "oom_retry": {"delay": 180, "max_attempts": 3},
                "phases": [{"name": "fw", "depends_on": [], "jobs": jobs}]}

    ts = os.environ.get("TS")
    if not ts:
        raise SystemExit("TS environment variable must be set (it names the run directory)")
    run = "pannuke_fw_" + ts
    run_dir = os.path.expanduser("~/.aris_queue/runs/" + run)
    os.makedirs(run_dir + "/logs", exist_ok=True)
    # Context manager guarantees the manifest is flushed and closed before exit.
    with open(run_dir + "/manifest.json", "w", encoding="utf-8") as fh:
        json.dump(manifest, fh, indent=2)
    print("RUN=" + run, "jobs=" + str(len(jobs)))


if __name__ == "__main__":
    main()