GenSeg-Baselines / code /scripts /gen_hires_768_recap_manifest.py
MaybeRichard's picture
Upload folder using huggingface_hub
057ec4b verified
Raw
History Blame Contribute Delete
2.14 kB
"""Rebuild a queue manifest for ONLY the 768-res datasets (fives/refuge2/idridd),
injecting CPU thread caps to fix the dataloader thread-oversubscription that starved
the GPU (epoch ~750s -> ~180s). Reuses the original hires manifest's jobs verbatim
(img_size 768, batch 4, num_workers 8, PCI_BUS_ID) and only:
(1) keeps the 3 R=768 datasets' jobs,
(2) adds OMP/MKL/OPENBLAS/NUMEXPR_NUM_THREADS=8 and VECLIB_MAXIMUM_THREADS=8 to each job's export line.
The already-finished cvc/kvasir/busi (512/384) cells are untouched.
Run ON a100: TS=$(date -u +%Y%m%dT%H%M%SZ) python3 scripts/gen_hires_768_recap_manifest.py
"""
import json, os, glob
# Manifest of the original hires run whose jobs are reused.
SRC = os.path.expanduser("~/.aris_queue/runs/hires_20260610T021920Z/manifest.json")

# Job-id prefixes of the jobs to keep (used as a tuple for str.startswith).
DS768_PREFIXES = ("hr_fives_", "hr_refuge2_", "hr_idridd_segmentation_")

# Space-separated thread-cap assignments, each fixed at 8, in a form that can
# be tacked onto an existing `export` statement.
CAP = " ".join(
    name + "=8"
    for name in (
        "OMP_NUM_THREADS",
        "MKL_NUM_THREADS",
        "OPENBLAS_NUM_THREADS",
        "NUMEXPR_NUM_THREADS",
        "VECLIB_MAXIMUM_THREADS",
    )
)

# OLD is the anchor text searched for in each job command; NEW is the same
# export statement followed by the thread caps.
OLD = "export CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=${GPU}"
NEW = OLD + " " + CAP
# Read the source manifest; the context manager closes the handle right away
# instead of leaving it to garbage collection.
with open(SRC, encoding="utf-8") as fh:
    man = json.load(fh)

# Top-level settings carried over into the new manifest (everything but "phases").
top = {k: v for k, v in man.items() if k != "phases"}

# Jobs come from a non-empty top-level "jobs" list when present, otherwise from
# the first phase.
src_jobs = man.get("jobs")
if src_jobs:
    # Flat-format source: drop the original unfiltered list from the carried-over
    # settings so the output does not hold it next to the rebuilt "phases".
    top.pop("jobs")
else:
    src_jobs = man["phases"][0]["jobs"]

# Base directory against which each job's expected output path is resolved.
cwd = top.get("cwd", ".")

jobs = []
for j in src_jobs:
    # Keep only the jobs whose id carries one of the selected dataset prefixes.
    if not j["id"].startswith(DS768_PREFIXES):
        continue
    # skip any that somehow already finished
    if os.path.isfile(os.path.join(cwd, j["expected_output"])):
        continue
    # Fail loudly rather than queue a job whose command was not patched.
    if OLD not in j["cmd"]:
        raise SystemExit("export anchor not found in: " + j["id"])
    # Copy the job with a new id (rc_ = recapped) and the patched command;
    # every other field is reused verbatim.
    jobs.append({**j, "id": "rc_" + j["id"], "cmd": j["cmd"].replace(OLD, NEW)})

if not jobs:
    raise SystemExit("no 768 jobs to (re)run")
# The run name is built from the TS environment variable; stop with a usage
# hint (instead of a bare KeyError traceback) when it is missing or empty.
ts = os.environ.get("TS")
if not ts:
    raise SystemExit(
        "TS is not set; run as: "
        "TS=$(date -u +%Y%m%dT%H%M%SZ) python3 scripts/gen_hires_768_recap_manifest.py"
    )

# New manifest: the carried-over top-level settings, a new project name, and a
# single dependency-free phase holding the selected jobs.
man2 = dict(top)
man2["project"] = "baselines_hires_768_recap"
man2["phases"] = [{"name": "rc768", "depends_on": [], "jobs": jobs}]

RUN = "hires768_" + ts
rd = os.path.expanduser("~/.aris_queue/runs/" + RUN)
os.makedirs(rd + "/logs", exist_ok=True)

# Write through a context manager so the file is flushed and closed before the
# summary below is printed.
with open(rd + "/manifest.json", "w", encoding="utf-8") as fh:
    json.dump(man2, fh, indent=2)

print("RUN=" + RUN, "jobs=" + str(len(jobs)))
print("sample cmd:", jobs[0]["cmd"][:160])