"""Rebuild the 768 recap manifest at R=512 for fives/refuge2/idridd (user chose to drop 768->512: still high-res & resolution-fair, 300ep/3seed protocol intact, but ~2.3x less compute -> ~6-8 days instead of ~12-18). Reuses the capped 768 manifest's 36 jobs (thread caps already injected; cv2.setNumThreads(1) is in the code) and only: --img_size 768 -> 512, --batch_size 4 -> 8 (batch 8 matches finished kvasir/busi@512). expected_output paths unchanged (metrics.json gets written at whatever res trained). Run ON a100: TS=$(date -u +%Y%m%dT%H%M%SZ) python3 scripts/gen_hires_512_manifest.py """ import json, os SRC = os.path.expanduser("~/.aris_queue/runs/hires768_20260611T004855Z/manifest.json") man = json.load(open(SRC)) top = {k: v for k, v in man.items() if k != "phases"} src_jobs = man.get("jobs") or man["phases"][0]["jobs"] jobs = [] for j in src_jobs: c = j["cmd"] if "--img_size 768" not in c or "--batch_size 4" not in c: raise SystemExit("anchor not found in " + j["id"]) j2 = dict(j) j2["id"] = j["id"].replace("rc_hr_", "r512_") j2["cmd"] = c.replace("--img_size 768", "--img_size 512").replace("--batch_size 4", "--batch_size 8") jobs.append(j2) if not jobs: raise SystemExit("no jobs") man2 = dict(top) man2["project"] = "baselines_hires_512" # GPU5 hosts a non-campaign DDIM job (~43GB); lower the free-mem gate so the queue can # still place light 512 jobs (~16GB) in GPU5's spare room instead of pinning to GPU4. man2["gpu_free_threshold_mib"] = 30000 man2["phases"] = [{"name": "r512", "depends_on": [], "jobs": jobs}] RUN = "hires512_" + os.environ["TS"] rd = os.path.expanduser("~/.aris_queue/runs/" + RUN) os.makedirs(rd + "/logs", exist_ok=True) json.dump(man2, open(rd + "/manifest.json", "w"), indent=2) print("RUN=" + RUN, "jobs=" + str(len(jobs))) print("sample:", jobs[0]["cmd"][:200])