"""Rebuild the 768 recap manifest at R=512 for fives/refuge2/idridd.

The user chose to drop 768 -> 512: still high-res and resolution-fair, with the
300ep/3seed protocol intact, but ~2.3x less compute (~6-8 days instead of
~12-18).

The script reuses the capped 768 manifest's 36 jobs (thread caps already
injected; cv2.setNumThreads(1) is in the code) and changes only:

    --img_size 768  -> 512
    --batch_size 4  -> 8    (batch 8 matches finished kvasir/busi@512)

plus the job-id prefix (rc_hr_ -> r512_). expected_output paths are unchanged
(metrics.json gets written at whatever res trained).

Run ON a100:

    TS=$(date -u +%Y%m%dT%H%M%SZ) python3 scripts/gen_hires_512_manifest.py
"""
import json
import os
import re


SRC = os.path.expanduser("~/.aris_queue/runs/hires768_20260611T004855Z/manifest.json")

# The (?!\d) guard keeps "--batch_size 4" from matching the prefix of e.g.
# "--batch_size 48" (which a plain substring replace would turn into "88").
IMG_SIZE_RE = re.compile(r"--img_size 768(?!\d)")
BATCH_SIZE_RE = re.compile(r"--batch_size 4(?!\d)")


def rewrite_job(job):
    """Return a copy of *job* with its command switched from 768/bs4 to 512/bs8.

    The input dict is not modified. Every key other than "id" and "cmd" is
    carried over as-is (shallow copy).

    Raises:
        SystemExit: if the command lacks either the "--img_size 768" or the
            "--batch_size 4" anchor, so an unexpected job is never silently
            passed through with the wrong settings.
    """
    cmd = job["cmd"]
    if not (IMG_SIZE_RE.search(cmd) and BATCH_SIZE_RE.search(cmd)):
        raise SystemExit("anchor not found in " + job["id"])

    new_job = dict(job)
    # Ids that do not contain the "rc_hr_" prefix are kept unchanged.
    new_job["id"] = job["id"].replace("rc_hr_", "r512_")
    new_job["cmd"] = BATCH_SIZE_RE.sub(
        "--batch_size 8", IMG_SIZE_RE.sub("--img_size 512", cmd)
    )
    return new_job


def build_manifest(src_manifest):
    """Build the 512 manifest dict from the parsed 768 manifest.

    Jobs are taken from the top-level "jobs" list when present and non-empty,
    otherwise from the first phase. All other top-level settings are inherited;
    "project", "gpu_free_threshold_mib" and "phases" are overridden.

    Raises:
        SystemExit: if the source manifest yields no jobs, or a job fails the
            anchor check in rewrite_job().
    """
    src_jobs = src_manifest.get("jobs") or src_manifest["phases"][0]["jobs"]
    jobs = [rewrite_job(job) for job in src_jobs]
    if not jobs:
        raise SystemExit("no jobs")

    # Drop "jobs" as well as "phases": a stale top-level 768 job list must not
    # survive next to the rewritten phase.
    manifest = {
        key: value
        for key, value in src_manifest.items()
        if key not in ("phases", "jobs")
    }
    manifest["project"] = "baselines_hires_512"
    manifest["gpu_free_threshold_mib"] = 30000
    manifest["phases"] = [{"name": "r512", "depends_on": [], "jobs": jobs}]
    return manifest


def main():
    """Read the 768 manifest, write the 512 manifest into a new run directory.

    The run directory is ~/.aris_queue/runs/hires512_<TS>, where TS comes from
    the environment; a logs/ subdirectory is created alongside manifest.json.
    """
    # Fail before doing any work, with the invocation the user needs.
    ts = os.environ.get("TS")
    if not ts:
        raise SystemExit(
            "TS is not set; run as: "
            "TS=$(date -u +%Y%m%dT%H%M%SZ) python3 scripts/gen_hires_512_manifest.py"
        )

    with open(SRC, encoding="utf-8") as fh:
        src_manifest = json.load(fh)
    manifest = build_manifest(src_manifest)
    jobs = manifest["phases"][0]["jobs"]

    run_name = "hires512_" + ts
    run_dir = os.path.expanduser("~/.aris_queue/runs/" + run_name)
    os.makedirs(os.path.join(run_dir, "logs"), exist_ok=True)
    # "with" guarantees the manifest is flushed and closed before we report.
    with open(os.path.join(run_dir, "manifest.json"), "w", encoding="utf-8") as fh:
        json.dump(manifest, fh, indent=2)

    print("RUN=" + run_name, "jobs=" + str(len(jobs)))
    print("sample:", jobs[0]["cmd"][:200])


if __name__ == "__main__":
    main()
|
|