| """ |
| utils.run_dir — per-experiment directory contract. |
| |
| Every training invocation creates its own directory under |
| experiments/runs/<run_id>/ with the following layout. This is the |
| single source of truth for that contract. |
| |
| experiments/runs/<YYYYMMDD-HHMMSS>_<condition>_n<N>_s<seed>/ |
| config.json # frozen config used for this run |
| run.pid # PID of the detached process (written by launcher) |
| logs/ |
| train.log |
| train.err |
| results/ |
| history.json # incremental per-checkpoint metrics |
| summary.json # final test acc, grokking epoch, etc. |
| checkpoints/ |
| final.pt |
| figures/ |
| training_curves.png |
| """ |
|
|
| from __future__ import annotations |
|
|
| import json |
| import os |
| from datetime import datetime, timezone |
| from typing import Iterable |
|
|
# Default parent directory for run directories (a relative path); used as the
# default `base` argument of make_run_dir().
DEFAULT_BASE = os.path.join("experiments", "runs")
# Names of the subdirectories that ensure_run_dir() creates inside a run directory.
SUBDIRS = ("logs", "results", "checkpoints", "figures")
|
|
|
|
def make_run_dir(run_id_parts: Iterable[str], base: str = DEFAULT_BASE) -> tuple[str, str]:
    """
    Create <base>/<stamp>_<...parts>/ and all standard subdirs.

    Args:
        run_id_parts: Components appended to the timestamp, joined with "_".
            Each part is converted with str(), so ints (e.g. a seed) are
            accepted. A single bare string is treated as one part rather
            than being split into characters.
        base: Parent directory under which the run directory is created.

    Returns:
        (run_dir, run_id), where run_dir is os.path.join(base, run_id).

    Note:
        The stamp is the current UTC time at one-second resolution, and the
        subdirectories are created with exist_ok=True, so two calls within
        the same second using identical parts resolve to the same directory.

    Example:
        make_run_dir(["grokking", "n500", "s42"])
        → ("experiments/runs/20260430-141500_grokking_n500_s42",
           "20260430-141500_grokking_n500_s42")
    """
    # A str is itself an iterable of 1-char strings; without this guard
    # make_run_dir("grokking") would yield "<stamp>_g_r_o_k_k_i_n_g".
    if isinstance(run_id_parts, str):
        run_id_parts = [run_id_parts]
    stamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
    # str() each part so numeric components do not make join() raise TypeError.
    run_id = "_".join([stamp, *map(str, run_id_parts)])
    run_dir = os.path.join(base, run_id)
    ensure_run_dir(run_dir)
    return run_dir, run_id
|
|
|
|
def ensure_run_dir(run_dir: str):
    """
    Create each standard subdirectory listed in SUBDIRS beneath run_dir.

    Intermediate directories (including run_dir itself) are created as
    needed, and subdirectories that already exist are not an error.
    """
    subdir_paths = [os.path.join(run_dir, name) for name in SUBDIRS]
    for path in subdir_paths:
        os.makedirs(path, exist_ok=True)
|
|
|
|
def save_config(cfg: dict, run_dir: str) -> None:
    """
    Write cfg to <run_dir>/config.json as JSON indented by two spaces.

    Args:
        cfg: Configuration mapping; must be JSON-serializable.
        run_dir: Existing run directory that receives config.json.

    Raises:
        TypeError: If cfg contains a value json cannot serialize. In that
            case config.json is neither created nor modified.
        OSError: If the file cannot be opened or written.
    """
    # Serialize before opening the file: opening with "w" truncates at once,
    # so a serialization error mid-dump would otherwise leave a partial (or
    # clobber a previously valid) config.json.
    payload = json.dumps(cfg, indent=2)
    with open(os.path.join(run_dir, "config.json"), "w", encoding="utf-8") as f:
        f.write(payload)
|
|