File size: 1,821 Bytes
"""
utils.run_dir — per-experiment directory contract.

Every training invocation creates its own directory,
experiments/runs/<run_id>/, with the following layout. This module is the
single source of truth for that contract.

    experiments/runs/<YYYYMMDD-HHMMSS>_<condition>_n<N>_s<seed>/
        config.json              # frozen config used for this run
        run.pid                  # PID of the detached process (written by launcher)
        logs/
            train.log
            train.err
        results/
            history.json         # incremental per-checkpoint metrics
            summary.json         # final test acc, grokking epoch, etc.
        checkpoints/
            final.pt
        figures/
            training_curves.png
"""
from __future__ import annotations
import json
import os
from datetime import datetime, timezone
from typing import Iterable
# Default parent directory for run directories; a relative path, used as the
# default `base` argument of make_run_dir().
DEFAULT_BASE = os.path.join("experiments", "runs")
# Names of the subdirectories that ensure_run_dir() creates inside a run directory.
SUBDIRS = ("logs", "results", "checkpoints", "figures")
def make_run_dir(run_id_parts: Iterable[str], base: str = DEFAULT_BASE):
    """
    Build a timestamped run id and create its directory tree under *base*.

    The run id is the current UTC time formatted as YYYYMMDD-HHMMSS,
    followed by every entry of run_id_parts, all joined with underscores.
    The directory itself is created by ensure_run_dir().

    Returns the tuple (run_dir, run_id), where run_dir is base joined
    with run_id.

    Example (the timestamp is illustrative):
        make_run_dir(["grokking", "n500", "s42"])
        → ("experiments/runs/20260430-141500_grokking_n500_s42",
           "20260430-141500_grokking_n500_s42")
    """
    now_utc = datetime.now(timezone.utc)
    # The timestamp always comes first, so run ids sort chronologically.
    pieces = [now_utc.strftime("%Y%m%d-%H%M%S")]
    pieces.extend(run_id_parts)
    run_id = "_".join(pieces)
    run_dir = os.path.join(base, run_id)
    ensure_run_dir(run_dir)
    return run_dir, run_id
def ensure_run_dir(run_dir: str):
    """
    Create every subdirectory named in SUBDIRS beneath run_dir.

    Missing parent directories (including run_dir itself) are created as
    needed, and directories that already exist are left untouched.
    """
    targets = [os.path.join(run_dir, name) for name in SUBDIRS]
    for target in targets:
        # exist_ok=True makes repeated calls on the same run_dir harmless.
        os.makedirs(target, exist_ok=True)
def save_config(cfg: dict, run_dir: str) -> None:
    """
    Write cfg to <run_dir>/config.json as JSON indented by two spaces.

    The config is serialized to a string before the file is opened, so a
    value the json module cannot encode raises (TypeError, or ValueError
    for circular references) without creating or truncating config.json.

    run_dir must already exist; this function does not create it.
    """
    # Serialize first: json.dump() streams chunks into the open file, so a
    # failure partway through would leave a truncated config.json behind
    # and destroy any previous good copy.
    payload = json.dumps(cfg, indent=2)
    # Pin the encoding rather than relying on the platform/locale default.
    with open(os.path.join(run_dir, "config.json"), "w", encoding="utf-8") as f:
        f.write(payload)
|