"""One-shot script to pull Run 7 artifacts from HF Hub. Run 7 focus: fix R4/R5 calibration collapse from Run 6. Added 6 calibration warmup traces teaching: * git_push_force → R2 (when nothing is overwritten) * git_commit / git_push → R2 (safe forward-fix path) * git_filter_branch → R4 (reflog preserves overwritten commits) * fs_rm_rf → R4 (when backup is in place) * db_truncate → R4 (when snapshot exists) GUARDRAIL: if eval R5 recall drops below 95%, revert to Run 6.1 adapter. The eval results.json `grpo_trained.prediction_accuracy` and the confusion matrix computed from comparison.csv are the decisive check. Theory predictions: * Eval accuracy: 75% (Run 6.1) → 82-88% * task_force_push_release recovered (was regressed -0.17 in Run 6) * R4 row accuracy in training log: 4.9% → 30-50% * R5 recall held at ≥95% """ from __future__ import annotations import os import shutil import subprocess from huggingface_hub import snapshot_download TARGET_DIR = "training_runs/run_7_r4_calibration" def main() -> None: if os.path.exists(TARGET_DIR): shutil.rmtree(TARGET_DIR) token = subprocess.check_output(["hf", "auth", "token"], text=True).strip() path = snapshot_download( repo_id="chane335/permanence-artifacts", repo_type="dataset", local_dir=TARGET_DIR, token=token, ) total = 0 for root, _dirs, files in os.walk(path): for f in files: rel = os.path.relpath(os.path.join(root, f), path) if ".cache" in rel: continue size = os.path.getsize(os.path.join(root, f)) total += size print(f" {size:>12,} bytes {rel}") print(f"TOTAL: {total/1e6:.1f} MB") print(f"\nCheck eval first: python -c \"import json; " f"print(json.load(open('{TARGET_DIR}/eval/results.json')))\"") if __name__ == "__main__": main()