| """One-shot script to pull Run 7 artifacts from HF Hub. |
| |
| Run 7 focus: fix R4/R5 calibration collapse from Run 6. Added 6 calibration |
| warmup traces teaching: |
| * git_push_force → R2 (when nothing is overwritten) |
| * git_commit / git_push → R2 (safe forward-fix path) |
| * git_filter_branch → R4 (reflog preserves overwritten commits) |
| * fs_rm_rf → R4 (when backup is in place) |
| * db_truncate → R4 (when snapshot exists) |
| |
| GUARDRAIL: if eval R5 recall drops below 95%, revert to Run 6.1 adapter. |
| The eval results.json `grpo_trained.prediction_accuracy` and the confusion |
| matrix computed from comparison.csv are the decisive check. |
| |
| Theory predictions: |
| * Eval accuracy: 75% (Run 6.1) → 82-88% |
| * task_force_push_release recovered (was regressed -0.17 in Run 6) |
| * R4 row accuracy in training log: 4.9% → 30-50% |
| * R5 recall held at ≥95% |
| """ |
| from __future__ import annotations |
|
|
| import os |
| import shutil |
| import subprocess |
| from huggingface_hub import snapshot_download |
|
|
|
|
# Local directory the snapshot is downloaded into (passed as `local_dir`);
# main() removes any existing copy of it before downloading.
| TARGET_DIR = "training_runs/run_7_r4_calibration" |
|
|
|
|
def main() -> None:
    """Download the artifact snapshot into ``TARGET_DIR`` and list its files.

    Steps:
      1. Read the Hugging Face token by running ``hf auth token``.
      2. Delete any existing ``TARGET_DIR`` so the download starts clean.
      3. Call ``snapshot_download`` for the ``chane335/permanence-artifacts``
         dataset repo with ``local_dir=TARGET_DIR``.
      4. Print the size and relative path of every downloaded file (files
         under a ``.cache`` directory are skipped), the total size in MB,
         and a one-liner for inspecting ``eval/results.json``.

    Raises:
        subprocess.CalledProcessError: if ``hf auth token`` exits non-zero.
        FileNotFoundError: if the ``hf`` executable cannot be found.
        Any error raised by ``snapshot_download`` propagates unchanged.
    """
    # Resolve the token BEFORE deleting anything: if the CLI call fails, the
    # previously downloaded artifacts are still intact.
    token = subprocess.check_output(["hf", "auth", "token"], text=True).strip()

    if os.path.exists(TARGET_DIR):
        shutil.rmtree(TARGET_DIR)

    path = snapshot_download(
        repo_id="chane335/permanence-artifacts",
        repo_type="dataset",
        local_dir=TARGET_DIR,
        token=token,
    )

    total = 0
    for root, dirs, files in os.walk(path):
        # Prune directories named exactly ".cache" in place so os.walk never
        # descends into them (presumably the Hub client's download metadata).
        # Matching the directory name, rather than a substring of the whole
        # relative path, keeps real artifacts such as "foo.cache.json" in
        # the listing and in the total.
        dirs[:] = [d for d in dirs if d != ".cache"]
        for name in files:
            full_path = os.path.join(root, name)
            rel = os.path.relpath(full_path, path)
            size = os.path.getsize(full_path)
            total += size
            print(f" {size:>12,} bytes {rel}")
    print(f"TOTAL: {total/1e6:.1f} MB")
    print(f"\nCheck eval first: python -c \"import json; "
          f"print(json.load(open('{TARGET_DIR}/eval/results.json')))\"")
|
|
|
|
# Script entry point: run the download only when this file is executed
# directly, not when it is imported as a module.
| if __name__ == "__main__": |
| main() |
|
|