Spaces:
Paused
Paused
| """One-shot script to pull Run 6 artifacts from HF Hub. | |
| Written as a file (not python -c) so shells don't choke on the newlines. | |
| Run 6 focus: break Run 5's degenerate R2-only policy via forced-outcome | |
| task variants + unlikeliness reward shaping (He et al. 2506.02355) + | |
| R-level balance bonus + curriculum with 50% to 70% forced variants. | |
| After training completes on the Space, run: | |
| python tools/fetch_run6.py | |
| to pull every artifact locally for post-mortem. The destination folder is | |
| `training_runs/run_6_forced_variants/` (gitignored). | |
| Key files to inspect after fetch: | |
| grpo/training_log.json — per-episode predicted vs actual R-level | |
| grpo/metrics.json — mean reward, catastrophe count | |
| eval/results.json — scripted vs sft_only vs grpo_trained | |
| grpo/_trainer/trainer_state.json β TRL's internal metrics, look at | |
| frac_reward_zero_std (target <40%) | |
| Theory predictions (from config.yaml notes): | |
| * frac_reward_zero_std drops from 70% to <40% | |
| * confusion matrix has non-zero entries in all 5 R rows | |
| * mean reward 0.60-0.75 (lower than Run 5's 0.664) | |
| * eval accuracy 75-85% across R-levels (vs 100% R2-only) | |
| * task_log_cleanup solved (was unsolved in Run 5) | |
| """ | |
| from __future__ import annotations | |
| import os | |
| import shutil | |
| import subprocess | |
| from huggingface_hub import snapshot_download | |
# Local folder the snapshot is downloaded into. main() removes this folder
# (when it already exists) and passes it to snapshot_download as local_dir.
TARGET_DIR = "training_runs/run_6_forced_variants"
def main() -> None:
    """Download the Run 6 artifact snapshot and print a per-file size listing.

    Steps:
      1. Read the Hugging Face access token by running ``hf auth token``.
      2. Remove any existing ``TARGET_DIR`` so files from an earlier fetch
         cannot mix with the new snapshot.
      3. Download the ``chane335/permanence-artifacts`` dataset repo into
         ``TARGET_DIR``.
      4. Print every downloaded file with its size, the total in MB, and a
         suggested follow-up command for inspecting ``grpo/metrics.json``.

    Raises:
        FileNotFoundError: if the ``hf`` executable cannot be found.
        subprocess.CalledProcessError: if ``hf auth token`` exits non-zero.
    """
    # Resolve the token BEFORE deleting anything: if the `hf` CLI is missing
    # or not logged in, the previously fetched artifacts stay intact.
    token = subprocess.check_output(["hf", "auth", "token"], text=True).strip()

    if os.path.exists(TARGET_DIR):
        shutil.rmtree(TARGET_DIR)

    path = snapshot_download(
        repo_id="chane335/permanence-artifacts",
        repo_type="dataset",
        local_dir=TARGET_DIR,
        token=token,
    )

    total = 0
    for root, dirs, files in os.walk(path):
        # Prune download-cache directories by exact name. Assigning to
        # dirs[:] stops os.walk from descending into them, and (unlike a
        # substring test on the relative path) never hides a real artifact
        # whose name merely contains ".cache". Sorting keeps the listing
        # stable across filesystems.
        dirs[:] = sorted(d for d in dirs if d != ".cache")
        for name in sorted(files):
            full_path = os.path.join(root, name)
            rel = os.path.relpath(full_path, path)
            size = os.path.getsize(full_path)
            total += size
            print(f" {size:>12,} bytes {rel}")
    print(f"TOTAL: {total/1e6:.1f} MB")
    print(f"\nNext: python -c \"import json; "
          f"print(json.load(open('{TARGET_DIR}/grpo/metrics.json')))\"")
# Run the fetch only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()