"""Upload BLT-Reasoner pilot artifacts to a public HF repo. Token is read from stdin so it never appears in command-line arguments, process listings, or shell history on the box. Run as: cat token.txt | python3 -m experiments.blt_reasoner.scripts.hf_upload_pilot \ --repo LauraGG/blt-reasoner-pilot1 \ --pilot_dir /home/ubuntu/work/blt_pilot1 \ --code_dir /home/ubuntu/experiments/blt_reasoner Uploads (each in its own folder inside the repo): ckpts/ckpt-step{2000,4000,6000,8000,...} — all saved local ckpts code/ — full blt_reasoner source tree logs/run.log, logs/metrics.jsonl, logs/auto_eval.log, logs/interim_*.log ablations/*.json — interim ablation results README.md — auto-generated state summary """ from __future__ import annotations import argparse import json import os import shutil import sys from pathlib import Path def build_readme(pilot_dir: Path, code_dir: Path, repo: str) -> str: lines = [] lines.append(f"# BLT-Reasoner Pilot 1 — checkpoints + code\n") lines.append( "Compute-constrained latent reasoning pilot on Qwen2.5-1.5B-Instruct + GSM8K. " "Continuous M-step latent loop + strict y→only-z bottleneck + InfoNCE z↔y " "identifiability loss. See `code/README.md` for architecture details and " "`HANDOFF_DACOT_PROPOSAL_2026-05-16.md` (in the main repo) for full motivation.\n" ) # Inventory ckpts = sorted([p for p in (pilot_dir).glob("ckpt-step*") if p.is_dir()], key=lambda p: int(p.name.replace("ckpt-step", ""))) lines.append("## Checkpoints (LoRA adapter + projector + InfoNCE head)\n") lines.append("Each ckpt is ~25 MB — only the trained adapter/projector/head; " "the base Qwen2.5-1.5B-Instruct is loaded fresh from HF on resume.\n") lines.append("| step | K_train | files |") lines.append("|---|---|---|") for c in ckpts: s = int(c.name.replace("ckpt-step", "")) if s < 4000: k = 4 elif s < 8000: k = 8 else: k = 16 lines.append(f"| {s} | {k} | `ckpts/{c.name}/model/`, `projector.pt`, `head.pt` |") lines.append("") # Ablations abls = [] for c in ckpts: for f in c.glob("ablation_*.json"): abls.append((c.name, f)) if abls: lines.append("## Pre-registered z-ablation results\n") lines.append( "Pre-registered success criterion: `Δ_random ≥ 15 pp AND Δ_zero ≥ 25 pp` " "on GSM8K-test. Below are the interim results captured during training.\n" ) lines.append("| ckpt | K_eval | n | acc(normal) | acc(random) | acc(zero) | Δ_random | Δ_zero |") lines.append("|---|---|---|---|---|---|---|---|") for cname, fpath in sorted(abls): try: d = json.loads(Path(fpath).read_text()) r = d.get("results", {}) row = [ cname, str(d.get("K", "?")), str(d.get("n", "?")), f"{r.get('normal', {}).get('acc', float('nan')):.3f}", f"{r.get('random', {}).get('acc', float('nan')):.3f}", f"{r.get('zero', {}).get('acc', float('nan')):.3f}", f"{d.get('delta_normal_minus_random', float('nan')):+.3f}", f"{d.get('delta_normal_minus_zero', float('nan')):+.3f}", ] lines.append("| " + " | ".join(row) + " |") except Exception as e: lines.append(f"| {cname} | (parse error: {e}) |") lines.append("") # Resume instructions lines.append("## Resume training on a fresh instance\n") lines.append("```bash\n" "git clone # or pull the code/ subdir here\n" "pip install transformers peft bitsandbytes datasets safetensors huggingface_hub\n" "python3 -m experiments.blt_reasoner.train \\\n" f" --config experiments/blt_reasoner/configs/pilot_qwen15b_gsm8k.json \\\n" f" --resume_from {repo}:ckpts/ckpt-step6000\n" "```\n" "Notes:\n" "- The `--resume_from` flag (in `train.py`) accepts either a local ckpt path or " f"a `{repo}:ckpts/ckpt-stepN` HF-Hub reference.\n" "- **Optimizer state is not preserved** across resume. Expect a short loss spike " "(~100–300 steps) while Adam moments re-stabilize. The latent geometry (LoRA " "weights, projector, head) survives intact.\n" "- The base model `Qwen/Qwen2.5-1.5B-Instruct` is fetched automatically.\n" ) lines.append("## Logs and intermediate artifacts\n" "- `logs/run.log` — full training log\n" "- `logs/metrics.jsonl` — per-step loss/metric breakdown\n" "- `logs/auto_eval.log` — poller daemon log (auto-eval on train exit)\n" "- `logs/interim_*.log` — interim ablation logs\n" "- `code/` — full `experiments/blt_reasoner/` source tree at upload time\n") return "\n".join(lines) def main(): parser = argparse.ArgumentParser() parser.add_argument("--repo", required=True, help="e.g., LauraGG/blt-reasoner-pilot1") parser.add_argument("--pilot_dir", required=True, help="e.g., /home/ubuntu/work/blt_pilot1") parser.add_argument("--code_dir", required=True, help="e.g., /home/ubuntu/experiments/blt_reasoner") parser.add_argument("--private", action="store_true") args = parser.parse_args() token = sys.stdin.read().strip() if not token.startswith("hf_"): print("[upload] stdin did not contain an hf_ token; aborting", file=sys.stderr) sys.exit(2) from huggingface_hub import HfApi api = HfApi(token=token) print(f"[upload] creating repo {args.repo} (private={args.private})", flush=True) api.create_repo(repo_id=args.repo, repo_type="model", private=args.private, exist_ok=True) pilot = Path(args.pilot_dir) code = Path(args.code_dir) # Stage layout in a tmp dir, then upload as a single folder commit. stage = Path("/tmp/blt_upload_stage") if stage.exists(): shutil.rmtree(stage) stage.mkdir(parents=True) # Ckpts (stage / "ckpts").mkdir() for c in sorted(pilot.glob("ckpt-step*")): if c.is_dir(): shutil.copytree(c, stage / "ckpts" / c.name) print(f"[upload] staged {c.name}", flush=True) # Code if code.exists(): shutil.copytree(code, stage / "code", ignore=shutil.ignore_patterns("__pycache__", "*.pyc")) print(f"[upload] staged code dir", flush=True) # Logs (stage / "logs").mkdir() for name in ("run.log", "metrics.jsonl", "auto_eval.log", "interim_ablation.log", "interim_ablation_K4.log", "interim_ablation_K8.log", "interim_ablation_K16_step8000.log", "run_attempt1_oom.log", "run_attempt2.log"): p = pilot / name if p.exists(): shutil.copy(p, stage / "logs" / name) print(f"[upload] staged log {name}", flush=True) # Also stash all ablation_*.json under ablations/ at the top of the staged tree, # alongside their per-ckpt copies (which are in ckpts/ckpt-stepN/ already). (stage / "ablations").mkdir() for c in sorted(pilot.glob("ckpt-step*")): for f in c.glob("ablation_*.json"): shutil.copy(f, stage / "ablations" / f"{c.name}__{f.name}") # README readme = build_readme(pilot, code, args.repo) (stage / "README.md").write_text(readme) print(f"[upload] staged README.md ({len(readme)} chars)", flush=True) # Final size total_bytes = sum(p.stat().st_size for p in stage.rglob("*") if p.is_file()) print(f"[upload] total staged size = {total_bytes/1e6:.1f} MB", flush=True) print(f"[upload] pushing to {args.repo} ...", flush=True) api.upload_folder( folder_path=str(stage), repo_id=args.repo, repo_type="model", commit_message="BLT-Reasoner pilot 1: ckpts + code + logs + ablations", ) print(f"[upload] DONE — https://huggingface.co/{args.repo}", flush=True) shutil.rmtree(stage) if __name__ == "__main__": main()