"""Upload BLT-Reasoner pilot artifacts to a Hugging Face Hub model repo.

The repo is created public by default; pass ``--private`` to create it as a
private repo instead.

The token is read from stdin so it never appears in command-line arguments,
process listings, or shell history on the box. Run as:

    cat token.txt | python3 -m experiments.blt_reasoner.scripts.hf_upload_pilot \
        --repo LauraGG/blt-reasoner-pilot1 \
        --pilot_dir /home/ubuntu/work/blt_pilot1 \
        --code_dir /home/ubuntu/experiments/blt_reasoner

Uploads (each in its own folder inside the repo):
    ckpts/ckpt-step{2000,4000,6000,8000,...}  — all saved local ckpts
    code/                                     — full blt_reasoner source tree
    logs/run.log, logs/metrics.jsonl, logs/auto_eval.log, logs/interim_*.log,
        logs/run_attempt*.log                 — whichever of these exist
    ablations/*.json                          — interim ablation results
    README.md                                 — auto-generated state summary
"""
from __future__ import annotations

import argparse
import json
import os
import shutil
import sys
import tempfile
from pathlib import Path
|
|
|
|
def _ckpt_step(ckpt_dir: Path) -> int | None:
    """Return the step encoded in a ``ckpt-step<N>`` name, or None if N is not an int."""
    try:
        return int(ckpt_dir.name.replace("ckpt-step", ""))
    except ValueError:
        return None


def _k_train_for_step(step: int) -> int:
    """Map a checkpoint step to the K_train value shown in the README table."""
    if step < 4000:
        return 4
    if step < 8000:
        return 8
    return 16


def _ablation_row(ckpt_name: str, result_path: Path) -> str:
    """Render one ablation JSON file as a markdown table row.

    Any failure to read, parse or format the file yields a "(parse error: ...)"
    row instead of raising, so a single bad file cannot abort README generation.
    """
    nan = float("nan")
    try:
        data = json.loads(result_path.read_text(encoding="utf-8"))
        results = data.get("results", {})
        cells = [
            ckpt_name,
            str(data.get("K", "?")),
            str(data.get("n", "?")),
            format(results.get("normal", {}).get("acc", nan), ".3f"),
            format(results.get("random", {}).get("acc", nan), ".3f"),
            format(results.get("zero", {}).get("acc", nan), ".3f"),
            format(data.get("delta_normal_minus_random", nan), "+.3f"),
            format(data.get("delta_normal_minus_zero", nan), "+.3f"),
        ]
    except Exception as exc:  # deliberate best-effort: report the error in the table
        # Collapse whitespace and escape pipes so the message cannot break the table.
        reason = " ".join(str(exc).split()).replace("|", "\\|")
        return f"| {ckpt_name} | (parse error: {reason}) |"
    return "| " + " | ".join(cells) + " |"


def build_readme(pilot_dir: Path, code_dir: Path, repo: str) -> str:
    """Build the markdown README that summarizes the uploaded pilot state.

    Args:
        pilot_dir: Directory that holds the ``ckpt-step<N>`` checkpoint folders.
            Entries matching ``ckpt-step*`` that are not directories, or whose
            suffix is not an integer, are left out of the README.
        code_dir: Accepted for interface compatibility; not read by this function.
        repo: Hub repo id, interpolated into the resume instructions.

    Returns:
        The README text. It contains a checkpoint table, an ablation table (only
        when at least one ``ablation_*.json`` exists inside a listed checkpoint),
        resume instructions, and a description of the logs.
    """
    # Sort numerically by step (name as tie-breaker) so step 10000 follows 8000.
    found = []
    for path in pilot_dir.glob("ckpt-step*"):
        step = _ckpt_step(path)
        if step is not None and path.is_dir():
            found.append((step, path))
    found.sort(key=lambda pair: (pair[0], pair[1].name))

    lines = [
        "# BLT-Reasoner Pilot 1 — checkpoints + code\n",
        "Compute-constrained latent reasoning pilot on Qwen2.5-1.5B-Instruct + GSM8K. "
        "Continuous M-step latent loop + strict y→only-z bottleneck + InfoNCE z↔y "
        "identifiability loss. See `code/README.md` for architecture details and "
        "`HANDOFF_DACOT_PROPOSAL_2026-05-16.md` (in the main repo) for full motivation.\n",
        "## Checkpoints (LoRA adapter + projector + InfoNCE head)\n",
        "Each ckpt is ~25 MB — only the trained adapter/projector/head; "
        "the base Qwen2.5-1.5B-Instruct is loaded fresh from HF on resume.\n",
        "| step | K_train | files |",
        "|---|---|---|",
    ]
    for step, path in found:
        lines.append(
            f"| {step} | {_k_train_for_step(step)} | "
            f"`ckpts/{path.name}/model/`, `projector.pt`, `head.pt` |"
        )
    lines.append("")

    # Keep the ablation rows in the same numeric checkpoint order as the table
    # above; within one checkpoint, order the files by name.
    ablation_rows = [
        _ablation_row(path.name, result_path)
        for _, path in found
        for result_path in sorted(path.glob("ablation_*.json"))
    ]
    if ablation_rows:
        lines.append("## Pre-registered z-ablation results\n")
        lines.append(
            "Pre-registered success criterion: `Δ_random ≥ 15 pp AND Δ_zero ≥ 25 pp` "
            "on GSM8K-test. Below are the interim results captured during training.\n"
        )
        lines.append(
            "| ckpt | K_eval | n | acc(normal) | acc(random) | acc(zero) | Δ_random | Δ_zero |"
        )
        lines.append("|---|---|---|---|---|---|---|---|")
        lines.extend(ablation_rows)
        lines.append("")

    lines.append("## Resume training on a fresh instance\n")
    lines.append(
        "```bash\n"
        "git clone <main-repo-with-experiments/blt_reasoner> # or pull the code/ subdir here\n"
        "pip install transformers peft bitsandbytes datasets safetensors huggingface_hub\n"
        "python3 -m experiments.blt_reasoner.train \\\n"
        " --config experiments/blt_reasoner/configs/pilot_qwen15b_gsm8k.json \\\n"
        f" --resume_from {repo}:ckpts/ckpt-step6000\n"
        "```\n"
        "Notes:\n"
        "- The `--resume_from` flag (in `train.py`) accepts either a local ckpt path or "
        f"a `{repo}:ckpts/ckpt-stepN` HF-Hub reference.\n"
        "- **Optimizer state is not preserved** across resume. Expect a short loss spike "
        "(~100–300 steps) while Adam moments re-stabilize. The latent geometry (LoRA "
        "weights, projector, head) survives intact.\n"
        "- The base model `Qwen/Qwen2.5-1.5B-Instruct` is fetched automatically.\n"
    )
    lines.append(
        "## Logs and intermediate artifacts\n"
        "- `logs/run.log` — full training log\n"
        "- `logs/metrics.jsonl` — per-step loss/metric breakdown\n"
        "- `logs/auto_eval.log` — poller daemon log (auto-eval on train exit)\n"
        "- `logs/interim_*.log` — interim ablation logs\n"
        "- `code/` — full `experiments/blt_reasoner/` source tree at upload time\n"
    )
    return "\n".join(lines)
|
|
|
|
def _stage_artifacts(pilot: Path, code: Path, repo: str, stage: Path) -> None:
    """Copy everything destined for the Hub into ``stage``, laid out like the repo."""
    # Checkpoints: every ckpt-step* directory, copied verbatim.
    ckpt_stage = stage / "ckpts"
    ckpt_stage.mkdir()
    for ckpt in sorted(pilot.glob("ckpt-step*")):
        if ckpt.is_dir():
            shutil.copytree(ckpt, ckpt_stage / ckpt.name)
            print(f"[upload] staged {ckpt.name}", flush=True)

    # Source tree, minus bytecode caches. Optional: skipped when absent.
    if code.is_dir():
        shutil.copytree(
            code,
            stage / "code",
            ignore=shutil.ignore_patterns("__pycache__", "*.pyc"),
        )
        print("[upload] staged code dir", flush=True)
    else:
        print(f"[upload] code dir {code} not found; skipping", file=sys.stderr, flush=True)

    # Logs: a fixed list of known file names; the ones that do not exist are skipped.
    log_stage = stage / "logs"
    log_stage.mkdir()
    log_names = (
        "run.log",
        "metrics.jsonl",
        "auto_eval.log",
        "interim_ablation.log",
        "interim_ablation_K4.log",
        "interim_ablation_K8.log",
        "interim_ablation_K16_step8000.log",
        "run_attempt1_oom.log",
        "run_attempt2.log",
    )
    for name in log_names:
        log_path = pilot / name
        if log_path.exists():
            shutil.copy(log_path, log_stage / name)
            print(f"[upload] staged log {name}", flush=True)

    # Ablation JSONs are additionally flattened into ablations/, prefixed with the
    # checkpoint name so files from different checkpoints cannot collide.
    ablation_stage = stage / "ablations"
    ablation_stage.mkdir()
    for ckpt in sorted(pilot.glob("ckpt-step*")):
        for result in ckpt.glob("ablation_*.json"):
            shutil.copy(result, ablation_stage / f"{ckpt.name}__{result.name}")

    readme = build_readme(pilot, code, repo)
    # The README contains non-ASCII characters, so the encoding must not depend
    # on the locale of the machine running the upload.
    (stage / "README.md").write_text(readme, encoding="utf-8")
    print(f"[upload] staged README.md ({len(readme)} chars)", flush=True)


def main():
    """CLI entry point: stage the pilot artifacts and push them to the Hub in one commit.

    Reads the HF token from stdin, validates the inputs, creates the repo if
    needed, stages checkpoints/code/logs/ablations/README in a private temporary
    directory, and uploads that directory with a single ``upload_folder`` call.

    Exits with status 2 when stdin does not contain an ``hf_`` token or when
    ``--pilot_dir`` is not a directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--repo", required=True, help="e.g., LauraGG/blt-reasoner-pilot1")
    parser.add_argument("--pilot_dir", required=True, help="e.g., /home/ubuntu/work/blt_pilot1")
    parser.add_argument("--code_dir", required=True, help="e.g., /home/ubuntu/experiments/blt_reasoner")
    parser.add_argument("--private", action="store_true")
    args = parser.parse_args()

    token = sys.stdin.read().strip()
    if not token.startswith("hf_"):
        print("[upload] stdin did not contain an hf_ token; aborting", file=sys.stderr)
        sys.exit(2)

    pilot = Path(args.pilot_dir)
    code = Path(args.code_dir)
    # Fail before touching the Hub: a wrong path would otherwise create and
    # publish a repo containing nothing but a README.
    if not pilot.is_dir():
        print(f"[upload] pilot_dir {pilot} is not a directory; aborting", file=sys.stderr)
        sys.exit(2)

    # Imported only after the cheap local checks have passed.
    from huggingface_hub import HfApi
    api = HfApi(token=token)

    print(f"[upload] creating repo {args.repo} (private={args.private})", flush=True)
    api.create_repo(repo_id=args.repo, repo_type="model", private=args.private, exist_ok=True)

    # A uniquely named temporary directory avoids collisions with other runs
    # and is removed even when staging or the upload raises.
    with tempfile.TemporaryDirectory(prefix="blt_upload_stage_") as tmp:
        stage = Path(tmp)
        _stage_artifacts(pilot, code, args.repo, stage)

        total_bytes = sum(p.stat().st_size for p in stage.rglob("*") if p.is_file())
        print(f"[upload] total staged size = {total_bytes/1e6:.1f} MB", flush=True)

        print(f"[upload] pushing to {args.repo} ...", flush=True)
        api.upload_folder(
            folder_path=str(stage),
            repo_id=args.repo,
            repo_type="model",
            commit_message="BLT-Reasoner pilot 1: ckpts + code + logs + ablations",
        )
        print(f"[upload] DONE — https://huggingface.co/{args.repo}", flush=True)
|
|
|
|
# Run the uploader only when executed as a script or via ``python -m``.
if __name__ == "__main__":
    main()
|
|