"""Tiny smoke test: 5 train steps + 1 eval pass on a tiny subset.

Verifies the pipeline runs end-to-end (forward, loss, backward, opt step,
eval). Should finish in < 2 minutes on a single GH200.

The script loads the pilot config, layers the tiny overrides below on top of
it, writes the merged config to a temporary JSON file, runs the trainer module
in a subprocess, and exits with the trainer's return code.
"""
import json
import subprocess
import sys
import time
from pathlib import Path

# Force tiny config overrides. These keys replace the same-named keys of the
# base config; how each one is interpreted is up to the trainer.
overrides = {
    "max_steps": 5,
    "batch_size": 2,
    "grad_accum": 1,
    "data_train_size": 32,
    "data_eval_size": 8,
    "eval_every": 5,  # equals max_steps
    "save_every": 0,  # NOTE(review): presumably disables checkpointing -- confirm in trainer
    "log_every": 1,
    "K_latents": 2,
    "K_curriculum": [[0, 2]],
    "output_dir": "/home/ubuntu/work/blt_smoke",
}

# The base config is resolved relative to this file: <this dir>/../configs/.
base_cfg_path = (
    Path(__file__).resolve().parent.parent
    / "configs"
    / "pilot_qwen15b_gsm8k.json"
)
# JSON is UTF-8 by specification; do not depend on the locale's default encoding.
with open(base_cfg_path, encoding="utf-8") as f:
    cfg = json.load(f)
cfg.update(overrides)

tmp_cfg = Path("/tmp/blt_smoke_config.json")
tmp_cfg.write_text(json.dumps(cfg, indent=2), encoding="utf-8")
print(f"Wrote {tmp_cfg}")

# Use a monotonic clock so the reported duration cannot be skewed by wall-clock
# adjustments that happen while the trainer is running.
t0 = time.monotonic()
r = subprocess.run(
    [
        sys.executable, "-u", "-m", "experiments.blt_reasoner.train",
        "--config", str(tmp_cfg),
    ],
    # The trainer is launched with `-m`, so the package is looked up from cwd.
    cwd="/home/ubuntu",
)
print(f"smoke test exit={r.returncode} elapsed={time.monotonic()-t0:.0f}s")

# Propagate the trainer's exit status so callers (CI, shell) see failures.
sys.exit(r.returncode)