"""Tiny smoke test: 5 train steps + 1 eval pass on a tiny subset. Verifies the pipeline runs end-to-end (forward, loss, backward, opt step, eval). Should finish in < 2 minutes on a single GH200. """ import json import sys import time from pathlib import Path # Force tiny config overrides overrides = { "max_steps": 5, "batch_size": 2, "grad_accum": 1, "data_train_size": 32, "data_eval_size": 8, "eval_every": 5, "save_every": 0, "log_every": 1, "K_latents": 2, "K_curriculum": [[0, 2]], "output_dir": "/home/ubuntu/work/blt_smoke", } base_cfg_path = Path(__file__).resolve().parent.parent / "configs" / "pilot_qwen15b_gsm8k.json" with open(base_cfg_path) as f: cfg = json.load(f) cfg.update(overrides) tmp_cfg = Path("/tmp/blt_smoke_config.json") tmp_cfg.write_text(json.dumps(cfg, indent=2)) print(f"Wrote {tmp_cfg}") import subprocess t0 = time.time() r = subprocess.run([ sys.executable, "-u", "-m", "experiments.blt_reasoner.train", "--config", str(tmp_cfg), ], cwd="/home/ubuntu") print(f"smoke test exit={r.returncode} elapsed={time.time()-t0:.0f}s") sys.exit(r.returncode)