#!/usr/bin/env python3 """Smoke test: export a HF folder, then load it with trust_remote_code. This is meant to catch: - remote-code syntax/indentation errors - missing auto_map - missing model weights (optional stub) - basic forward/generate wiring regressions It intentionally uses the small stub-weights mode so it does not download big models. """ from __future__ import annotations import subprocess import sys from pathlib import Path import torch from transformers import AutoModelForCausalLM, AutoTokenizer def main() -> int: repo_root = Path(__file__).resolve().parents[1] out_dir = repo_root / "hf_export_ready" if out_dir.exists(): # keep it simple subprocess.run(["rm", "-rf", str(out_dir)], check=True) subprocess.run( [ sys.executable, str(repo_root / "scripts" / "export_hf.py"), "--out", str(out_dir), "--write_stub_weights", ], check=True, ) print("Loading tokenizer...") tok = AutoTokenizer.from_pretrained(out_dir, trust_remote_code=True) print("Loading model...") model = AutoModelForCausalLM.from_pretrained(out_dir, trust_remote_code=True) model.eval() # Basic forward pass (text-only) ids = tok("Hello", return_tensors="pt").input_ids with torch.inference_mode(): out = model(input_ids=ids) assert out.logits.shape[:2] == ids.shape # Tiny generate smoke with torch.inference_mode(): gen = model.generate(ids, max_new_tokens=4, use_cache=False) assert gen.shape[0] == 1 print("SMOKE OK") return 0 if __name__ == "__main__": raise SystemExit(main())