#!/usr/bin/env python3

"""Smoke test: export a HF folder, then load it with trust_remote_code.

This is meant to catch:
- remote-code syntax/indentation errors
- missing auto_map
- missing model weights (optional stub)
- basic forward/generate wiring regressions

It intentionally uses the small stub-weights mode so it does not download big models.
"""

from __future__ import annotations

import shutil
import subprocess
import sys
from pathlib import Path

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


def main() -> int:
    """Export a stub-weight HF folder, then load and exercise it.

    Steps:
      1. Remove any previous ``hf_export_ready`` path under the repo root
         (the parent of this script's directory).
      2. Run ``scripts/export_hf.py --out <dir> --write_stub_weights`` with
         the current interpreter.
      3. Load the tokenizer and model from the exported folder with
         ``trust_remote_code=True``.
      4. Run one forward pass and one 4-token ``generate`` call and check
         the output shapes.

    Returns:
        0 when every step succeeds.

    Raises:
        subprocess.CalledProcessError: If the export script exits non-zero.
        AssertionError: If a shape check fails. Raised explicitly rather
            than via ``assert`` so the checks still run under ``python -O``.
    """
    repo_root = Path(__file__).resolve().parents[1]
    out_dir = repo_root / "hf_export_ready"

    # Start from a clean slate. Use the standard library instead of shelling
    # out to ``rm -rf`` so this also works where ``rm`` is unavailable.
    if out_dir.is_symlink() or out_dir.is_file():
        # rmtree refuses symlinks and plain files; unlink handles both
        # (including a dangling symlink left by an earlier run).
        out_dir.unlink()
    elif out_dir.exists():
        shutil.rmtree(out_dir)

    # check=True makes a failing export abort the smoke test immediately.
    subprocess.run(
        [
            sys.executable,
            str(repo_root / "scripts" / "export_hf.py"),
            "--out",
            str(out_dir),
            "--write_stub_weights",
        ],
        check=True,
    )

    print("Loading tokenizer...")
    tok = AutoTokenizer.from_pretrained(out_dir, trust_remote_code=True)

    print("Loading model...")
    model = AutoModelForCausalLM.from_pretrained(out_dir, trust_remote_code=True)
    model.eval()

    # Basic forward pass (text-only): the first two logits dimensions must
    # match the shape of the input ids.
    ids = tok("Hello", return_tensors="pt").input_ids
    with torch.inference_mode():
        out = model(input_ids=ids)
    if out.logits.shape[:2] != ids.shape:
        raise AssertionError(
            f"logits shape {tuple(out.logits.shape)} does not match "
            f"input ids shape {tuple(ids.shape)}"
        )

    # Tiny generate smoke: a single prompt must yield a single sequence.
    with torch.inference_mode():
        gen = model.generate(ids, max_new_tokens=4, use_cache=False)
    if gen.shape[0] != 1:
        raise AssertionError(
            f"expected a single generated sequence, got shape {tuple(gen.shape)}"
        )

    print("SMOKE OK")
    return 0


if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    sys.exit(main())