"""Minimal LIVE smoke test of the deployed Modal app — ONE LLM call + ONE voice
call (not the 32-take pre-gen), to validate the real model APIs cheaply.

    python3 scripts/smoke_modal.py

NOTE: the first call downloads model weights (MiniCPM-o ~19GB on A100, VoxCPM2 on
A10G) into the Volume and spins up GPUs — this is the real-credit step. Subsequent
calls are warm.
"""
import sys
import numpy as np
import modal

# Name of the deployed Modal app whose classes main() looks up.
APP = "witnessbox"


def main():
    """Run one live LLM call and one live voice call against the deployed app.

    Looks up the ``WitnessLLM`` and ``WitnessVoice`` classes of the deployed
    Modal app named by ``APP``, invokes ``respond`` and ``speak`` remotely once
    each, prints what came back and sanity-checks both results.

    Returns:
        None. Reaching the end means both checks passed.

    Raises:
        AssertionError: if the LLM reply is empty or not a string, or if the
            voice output is empty/scalar or reports an unexpected sample rate.
    """
    llm = modal.Cls.from_name(APP, "WitnessLLM")()
    voice = modal.Cls.from_name(APP, "WitnessVoice")()

    print("→ LLM (MiniCPM-o) cold start + one reply…", flush=True)
    reply = llm.respond.remote(
        "You are Marcus Reid, a guarded CFO under cross-examination. Answer in ONE short sentence, in character.",
        [{"role": "user", "content": "Did you authorize the twelve-million-dollar wire?"}],
    )
    print("   LLM reply:", repr(reply))
    # Raise explicitly rather than using `assert`: assert statements are
    # stripped under `python -O`, which would let a broken deployment pass.
    if not (isinstance(reply, str) and reply):
        raise AssertionError("LLM returned empty/non-string")

    print("→ Voice (VoxCPM2) cold start + one line…", flush=True)
    wav, sr = voice.speak.remote(
        "I have nothing to hide, counselor.", "calm, composed, faintly condescending"
    )
    wav = np.asarray(wav)
    # Validate before computing the duration so that a scalar result or a bad
    # sample rate is reported as a smoke failure instead of surfacing as an
    # IndexError / ZeroDivisionError from the summary line below.
    accepted_rates = (16000, 22050, 24000, 44100, 48000)
    if wav.ndim == 0 or wav.size == 0 or sr not in accepted_rates:
        raise AssertionError(
            f"voice returned unusable audio: shape={wav.shape}, sample rate={sr!r}"
        )
    print(f"   voice: {wav.shape} samples @ {sr} Hz ({wav.shape[0]/sr:.1f}s)")

    print("\n✅ LIVE smoke passed — MiniCPM-o + VoxCPM2 APIs are correct on GPU.")


if __name__ == "__main__":
    # main() has no return value, so a clean run hands None to sys.exit,
    # which exits with status 0; a failed check propagates as an exception.
    exit_status = main()
    sys.exit(exit_status)