Spaces:
Sleeping
Sleeping
| """Minimal LIVE smoke test of the deployed Modal app — ONE LLM call + ONE voice | |
| call (not the 32-take pre-gen), to validate the real model APIs cheaply. | |
| python3 scripts/smoke_modal.py | |
| NOTE: the first call downloads model weights (MiniCPM-o ~19GB on A100, VoxCPM2 on | |
| A10G) into the Volume and spins GPUs — this is the real-credit step. Subsequent | |
| calls are warm. | |
| """ | |
| import sys | |
| import numpy as np | |
| import modal | |
# Name of the deployed Modal app; main() passes it to modal.Cls.from_name()
# to look up the "WitnessLLM" and "WitnessVoice" classes.
APP = "witnessbox"
def main():
    """Run one live LLM call and one live voice call against the deployed app.

    Looks up the ``WitnessLLM`` and ``WitnessVoice`` classes of the Modal app
    named by ``APP``, calls ``respond`` and ``speak`` remotely once each,
    prints what came back, and validates the results.

    Returns:
        None.

    Raises:
        AssertionError: if the LLM reply is not a non-empty string, or if the
            voice result is empty / 0-dimensional or its sample rate is not
            one of the expected values.
    """
    llm = modal.Cls.from_name(APP, "WitnessLLM")()
    voice = modal.Cls.from_name(APP, "WitnessVoice")()

    print("→ LLM (MiniCPM-o) cold start + one reply…", flush=True)
    reply = llm.respond.remote(
        "You are Marcus Reid, a guarded CFO under cross-examination. Answer in ONE short sentence, in character.",
        [{"role": "user", "content": "Did you authorize the twelve-million-dollar wire?"}],
    )
    print(" LLM reply:", repr(reply))
    # Raise explicitly instead of using `assert`: assert statements are
    # removed under `python -O`, which would let the smoke test pass
    # without checking anything.
    if not (isinstance(reply, str) and reply):
        raise AssertionError("LLM returned empty/non-string")

    print("→ Voice (VoxCPM2) cold start + one line…", flush=True)
    wav, sr = voice.speak.remote(
        "I have nothing to hide, counselor.", "calm, composed, faintly condescending"
    )
    wav = np.asarray(wav)
    expected_rates = (16000, 22050, 24000, 44100, 48000)
    # Validate before printing the summary: that line indexes wav.shape[0]
    # and divides by sr, so a bad result would otherwise fail with an
    # IndexError or ZeroDivisionError rather than a clear validation error.
    if wav.ndim == 0 or wav.size == 0 or sr not in expected_rates:
        raise AssertionError(
            f"voice returned unusable audio: shape={wav.shape}, sample rate={sr!r}"
        )
    print(f" voice: {wav.shape} samples @ {sr} Hz ({wav.shape[0]/sr:.1f}s)")

    print("\n✅ LIVE smoke passed — MiniCPM-o + VoxCPM2 APIs are correct on GPU.")
# Script entry point: run the smoke test and hand its return value to
# sys.exit() as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)