WitnessBox / scripts /smoke_modal.py
Farseen0's picture
Deploy WitnessBox
c519923 verified
Raw
History Blame Contribute Delete
1.53 kB
"""Minimal LIVE smoke test of the deployed Modal app — ONE LLM call + ONE voice
call (not the 32-take pre-gen), to validate the real model APIs cheaply.

Usage:
    python3 scripts/smoke_modal.py

NOTE: the first call downloads model weights (MiniCPM-o ~19GB on A100, VoxCPM2 on
A10G) into the Volume and spins up GPUs — this is the step that spends real
credits. Subsequent calls are warm.
"""
import sys
import numpy as np
import modal
# Name of the deployed Modal app; main() passes it to modal.Cls.from_name to
# look up the remote classes.
APP = "witnessbox"
def main():
    """Run one live LLM call and one live voice call against the deployed app.

    Looks up the ``WitnessLLM`` and ``WitnessVoice`` classes of the Modal app
    named by ``APP``, calls ``respond`` and ``speak`` remotely once each,
    prints what came back, and sanity-checks the results.

    Returns:
        None, so ``sys.exit(main())`` exits with status 0 on success.

    Raises:
        AssertionError: if the LLM reply is not a non-empty string, or if the
            voice call returns no samples or an unexpected sample rate.
    """
    # The checks below are raised explicitly instead of being written as
    # ``assert`` statements, so they still run under ``python -O``.
    expected_sample_rates = (16000, 22050, 24000, 44100, 48000)

    llm = modal.Cls.from_name(APP, "WitnessLLM")()
    voice = modal.Cls.from_name(APP, "WitnessVoice")()

    print("→ LLM (MiniCPM-o) cold start + one reply…", flush=True)
    reply = llm.respond.remote(
        "You are Marcus Reid, a guarded CFO under cross-examination. Answer in ONE short sentence, in character.",
        [{"role": "user", "content": "Did you authorize the twelve-million-dollar wire?"}],
    )
    print(" LLM reply:", repr(reply))
    if not (isinstance(reply, str) and reply):
        raise AssertionError("LLM returned empty/non-string")

    print("→ Voice (VoxCPM2) cold start + one line…", flush=True)
    wav, sr = voice.speak.remote(
        "I have nothing to hide, counselor.", "calm, composed, faintly condescending"
    )
    wav = np.asarray(wav)
    # Validate BEFORE computing the duration: a zero/None sample rate or a
    # 0-d array would otherwise fail with ZeroDivisionError/TypeError/
    # IndexError in the f-string below instead of a clear check failure.
    if wav.ndim == 0 or wav.size == 0 or sr not in expected_sample_rates:
        raise AssertionError(
            f"voice returned unusable audio: shape={wav.shape}, sample rate={sr!r}"
        )
    # NOTE(review): the duration assumes samples run along axis 0 (mono or
    # samples-first layout) — confirm against WitnessVoice.speak.
    print(f" voice: {wav.shape} samples @ {sr} Hz ({wav.shape[0]/sr:.1f}s)")

    print("\n✅ LIVE smoke passed — MiniCPM-o + VoxCPM2 APIs are correct on GPU.")
if __name__ == "__main__":
    # Script entry point: main()'s return value becomes the exit status
    # (None, i.e. status 0, when it returns normally).
    raise SystemExit(main())