# PregoPal / _test_inference.py
# Author: J.B-Lin
# Commit f10673a: [modal] Fix: mmproj single-file, health endpoint, client API consistency
# (File-viewer residue: Raw | History | Blame | Contribute | Delete | 1.56 kB)
"""
Test the inference endpoint of the PregoPal MiniCPM-o API deployed on Modal.

Usage:
    python _test_inference.py

    # Or with a custom URL:
    python _test_inference.py https://andrew-jiabin--prego-pal-minicpm-serve.modal.run
"""
import sys
import json
import httpx
import os
# PYTHONIOENCODING is only consulted when an interpreter starts, so setting it
# here cannot change the encoding of this process's already-open stdout. The
# assignment is kept so that any child process inherits UTF-8 output.
os.environ["PYTHONIOENCODING"] = "utf-8"

# Switch the live stdout to UTF-8 so the non-ASCII messages printed by this
# script do not raise UnicodeEncodeError on consoles with a legacy code page.
# Best effort: stdout may be None or a replacement object without reconfigure().
_reconfigure_stdout = getattr(sys.stdout, "reconfigure", None)
if _reconfigure_stdout is not None:
    try:
        _reconfigure_stdout(encoding="utf-8")
    except (ValueError, OSError):
        pass  # keep whatever encoding the stream already has

# Default URL - update after deployment. The first command-line argument, when
# given, overrides it. Trailing slashes are stripped in both cases so that the
# endpoint path can be appended without producing "//".
URL = "https://andrew-jiabin--prego-pal-minicpm-serve.modal.run"
if len(sys.argv) > 1:
    URL = sys.argv[1]
URL = URL.rstrip("/")

# Chat-completion request body: a single user message, capped at 50 generated
# tokens.
payload = {
    "model": "MiniCPM-o-4_5",
    "messages": [{"role": "user", "content": "你好,请用一句话回答:1+1等于几?"}],
    "max_tokens": 50,
    "temperature": 0.7,
}
# Single source of truth for the request timeout, so the value passed to httpx
# and the value shown in the timeout message cannot drift apart.
TIMEOUT_SECONDS = 600  # 10 minutes for cold start
ENDPOINT = URL + "/v1/chat/completions"

print("=" * 60)
print("PregoPal MiniCPM-o API 测试")
print("=" * 60)
print(f"URL: {ENDPOINT}")
print("注意:冷启动需要 2-10 分钟加载模型...")
print()

try:
    r = httpx.post(ENDPOINT, json=payload, timeout=TIMEOUT_SECONDS)
    print(f"状态码: {r.status_code}")
    if r.status_code == 200:
        try:
            data = r.json()
            content = data["choices"][0]["message"]["content"]
        except (ValueError, KeyError, IndexError, TypeError):
            # A 200 whose body is not JSON, or lacks choices[0].message.content:
            # show the raw body instead of a bare KeyError/IndexError message.
            print("\n响应格式异常 (前500字符):")
            print(r.text[:500])
        else:
            print("\n=== 模型响应 ===")
            print(content)
            print("=" * 20)
    else:
        # Non-200: show the start of the body to help diagnose the failure.
        print("\n响应 (前500字符):")
        print(r.text[:500])
except httpx.TimeoutException:
    print(f"\n请求超时 ({TIMEOUT_SECONDS}s) - 容器仍在初始化,检查 Modal 日志")
    print(" modal app logs $(modal app list | grep prego-pal | awk '{print $1}')")
except Exception as e:
    # Top-level boundary of a diagnostic script: report any other failure
    # (connection errors, invalid URL, ...) instead of dumping a traceback.
    print(f"\n错误: {e}")