""" Test inference endpoint for PregoPal MiniCPM-o API on Modal Usage: python _test_inference.py # Or with custom URL: python _test_inference.py https://andrew-jiabin--prego-pal-minicpm-serve.modal.run """ import sys import json import httpx import os os.environ["PYTHONIOENCODING"] = "utf-8" # Default URL - update after deployment URL = "https://andrew-jiabin--prego-pal-minicpm-serve.modal.run" if len(sys.argv) > 1: URL = sys.argv[1].rstrip("/") payload = { "model": "MiniCPM-o-4_5", "messages": [{"role": "user", "content": "你好,请用一句话回答:1+1等于几?"}], "max_tokens": 50, "temperature": 0.7 } print("=" * 60) print("PregoPal MiniCPM-o API 测试") print("=" * 60) print(f"URL: {URL}/v1/chat/completions") print("注意:冷启动需要 2-10 分钟加载模型...") print() try: r = httpx.post( URL + "/v1/chat/completions", json=payload, timeout=600 # 10 minutes for cold start ) print(f"状态码: {r.status_code}") if r.status_code == 200: data = r.json() content = data["choices"][0]["message"]["content"] print(f"\n=== 模型响应 ===") print(content) print("=" * 20) else: print(f"\n响应 (前500字符):") print(r.text[:500]) except httpx.TimeoutException: print("\n请求超时 (600s) - 容器仍在初始化,检查 Modal 日志") print(" modal app logs $(modal app list | grep prego-pal | awk '{print $1}')") except Exception as e: print(f"\n错误: {e}")