phantom-grid / scripts /_quick_llm_test.py
unity4ar's picture
Ship Phantom Grid Docker Space
d2e6f94 verified
Raw
History Blame Contribute Delete
1.98 kB
"""Hit the underlying llama-server endpoint with a trivial English prompt to
see if the LLM itself is broken or only the witness path is."""
import json, sys, time, urllib.request
from pathlib import Path
# Repository root: the directory two levels above this file.
_SCRIPT_DIR = Path(__file__).resolve().parent
REPO = _SCRIPT_DIR.parent

# Put the repository root first on the import path before importing
# ``config`` (presumably a project-local module that lives there).
sys.path.insert(0, str(REPO))
from config import load_settings  # noqa: E402 - must follow the sys.path tweak

settings = load_settings()

# File that receives the probe results; make sure its directory exists.
out = REPO.joinpath("runtime", "tmp", "llm_probe.txt")
out.parent.mkdir(parents=True, exist_ok=True)
# Probe conversations as (label, messages) pairs. The label is a plain-text
# rendering of the conversation used as a heading in the report; the messages
# list is what gets sent as the request's "messages" field.
prompts: list[tuple[str, list[dict[str, str]]]] = [
    (
        "system: You are a helpful assistant. Respond ONLY in English.\n"
        "user: What is 2 + 2? Reply with the digit.",
        [
            {
                "role": "system",
                "content": "You are a helpful assistant. Respond ONLY in English.",
            },
            {"role": "user", "content": "What is 2 + 2? Reply with the digit."},
        ],
    ),
    (
        "system: Reply in English.\n"
        "user: Name a single fruit.",
        [
            {"role": "system", "content": "Reply in English."},
            {"role": "user", "content": "Name a single fruit."},
        ],
    ),
    (
        # No system message is sent for this one; the label marks that with "-".
        "system: -\n"
        "user: Hello, how are you?",
        [
            {"role": "user", "content": "Hello, how are you?"},
        ],
    ),
]
# Send each probe conversation to the local chat-completions endpoint and
# collect a plain-text report. A failing probe is recorded in the report
# instead of aborting the run, so the output file always reflects this run
# (and never a stale earlier one) even when the server is down or misbehaving.
endpoint = "http://127.0.0.1:19060/v1/chat/completions"
# Falls back to settings.llm_model when minicpm_quantization is falsy.
model_name = settings.minicpm_quantization or settings.llm_model

lines: list[str] = []
failures = 0
for label, msgs in prompts:
    payload = {
        "model": model_name,
        "messages": msgs,
        "temperature": 0.3,
        "max_tokens": 80,
        "stream": False,
    }
    req = urllib.request.Request(
        endpoint,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    lines.append(f"=== {label}")
    start = time.perf_counter()
    try:
        with urllib.request.urlopen(req, timeout=60) as resp:
            data = json.loads(resp.read().decode("utf-8"))
        elapsed = time.perf_counter() - start
        text = data["choices"][0]["message"]["content"].strip()
    except (OSError, ValueError, LookupError, TypeError, AttributeError) as exc:
        # OSError covers urllib's URLError/HTTPError and socket timeouts;
        # ValueError covers invalid JSON or undecodable bytes; the rest cover
        # a response that lacks the expected choices/message/content shape
        # (including a null "content").
        failures += 1
        lines.append(f"elapsed: {time.perf_counter() - start:.2f}s")
        lines.append(f"error: {type(exc).__name__}: {exc}")
        lines.append("")
        continue
    lines.append(f"elapsed: {elapsed:.2f}s")
    lines.append(f"reply: {text!r}")
    lines.append(f"raw : {text}")
    lines.append("")

out.write_text("\n".join(lines), encoding="utf-8")
print(f"wrote: {out}")
if failures:
    # Keep signalling failure through the exit status, as the uncaught
    # exception did before, but only after the report has been written.
    sys.exit(f"{failures} of {len(prompts)} probes failed")