Spaces:
Sleeping
Sleeping
"""Probe the underlying llama-server endpoint with trivial English prompts.

Use this to see whether the LLM itself is broken or only the witness path is.
Each prompt is POSTed to the chat-completions endpoint and the reply -- or the
failure, if the request or the response is bad -- is recorded in
``runtime/tmp/llm_probe.txt`` under the repository root.

The script exits with status 1 when at least one probe failed, but the report
file is always written first so the failure details are available.
"""
import http.client
import json
import sys
import time
import urllib.request
from pathlib import Path

REPO = Path(__file__).resolve().parent.parent
# Put the repository root on sys.path so the project-level ``config`` module
# can be imported when this file is run directly as a script.
sys.path.insert(0, str(REPO))

from config import load_settings

# Local chat-completions endpoint that is being probed.
ENDPOINT = "http://127.0.0.1:19060/v1/chat/completions"
# Per-request timeout, in seconds.
TIMEOUT_S = 60

settings = load_settings()

out = REPO / "runtime" / "tmp" / "llm_probe.txt"
out.parent.mkdir(parents=True, exist_ok=True)

# Each entry is (human-readable label for the report, chat messages to send).
prompts = [
    (
        "system: You are a helpful assistant. Respond ONLY in English.\nuser: What is 2 + 2? Reply with the digit.",
        [
            {"role": "system", "content": "You are a helpful assistant. Respond ONLY in English."},
            {"role": "user", "content": "What is 2 + 2? Reply with the digit."},
        ],
    ),
    (
        "system: Reply in English.\nuser: Name a single fruit.",
        [
            {"role": "system", "content": "Reply in English."},
            {"role": "user", "content": "Name a single fruit."},
        ],
    ),
    (
        "system: -\nuser: Hello, how are you?",
        [{"role": "user", "content": "Hello, how are you?"}],
    ),
]

# The model name does not change between prompts, so resolve it once.
model = settings.minicpm_quantization or settings.llm_model

lines: list[str] = []
failures = 0
for label, msgs in prompts:
    payload = {
        "model": model,
        "messages": msgs,
        "temperature": 0.3,
        "max_tokens": 80,
        "stream": False,
    }
    req = urllib.request.Request(
        ENDPOINT,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )

    lines.append(f"=== {label}")
    start = time.perf_counter()
    try:
        with urllib.request.urlopen(req, timeout=TIMEOUT_S) as resp:
            data = json.loads(resp.read().decode("utf-8"))
    except (OSError, http.client.HTTPException, ValueError) as exc:
        # URLError/HTTPError, refused connections and timeouts are OSError
        # subclasses; JSON and UTF-8 decode errors are ValueError subclasses.
        # A broken server is exactly what this probe is meant to reveal, so
        # record the failure and move on instead of aborting without a report.
        elapsed = time.perf_counter() - start
        failures += 1
        lines.append(f"elapsed: {elapsed:.2f}s")
        lines.append(f"error: {type(exc).__name__}: {exc}")
        lines.append("")
        continue
    elapsed = time.perf_counter() - start
    lines.append(f"elapsed: {elapsed:.2f}s")

    try:
        content = data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError):
        content = None
    if not isinstance(content, str):
        # The body was valid JSON but not the expected completion shape (or
        # the content was null); keep the raw payload for diagnosis.
        failures += 1
        lines.append(f"error: unexpected response: {data!r}")
        lines.append("")
        continue

    text = content.strip()
    lines.append(f"reply: {text!r}")
    lines.append(f"raw : {text}")
    lines.append("")

out.write_text("\n".join(lines), encoding="utf-8")
print(f"wrote: {out}")

if failures:
    # Keep a non-zero exit status for failed probes now that errors are
    # recorded in the report rather than raised.
    sys.exit(1)