Spaces:

build-small-hackathon
/

phantom-grid

Sleeping

App Files Files Community

phantom-grid / scripts /_quick_llm_test.py

unity4ar

Ship Phantom Grid Docker Space

d2e6f94 verified 17 days ago

Raw

History Blame Contribute Delete

1.98 kB

	"""Hit the underlying llama-server endpoint with a trivial English prompt to
	see if the LLM itself is broken or only the witness path is."""
	import json, sys, time, urllib.request
	from pathlib import Path
	REPO = Path(__file__).resolve().parent.parent
	sys.path.insert(0, str(REPO))
	from config import load_settings

	settings = load_settings()
	out = REPO / "runtime" / "tmp" / "llm_probe.txt"
	out.parent.mkdir(parents=True, exist_ok=True)

	prompts = [
	("system: You are a helpful assistant. Respond ONLY in English.\nuser: What is 2 + 2? Reply with the digit.",
	[{"role": "system", "content": "You are a helpful assistant. Respond ONLY in English."},
	{"role": "user", "content": "What is 2 + 2? Reply with the digit."}]),
	("system: Reply in English.\nuser: Name a single fruit.",
	[{"role": "system", "content": "Reply in English."},
	{"role": "user", "content": "Name a single fruit."}]),
	("system: -\nuser: Hello, how are you?",
	[{"role": "user", "content": "Hello, how are you?"}]),
	]

	lines: list[str] = []
	for label, msgs in prompts:
	payload = {
	"model": settings.minicpm_quantization or settings.llm_model,
	"messages": msgs,
	"temperature": 0.3,
	"max_tokens": 80,
	"stream": False,
	}
	req = urllib.request.Request(
	"http://127.0.0.1:19060/v1/chat/completions",
	data=json.dumps(payload).encode("utf-8"),
	headers={"Content-Type": "application/json"},
	method="POST",
	)
	start = time.perf_counter()
	with urllib.request.urlopen(req, timeout=60) as resp:
	data = json.loads(resp.read().decode("utf-8"))
	elapsed = time.perf_counter() - start
	text = data["choices"][0]["message"]["content"].strip()
	lines.append(f"=== {label}")
	lines.append(f"elapsed: {elapsed:.2f}s")
	lines.append(f"reply: {text!r}")
	lines.append(f"raw : {text}")
	lines.append("")
	out.write_text("\n".join(lines), encoding="utf-8")
	print(f"wrote: {out}")