# loosecanvas / scripts/enforce_probe.py
# Joshua Sundance Bailey
# loosecanvas: local AI thought-mapping canvas with a trust-tagged knowledge graph
# 6d1438c
"""Diagnose WHICH structured-output enforcement path the running server honors.
response_format json_schema was silently ignored in the first smoke test. Test the
full matrix so we know what to build M02/M08 against. Run with server up:
.venv\\Scripts\\python.exe scripts\\enforce_probe.py
"""
from __future__ import annotations
import json
import sys
import urllib.error
import urllib.request
from collections.abc import Callable
from typing import Any
# Root URL of the server being probed.
BASE = "http://localhost:8080"

# Prompt sent as the single user message in every probe request.
QUESTION = "Is the sky blue on a clear day? Answer the question."

# Target: force EXACTLY {"v": "YES"} or {"v": "NO"}.
JSON_SCHEMA = {
    "type": "object",
    "properties": {
        "v": {
            "type": "string",
            "enum": ["YES", "NO"],
        },
    },
    "required": ["v"],
    "additionalProperties": False,
}

# The same target expressed as a GBNF grammar. A raw string keeps the
# backslash-escaped quotes of the grammar readable.
GBNF = r'root ::= "{\"v\": \"" ("YES" | "NO") "\"}"'
def emit(line: str = "") -> None:
    """Write ``line`` to standard output, terminated by one newline.

    Called without an argument it prints an empty line.
    """
    sys.stdout.write("{}\n".format(line))
def decode_json_object(raw: bytes) -> dict[str, Any]:
    """Decode UTF-8 ``raw`` bytes as JSON and return the resulting object.

    Raises:
        TypeError: if the top-level JSON value is not an object (dict).

    Decoding and JSON parsing errors are not caught here.
    """
    text = raw.decode("utf-8")
    value: Any = json.loads(text)
    if isinstance(value, dict):
        return value
    raise TypeError("Expected a JSON object response")
def post(payload: dict[str, Any]) -> dict[str, Any]:
    """Send ``payload`` as a JSON POST body and return the decoded JSON object.

    The request targets ``BASE + "/v1/chat/completions"`` with a 60-second
    timeout. Nothing is caught here: errors raised by ``urllib`` or by
    ``decode_json_object`` propagate to the caller.
    """
    body = json.dumps(payload).encode("utf-8")
    url = BASE + "/v1/chat/completions"
    request = urllib.request.Request(
        url,
        data=body,
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(request, timeout=60) as response:  # nosec B310 - fixed local BASE
        raw = response.read()
    return decode_json_object(raw)
def base_payload() -> dict[str, Any]:
    """Build a fresh request payload that each probe case then mutates.

    A new dict is returned on every call, so one case's changes never leak
    into another.
    """
    user_turn = {"role": "user", "content": QUESTION}
    payload: dict[str, Any] = {
        "messages": [user_turn],
        "temperature": 0.7,
        "max_tokens": 40,
        "stream": False,
    }
    # Thinking OFF: a grammar/schema must constrain the WHOLE output, which is
    # impossible if the model emits a thinking block first.
    payload["chat_template_kwargs"] = {"enable_thinking": False}
    return payload
def enforced(content: str) -> bool:
    """Report whether ``content`` is exactly the constrained answer object.

    Returns True only when ``content`` parses as a JSON object whose single
    key is ``"v"`` and whose value is ``"YES"`` or ``"NO"``. Anything else
    (unparseable text, a non-object JSON value, a different value, or an
    object carrying extra keys) returns False.
    """
    try:
        parsed = json.loads(content)
    except (ValueError, TypeError, RecursionError):
        # ValueError covers json.JSONDecodeError; TypeError covers a
        # non-string argument; RecursionError covers absurdly nested input.
        return False
    if not isinstance(parsed, dict):
        return False
    # Extra keys mean the output was not actually constrained to the target
    # shape, so they must not count as enforcement.
    return parsed.keys() == {"v"} and parsed["v"] in ("YES", "NO")
def _first_message_content(resp: dict[str, Any]) -> str:
    """Return ``resp["choices"][0]["message"]["content"]`` if it is a string, else ""."""
    choices = resp.get("choices")
    if not (isinstance(choices, list) and choices and isinstance(choices[0], dict)):
        return ""
    message = choices[0].get("message")
    if not isinstance(message, dict):
        return ""
    raw_content = message.get("content")
    return raw_content if isinstance(raw_content, str) else ""


def run(label: str, mutate: Callable[[dict[str, Any]], object]) -> None:
    """Run one probe case and print its outcome.

    Builds a fresh base payload, lets ``mutate`` modify it in place, posts it,
    and prints whether the returned message content passed ``enforced``.

    Args:
        label: Human-readable case name, printed as a header line.
        mutate: Callable that edits the payload dict in place; its return
            value is ignored.

    Failures are reported on stdout rather than raised, so one failing case
    does not stop the remaining cases.
    """
    payload = base_payload()
    mutate(payload)
    emit(f"\n--- {label}")
    try:
        content = _first_message_content(post(payload))
        emit(f" enforced={enforced(content)} content={content!r}")
    except urllib.error.HTTPError as e:
        # Decode leniently: an error body that is not valid UTF-8 must not
        # raise from inside this handler and abort the whole probe.
        body = e.read().decode("utf-8", errors="replace")
        emit(f" HTTP {e.code}: {body[:300]}")
    except Exception as e:  # noqa: BLE001
        emit(f" {type(e).__name__}: {e}")
def _setting(key: str, value: object) -> Callable[[dict[str, Any]], object]:
    """Return a mutator that stores ``value`` under ``key`` in the payload."""

    def apply(p: dict[str, Any]) -> None:
        p[key] = value

    return apply


def _schema_with_thinking_on(p: dict[str, Any]) -> None:
    """Drop the thinking-off switch, then request the nested json_schema format."""
    p.pop("chat_template_kwargs", None)
    p["response_format"] = {"type": "json_schema", "schema": JSON_SCHEMA}


# Probe matrix: each entry is (label, mutator applied to a fresh base payload).
cases: list[tuple[str, Callable[[dict[str, Any]], object]]] = [
    (
        "A. response_format json_schema (nested schema)",
        _setting("response_format", {"type": "json_schema", "schema": JSON_SCHEMA}),
    ),
    (
        "B. response_format json_object + schema",
        _setting("response_format", {"type": "json_object", "schema": JSON_SCHEMA}),
    ),
    (
        "C. response_format OpenAI-style json_schema wrapper",
        _setting(
            "response_format",
            {
                "type": "json_schema",
                "json_schema": {
                    "name": "answer",
                    "strict": True,
                    "schema": JSON_SCHEMA,
                },
            },
        ),
    ),
    (
        "D. top-level json_schema field (/completion-style)",
        _setting("json_schema", JSON_SCHEMA),
    ),
    ("E. top-level grammar field (GBNF)", _setting("grammar", GBNF)),
    (
        "F. response_format json_schema, thinking LEFT ON (control)",
        _schema_with_thinking_on,
    ),
]
# Announce the target server, run every case in table order, then print the
# closing hint on how to read the results.
emit("Probing structured-output enforcement paths on {}".format(BASE))
for probe_label, probe_mutator in cases:
    run(probe_label, probe_mutator)
emit("\nDone. The path(s) with enforced=True are what M02/M08 must use.")