"""Diagnose WHICH structured-output enforcement path the running server honors. response_format json_schema was silently ignored in the first smoke test. Test the full matrix so we know what to build M02/M08 against. Run with server up: .venv\\Scripts\\python.exe scripts\\enforce_probe.py """ from __future__ import annotations import json import sys import urllib.error import urllib.request from collections.abc import Callable from typing import Any BASE = "http://localhost:8080" QUESTION = "Is the sky blue on a clear day? Answer the question." # Target: force EXACTLY {"v": "YES"} or {"v": "NO"}. JSON_SCHEMA = { "type": "object", "properties": {"v": {"type": "string", "enum": ["YES", "NO"]}}, "required": ["v"], "additionalProperties": False, } GBNF = 'root ::= "{\\"v\\": \\"" ("YES" | "NO") "\\"}"' def emit(line: str = "") -> None: sys.stdout.write(f"{line}\n") def decode_json_object(raw: bytes) -> dict[str, Any]: parsed: Any = json.loads(raw.decode("utf-8")) if not isinstance(parsed, dict): raise TypeError("Expected a JSON object response") return parsed def post(payload: dict[str, Any]) -> dict[str, Any]: data = json.dumps(payload).encode("utf-8") req = urllib.request.Request( BASE + "/v1/chat/completions", data=data, headers={"Content-Type": "application/json"}, ) with urllib.request.urlopen( req, timeout=60 ) as resp: # nosec B310 - fixed local BASE return decode_json_object(resp.read()) def base_payload() -> dict[str, Any]: return { "messages": [{"role": "user", "content": QUESTION}], "temperature": 0.7, "max_tokens": 40, "stream": False, # thinking OFF: a grammar/schema must constrain the WHOLE output, which is # impossible if the model emits a thinking block first. "chat_template_kwargs": {"enable_thinking": False}, } def enforced(content: str) -> bool: try: return json.loads(content).get("v") in ("YES", "NO") except Exception: # noqa: BLE001 return False def run(label: str, mutate: Callable[[dict[str, Any]], object]) -> None: payload = base_payload() mutate(payload) emit(f"\n--- {label}") try: resp = post(payload) choices = resp.get("choices") content = "" if isinstance(choices, list) and choices and isinstance(choices[0], dict): message = choices[0].get("message") if isinstance(message, dict): raw_content = message.get("content") if isinstance(raw_content, str): content = raw_content emit(f" enforced={enforced(content)} content={content!r}") except urllib.error.HTTPError as e: emit(f" HTTP {e.code}: {e.read().decode('utf-8')[:300]}") except Exception as e: # noqa: BLE001 emit(f" {type(e).__name__}: {e}") cases = [ ( "A. response_format json_schema (nested schema)", lambda p: p.update( response_format={"type": "json_schema", "schema": JSON_SCHEMA} ), ), ( "B. response_format json_object + schema", lambda p: p.update( response_format={"type": "json_object", "schema": JSON_SCHEMA} ), ), ( "C. response_format OpenAI-style json_schema wrapper", lambda p: p.update( response_format={ "type": "json_schema", "json_schema": { "name": "answer", "strict": True, "schema": JSON_SCHEMA, }, } ), ), ( "D. top-level json_schema field (/completion-style)", lambda p: p.update(json_schema=JSON_SCHEMA), ), ("E. top-level grammar field (GBNF)", lambda p: p.update(grammar=GBNF)), ( "F. response_format json_schema, thinking LEFT ON (control)", lambda p: ( p.pop("chat_template_kwargs", None), p.update(response_format={"type": "json_schema", "schema": JSON_SCHEMA}), ), ), ] emit(f"Probing structured-output enforcement paths on {BASE}") for label, mutate in cases: run(label, mutate) emit("\nDone. The path(s) with enforced=True are what M02/M08 must use.")