| |
| """OpenAI-compatible JSON-spec extraction for Kaiju harnesses.""" |
|
|
| from __future__ import annotations |
|
|
| import json |
| import os |
| import re |
| import urllib.error |
| import urllib.request |
| from pathlib import Path |
| from typing import Any |
|
|
|
|
# Output contract that with_fast_json_contract() prepends to a system prompt.
# Its first line ("Planner contract:") is also the marker that function looks
# for to decide whether a prompt already carries the contract, so keep that
# line intact when editing the text below.
FAST_JSON_CONTRACT = """Planner contract:
- Return one minified JSON object only.
- No markdown, no prose, no reasoning, no comments, no HTML, no code fences.
- Keep the whole answer compact, ideally under 150 tokens.
- Use short strings and short arrays. End immediately after the final }.
"""
|
|
|
|
# Default system prompt used by request_json_spec() / request_website_spec()
# when no system_prompt_file is supplied. It names the exact top-level keys
# the returned website-spec JSON object must contain.
DEFAULT_SYSTEM_PROMPT = """You are the Kaiju website spec planner.
Return strict JSON only. Do not return HTML. Do not use markdown fences.
The JSON keys must be:
business_name, business_type, location, headline, subheadline, cta,
services, sections, testimonials, hours, contact_phone, contact_email,
palette, image_urls.
Keep services to 3-5 short items. Keep sections to practical identifiers.
If images are needed, use only real https URLs you are confident exist.
"""
|
|
|
|
def with_fast_json_contract(system_prompt: str) -> str:
    """Return *system_prompt* with the fast-JSON planner contract in front.

    A prompt that already contains the text ``"Planner contract:"`` is
    returned exactly as given. Otherwise the result is ``FAST_JSON_CONTRACT``,
    a newline, the prompt with surrounding whitespace stripped, and one
    trailing newline.
    """
    already_has_contract = "Planner contract:" in system_prompt
    if not already_has_contract:
        trimmed = system_prompt.strip()
        return f"{FAST_JSON_CONTRACT}\n{trimmed}\n"
    return system_prompt
|
|
|
|
def extract_json_object(text: str) -> dict[str, Any]:
    """Extract the JSON object contained in a model reply.

    The reply is stripped of surrounding whitespace and, if it starts with a
    Markdown code fence, of the opening (optionally ``json``-tagged) and
    closing fence. If what remains parses as a JSON object it is returned
    directly. Otherwise the object beginning at the first ``{`` is decoded
    and anything after its closing brace is ignored.

    Args:
        text: Raw text returned by the model.

    Returns:
        The decoded JSON object.

    Raises:
        ValueError: If the text contains no ``{`` with a ``}`` somewhere
            after it.
        json.JSONDecodeError: If the text starting at the first ``{`` is not
            a well-formed JSON object (for example a truncated reply). This
            is a subclass of ``ValueError``.
    """
    cleaned = text.strip()
    if cleaned.startswith("```"):
        cleaned = re.sub(r"^```(?:json)?", "", cleaned).strip()
        cleaned = re.sub(r"```$", "", cleaned).strip()

    try:
        value = json.loads(cleaned)
    except json.JSONDecodeError:
        pass
    else:
        if isinstance(value, dict):
            return value

    start = cleaned.find("{")
    end = cleaned.rfind("}")
    if start == -1 or end <= start:
        raise ValueError("model did not return a JSON object")

    # Decode exactly one value anchored at the first "{" instead of slicing up
    # to the last "}": trailing prose or a repeated object that itself
    # contains "}" would otherwise make the slice invalid ("Extra data").
    # Only the first "{" is tried, so a truncated outer object still raises
    # rather than silently yielding one of its nested objects.
    value, _ = json.JSONDecoder().raw_decode(cleaned, start)
    # A JSON value that starts with "{" can only decode to a dict.
    return value
|
|
|
|
def request_json_spec(
    *,
    base_url: str,
    model: str,
    prompt: str,
    api_key_env: str = "KAIJU_EVAL_API_KEY",
    system_prompt_file: Path | None = None,
    default_system_prompt: str = DEFAULT_SYSTEM_PROMPT,
    timeout: int = 90,
    max_tokens: int = 224,
    temperature: float = 0.0,
    disable_thinking: bool = True,
) -> dict[str, Any]:
    """POST a chat-completions request and return the reply as a JSON object.

    Args:
        base_url: API root; ``/chat/completions`` is appended after removing
            any trailing slashes.
        model: Value sent as the ``model`` field.
        prompt: Content of the user message.
        api_key_env: Name of the environment variable holding the API key.
            When it is unset or empty no ``Authorization`` header is sent.
        system_prompt_file: Optional UTF-8 file whose text replaces
            ``default_system_prompt``.
        default_system_prompt: System prompt used when no file is given.
        timeout: Timeout passed to ``urllib.request.urlopen``.
        max_tokens: Value sent as ``max_tokens``.
        temperature: Value sent as ``temperature``.
        disable_thinking: When true, ``chat_template_kwargs`` is added to the
            request with ``enable_thinking`` and ``thinking`` both false.

    Returns:
        The object parsed from the first choice's message content by
        ``extract_json_object``.

    Raises:
        RuntimeError: On an HTTP error status; the message carries the status
            code and up to the first 1000 characters of the response body.
        ValueError: Propagated from ``extract_json_object`` when the content
            holds no JSON object.
    """
    chosen_prompt = (
        system_prompt_file.read_text(encoding="utf-8")
        if system_prompt_file
        else default_system_prompt
    )
    messages = [
        {"role": "system", "content": with_fast_json_contract(chosen_prompt)},
        {"role": "user", "content": prompt},
    ]
    request_body: dict[str, Any] = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "response_format": {"type": "json_object"},
    }
    if disable_thinking:
        request_body["chat_template_kwargs"] = {"enable_thinking": False, "thinking": False}
    encoded_body = json.dumps(request_body).encode("utf-8")

    http_headers = {
        "Content-Type": "application/json",
        "User-Agent": "kaiju-website-harness/0.1",
    }
    token = os.environ.get(api_key_env)
    if token:
        http_headers["Authorization"] = f"Bearer {token}"

    endpoint = base_url.rstrip("/") + "/chat/completions"
    http_request = urllib.request.Request(
        endpoint,
        data=encoded_body,
        headers=http_headers,
        method="POST",
    )
    try:
        with urllib.request.urlopen(http_request, timeout=timeout) as http_response:
            raw_reply = http_response.read()
            payload = json.loads(raw_reply.decode("utf-8", errors="replace"))
    except urllib.error.HTTPError as exc:
        # Surface the server's error body (truncated) alongside the status.
        detail = exc.read().decode("utf-8", errors="replace")
        raise RuntimeError(f"spec model HTTP {exc.code}: {detail[:1000]}") from exc

    # A null/empty content field is treated as an empty string.
    message_text = payload["choices"][0]["message"]["content"] or ""
    return extract_json_object(message_text)
|
|
|
|
def request_website_spec(
    *,
    base_url: str,
    model: str,
    prompt: str,
    api_key_env: str = "KAIJU_EVAL_API_KEY",
    system_prompt_file: Path | None = None,
    timeout: int = 90,
    max_tokens: int = 224,
    temperature: float = 0.0,
    disable_thinking: bool = True,
) -> dict[str, Any]:
    """Request a website spec via ``request_json_spec``.

    Forwards every argument unchanged and fixes ``default_system_prompt`` to
    ``DEFAULT_SYSTEM_PROMPT``. Returns whatever ``request_json_spec`` returns
    and raises whatever it raises.
    """
    forwarded: dict[str, Any] = {
        "base_url": base_url,
        "model": model,
        "prompt": prompt,
        "api_key_env": api_key_env,
        "system_prompt_file": system_prompt_file,
        "timeout": timeout,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "disable_thinking": disable_thinking,
    }
    return request_json_spec(default_system_prompt=DEFAULT_SYSTEM_PROMPT, **forwarded)
|
|