#!/usr/bin/env python3
"""OpenAI-compatible JSON-spec extraction for Kaiju harnesses."""

from __future__ import annotations

import json
import os
import re
import urllib.error
import urllib.request
from pathlib import Path
from typing import Any


# Output-format preamble that with_fast_json_contract() prepends to a system
# prompt. The leading "Planner contract:" text doubles as the marker that
# function searches for to avoid adding the preamble twice.
FAST_JSON_CONTRACT = """Planner contract:
- Return one minified JSON object only.
- No markdown, no prose, no reasoning, no comments, no HTML, no code fences.
- Keep the whole answer compact, ideally under 150 tokens.
- Use short strings and short arrays. End immediately after the final }.
"""


# System prompt used when the caller supplies no system_prompt_file: it is the
# default for request_json_spec() and is always passed by
# request_website_spec(). It lists the JSON keys the model is asked to return.
DEFAULT_SYSTEM_PROMPT = """You are the Kaiju website spec planner.
Return strict JSON only. Do not return HTML. Do not use markdown fences.
The JSON keys must be:
business_name, business_type, location, headline, subheadline, cta,
services, sections, testimonials, hours, contact_phone, contact_email,
palette, image_urls.
Keep services to 3-5 short items. Keep sections to practical identifiers.
If images are needed, use only real https URLs you are confident exist.
"""


def with_fast_json_contract(system_prompt: str) -> str:
    """Return *system_prompt* prefixed with the fast-JSON planner contract.

    A prompt that already contains the ``"Planner contract:"`` marker is
    returned untouched, so applying this function repeatedly is harmless.
    Otherwise the result is the contract, a blank separator line, the
    whitespace-stripped prompt, and a trailing newline.
    """
    already_contracted = "Planner contract:" in system_prompt
    if already_contracted:
        return system_prompt
    trimmed_prompt = system_prompt.strip()
    return f"{FAST_JSON_CONTRACT}\n{trimmed_prompt}\n"


def extract_json_object(text: str) -> dict[str, Any]:
    """Extract the first JSON object found in a model reply.

    The reply is stripped of surrounding whitespace and of a leading
    ```` ``` ```` / ```` ```json ```` fence (plus a trailing fence) when it
    starts with one. If the remaining text is itself a JSON object it is
    returned directly. Otherwise the text is scanned left to right and the
    first ``{`` from which a complete JSON object can be decoded wins; any
    prose, extra objects, or stray braces before or after it are ignored.

    Args:
        text: Raw text returned by the model.

    Returns:
        The decoded JSON object as a ``dict``.

    Raises:
        ValueError: If no JSON object can be decoded from *text*. When a
            decode attempt was made, the first ``json.JSONDecodeError`` is
            attached as ``__cause__``.
    """
    cleaned = text.strip()
    if cleaned.startswith("```"):
        cleaned = re.sub(r"^```(?:json)?", "", cleaned).strip()
        cleaned = re.sub(r"```$", "", cleaned).strip()

    # Fast path: the whole reply is exactly one JSON document.
    try:
        value = json.loads(cleaned)
    except json.JSONDecodeError:
        pass
    else:
        if isinstance(value, dict):
            return value

    # Slow path: decode from each "{" in turn. raw_decode stops at the end of
    # the object it parsed, so trailing text (even text containing braces)
    # cannot break the parse the way a first-"{"-to-last-"}" slice would.
    decoder = json.JSONDecoder()
    first_error: json.JSONDecodeError | None = None
    position = cleaned.find("{")
    while position != -1:
        try:
            candidate, _end = decoder.raw_decode(cleaned, position)
        except json.JSONDecodeError as exc:
            if first_error is None:
                first_error = exc
        else:
            # A document that starts at "{" always decodes to a dict; the
            # check is kept only as a cheap safeguard.
            if isinstance(candidate, dict):
                return candidate
        position = cleaned.find("{", position + 1)

    raise ValueError("model did not return a JSON object") from first_error


def request_json_spec(
    *,
    base_url: str,
    model: str,
    prompt: str,
    api_key_env: str = "KAIJU_EVAL_API_KEY",
    system_prompt_file: Path | None = None,
    default_system_prompt: str = DEFAULT_SYSTEM_PROMPT,
    timeout: int = 90,
    max_tokens: int = 224,
    temperature: float = 0.0,
    disable_thinking: bool = True,
) -> dict[str, Any]:
    """Request a JSON spec from a chat-completions endpoint and parse it.

    Sends one POST to ``<base_url>/chat/completions`` with a system message
    (the chosen system prompt run through ``with_fast_json_contract``) and a
    user message (*prompt*), asking for ``response_format`` ``json_object``.
    The first choice's message content is parsed with
    ``extract_json_object``.

    Args:
        base_url: Endpoint base URL; a trailing ``/`` is ignored.
        model: Model name placed in the request body.
        prompt: User message content.
        api_key_env: Name of the environment variable holding the API key.
            When it is set to a non-empty value, it is sent as a Bearer
            token; otherwise no Authorization header is sent.
        system_prompt_file: Optional UTF-8 file whose contents replace
            *default_system_prompt*.
        default_system_prompt: System prompt used when no file is given.
        timeout: Timeout passed to ``urllib.request.urlopen``.
        max_tokens: ``max_tokens`` value placed in the request body.
        temperature: ``temperature`` value placed in the request body.
        disable_thinking: When true, adds ``chat_template_kwargs`` that
            switch off the model's thinking mode.

    Returns:
        The JSON object decoded from the model's reply.

    Raises:
        RuntimeError: On an HTTP error status (message carries the status
            code and up to 1000 characters of the error body), or when the
            decoded response has no ``choices[0].message`` entry.
        ValueError: When the reply content holds no JSON object, or when the
            response body is not valid JSON (``json.JSONDecodeError``).

    Other ``urllib`` errors (only ``HTTPError`` is caught here) and errors
    from reading *system_prompt_file* propagate unchanged.
    """
    system_prompt = default_system_prompt
    if system_prompt_file:
        system_prompt = system_prompt_file.read_text(encoding="utf-8")
    system_prompt = with_fast_json_contract(system_prompt)
    body: dict[str, Any] = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
        "temperature": temperature,
        "max_tokens": max_tokens,
        "response_format": {"type": "json_object"},
    }
    if disable_thinking:
        # SGLang/Qwen reasoning models otherwise spend the entire planner budget
        # in hidden reasoning_content and return no parseable JSON content.
        body["chat_template_kwargs"] = {"enable_thinking": False, "thinking": False}
    data = json.dumps(body).encode("utf-8")
    headers = {"Content-Type": "application/json", "User-Agent": "kaiju-website-harness/0.1"}
    api_key = os.environ.get(api_key_env)
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"
    request = urllib.request.Request(
        base_url.rstrip("/") + "/chat/completions",
        data=data,
        headers=headers,
        method="POST",
    )
    try:
        with urllib.request.urlopen(request, timeout=timeout) as response:
            payload = json.loads(response.read().decode("utf-8", errors="replace"))
    except urllib.error.HTTPError as exc:
        detail = exc.read().decode("utf-8", errors="replace")
        raise RuntimeError(f"spec model HTTP {exc.code}: {detail[:1000]}") from exc
    # A 200 response is not guaranteed to carry a completion (it may be an
    # error object or have an empty "choices" list). Report that with context
    # instead of letting a bare KeyError/IndexError escape.
    try:
        message = payload["choices"][0]["message"]
        content = message.get("content")
    except (KeyError, IndexError, TypeError, AttributeError) as exc:
        excerpt = json.dumps(payload)[:1000]
        raise RuntimeError(
            f"spec model response has no choices[0].message: {excerpt}"
        ) from exc
    # Null or missing content becomes "", which extract_json_object rejects
    # with its usual ValueError.
    return extract_json_object(content or "")


def request_website_spec(
    *,
    base_url: str,
    model: str,
    prompt: str,
    api_key_env: str = "KAIJU_EVAL_API_KEY",
    system_prompt_file: Path | None = None,
    timeout: int = 90,
    max_tokens: int = 224,
    temperature: float = 0.0,
    disable_thinking: bool = True,
) -> dict[str, Any]:
    """Request a website spec using the built-in planner system prompt.

    Thin wrapper around ``request_json_spec`` that always passes
    ``DEFAULT_SYSTEM_PROMPT`` as the default system prompt. Every other
    argument is forwarded unchanged, and the parsed JSON object is returned
    as-is.
    """
    forwarded = {
        "base_url": base_url,
        "model": model,
        "prompt": prompt,
        "api_key_env": api_key_env,
        "system_prompt_file": system_prompt_file,
        "timeout": timeout,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "disable_thinking": disable_thinking,
    }
    return request_json_spec(default_system_prompt=DEFAULT_SYSTEM_PROMPT, **forwarded)