"""
modal_client.py — Dual AI Client: Groq (fast turns) + Modal (big moments)
==========================================================================
Architecture:
  • call_dokkaebi()           → Groq API (llama-3.1-8b-instant, 8B) — fast gameplay turns (~2s)
  • call_dokkaebi_cinematic() → Modal llama.cpp (Qwen 2.5 14B) — cinematic moments only
                                (scenario intros, end-game report card)

Total model parameter budget: 8B + 14B = 22B — well under the 32B hackathon cap.

Groq is free, LPU-accelerated, and ~10x faster than Modal for standard turns.
Modal is kept for "big moments" to satisfy the Modal prize requirement and to
leverage the larger 14B model for high-quality cinematic narrative generation.

Part of the ORV (Omniscient Reader's Viewpoint) Scenario Simulator.
Build Small Hackathon 2026.
"""

import json
import os
import re
import time
from typing import Any

import requests

# ---------------------------------------------------------------------------
# Load .env if present (for local dev)
# ---------------------------------------------------------------------------
# Best-effort loader: read KEY=VALUE pairs from a ".env" file that sits next
# to this module and copy them into os.environ. A failure here must never
# stop the module from importing, so errors are reported and then ignored.
try:
    _env_path = os.path.join(os.path.dirname(__file__), ".env")
    if os.path.isfile(_env_path):
        # utf-8-sig: decode the same way on every platform and tolerate a
        # leading byte-order mark, which would otherwise corrupt the first key.
        with open(_env_path, encoding="utf-8-sig") as _f:
            for _line in _f:
                _line = _line.strip()
                # Skip blank lines, comments, and lines with no assignment.
                if not _line or _line.startswith("#") or "=" not in _line:
                    continue
                _k, _v = _line.split("=", 1)
                _k, _v = _k.strip(), _v.strip()
                # A line such as "=value" has no usable name; skipping it
                # keeps one bad line from aborting the rest of the file.
                if not _k:
                    continue
                # KEY="value" / KEY='value': drop one pair of matching
                # surrounding quotes so they do not become part of the value.
                if len(_v) >= 2 and _v[0] == _v[-1] and _v[0] in "\"'":
                    _v = _v[1:-1]
                # setdefault: variables already present in the real
                # environment take precedence over the .env file.
                os.environ.setdefault(_k, _v)
except Exception as _exc:  # noqa: BLE001 — loading .env is optional
    print(f"[env] Could not load .env: {type(_exc).__name__}: {_exc}")

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# ── Groq (fast gameplay turns) ────────────────────────────────────────────
# API key read from the environment; empty string when it is not configured.
GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
GROQ_ENDPOINT: str = "https://api.groq.com/openai/v1/chat/completions"
# Overridable through the environment. Whatever is chosen must stay below
# 32B parameters for the hackathon.
GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama-3.1-8b-instant")
# Passed as the per-request timeout for Groq calls.
GROQ_TIMEOUT: int = 30

# ── Modal (cinematic big moments only) ───────────────────────────────────
# Base URL of the Modal deployment; the chat-completions path is appended
# by the caller.
MODAL_ENDPOINT_URL: str = os.getenv(
    "MODAL_ENDPOINT_URL",
    "https://aswinikumary--orv-dokkaebi-server-serve.modal.run",
)
# Passed as the per-request timeout for Modal calls. Kept short so a slow
# cold start (> 45s) gives up and lets the caller fall back to Groq.
MODAL_TIMEOUT: int = 45

# Keys that parse_ai_response() insists on finding in every AI reply.
_REQUIRED_KEYS: set = {
    "narrative",
    "dokkaebi_comment",
    "stat_changes",
    "suggestions",
    "entertainment_score",
    "constellation_reactions",
}


# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# FALLBACK RESPONSE
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━


def get_fallback_response() -> dict[str, Any]:
    """
    Build the canned, in-character reply used when no AI backend produced
    a usable response.

    A brand-new dict (with new nested containers) is constructed on every
    call, so mutating one result never affects a later one.
    """
    narrative_text = (
        "The probability wavers. Something shifts in the fabric of reality. "
        "You feel it — a moment of static, as if the universe itself hesitated."
    )

    # Apart from a small coin reward, the fallback turn changes nothing.
    neutral_stats: dict[str, Any] = {
        "hp": 0,
        "coins": 10,
        "meta_exposure": 0,
        "prob_stability": 0,
        "trust": {},
        "constellation_affinity": {},
    }

    default_suggestions = [
        "Look around carefully",
        "Search for survivors",
        "Do something unexpected",
    ]

    lone_reaction = {
        "modifier": "Prisoner of the Golden Headband",
        "reaction": "Even the system glitches. Amusing.",
        "coins": 20,
    }

    return {
        "narrative": narrative_text,
        "dokkaebi_comment": "...Technical difficulties. How amusing.",
        "meta_detected": False,
        "meta_reason": None,
        "reality_subversion": None,
        "stat_changes": neutral_stats,
        "new_title": None,
        "hidden_scenario": None,
        "big_moment": False,
        "suggestions": default_suggestions,
        "entertainment_score": 3,
        "constellation_reactions": [lone_reaction],
        "scenario_complete": False,
        "scenario_rank": None,
        "dokkaebi_internal": "fallback response used",
    }


# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# RESPONSE PARSER (shared)
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━


def parse_ai_response(raw_text: str) -> dict[str, Any]:
    """
    Parse the raw text returned by the AI into a validated dict.

    Handles common LLM output quirks:
    - Strips leading/trailing whitespace
    - Removes markdown code fences
    - Extracts the substring between the first { and last }
    - If that substring is not valid JSON (for example because trailing
      commentary contains another "}"), decodes just the first complete
      JSON value that starts at the first {
    - Validates that all required keys are present

    Parameters
    ----------
    raw_text : str
        The message text produced by the model.

    Returns
    -------
    dict
        The decoded JSON object, containing at least every key listed in
        _REQUIRED_KEYS.

    Raises
    ------
    ValueError
        If raw_text is not a string, no JSON object can be located or
        decoded, the decoded value is not an object, or a required key is
        missing.
    """
    # A null "content" field would reach this function as None. Report it as
    # an ordinary parse failure rather than crashing on .strip().
    if not isinstance(raw_text, str):
        raise ValueError(
            f"AI response text must be a string, got {type(raw_text).__name__}."
        )

    text = raw_text.strip()

    # Strip markdown code fences if present
    text = re.sub(r"^```(?:json)?\s*", "", text, flags=re.MULTILINE)
    text = re.sub(r"```\s*$", "", text, flags=re.MULTILINE)
    text = text.strip()

    # Find the JSON object boundaries
    first_brace = text.find("{")
    last_brace = text.rfind("}")

    if first_brace == -1 or last_brace == -1 or last_brace <= first_brace:
        raise ValueError(
            f"No valid JSON object found in AI response. "
            f"Raw text (first 200 chars): {raw_text[:200]}"
        )

    json_str = text[first_brace : last_brace + 1]

    try:
        parsed = json.loads(json_str)
    except json.JSONDecodeError as exc:
        # The slice can be spoiled by text after the object that happens to
        # contain a closing brace. raw_decode stops at the end of the first
        # complete JSON value, so such trailing text is ignored.
        try:
            parsed, _end = json.JSONDecoder().raw_decode(text, first_brace)
        except json.JSONDecodeError:
            # Report the original failure, which describes the full slice.
            raise ValueError(
                f"Failed to parse JSON from AI response: {exc}. "
                f"Extracted text (first 300 chars): {json_str[:300]}"
            ) from exc

    if not isinstance(parsed, dict):
        raise ValueError(
            f"Expected a JSON object (dict), got {type(parsed).__name__}."
        )

    # Validate required keys
    missing = _REQUIRED_KEYS - set(parsed.keys())
    if missing:
        raise ValueError(
            f"AI response is missing required keys: {missing}. "
            f"Present keys: {set(parsed.keys())}"
        )

    return parsed


# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# GROQ CALLER — fast gameplay turns
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━


def _call_groq(
    system_prompt: str,
    user_message: str,
    max_retries: int = 2,
) -> dict[str, Any]:
    """
    Call Groq's LPU-accelerated API for fast gameplay turns.
    Typically responds in 1–3 seconds.

    One system message and one user message are POSTed to GROQ_ENDPOINT
    using GROQ_MODEL, and the reply text is run through parse_ai_response().

    Retry policy (at most ``max_retries`` requests in total):
    - HTTP 429 (rate limit): wait 3s, 6s, … then try again.
    - Timeout or connection error: wait 1s then try again.
    - Malformed reply (invalid JSON, missing keys, null or unexpectedly
      shaped fields): wait 1s then try again.
    - Any other HTTP status or unexpected exception: stop immediately.
    No wait is performed after the final attempt, since nothing follows it.

    Parameters
    ----------
    system_prompt : str
        Content of the "system" chat message.
    user_message : str
        Content of the "user" chat message.
    max_retries : int
        Maximum number of requests to send.

    Returns
    -------
    dict
        The validated response produced by parse_ai_response().

    Raises
    ------
    RuntimeError
        If GROQ_API_KEY is empty, or if no attempt produced a valid
        response. In the latter case the last underlying error is chained
        as the cause.
    """
    if not GROQ_API_KEY:
        raise RuntimeError("GROQ_API_KEY is not set.")

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {GROQ_API_KEY}",
    }

    payload = {
        "model": GROQ_MODEL,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ],
        "temperature": 0.85,
        "max_tokens": 480,   # Reduced for speed — 350-450 tokens typical
        "stream": False,
        "stop": None,
    }

    last_error: Exception | None = None

    for attempt in range(max_retries):
        # Sleeping is only useful when another request will follow.
        has_retry_left = attempt < max_retries - 1
        try:
            print(
                f"[Groq] Attempt {attempt + 1}/{max_retries} — "
                f"model={GROQ_MODEL}"
            )
            t0 = time.time()
            response = requests.post(
                GROQ_ENDPOINT,
                json=payload,
                headers=headers,
                timeout=GROQ_TIMEOUT,
            )
            response.raise_for_status()
            elapsed = time.time() - t0

            data = response.json()
            raw_text = data["choices"][0]["message"]["content"]
            print(
                f"[Groq] Response in {elapsed:.1f}s — "
                f"{len(raw_text)} chars"
            )

            parsed = parse_ai_response(raw_text)
            print("[Groq] Parsed successfully.")
            return parsed

        except requests.exceptions.HTTPError as exc:
            last_error = exc
            status = getattr(exc.response, "status_code", "???")
            if status != 429:
                print(f"[Groq] HTTP {status}: {exc}")
                break  # Non-retryable HTTP error
            # 429 = rate limit — back off before the next request, if any.
            if has_retry_left:
                wait = 3 * (attempt + 1)
                print(f"[Groq] Rate limited. Waiting {wait}s…")
                time.sleep(wait)
            else:
                print("[Groq] Rate limited. No retries left.")

        except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as exc:
            last_error = exc
            print(f"[Groq] Network error attempt {attempt + 1}: {exc}")
            if has_retry_left:
                time.sleep(1)

        # TypeError covers a null "content" (len(None)) or a payload that is
        # not subscriptable; like other malformed replies it is worth a retry.
        except (ValueError, KeyError, IndexError, TypeError) as exc:
            last_error = exc
            print(f"[Groq] Parse/structure error attempt {attempt + 1}: {exc}")
            if has_retry_left:
                time.sleep(1)

        except Exception as exc:  # noqa: BLE001
            last_error = exc
            print(f"[Groq] Unexpected error: {type(exc).__name__}: {exc}")
            break

    raise RuntimeError(
        f"Groq failed after {max_retries} attempts: {last_error}"
    ) from last_error


# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# MODAL CALLER — cinematic big moments only
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━


def _call_modal(
    system_prompt: str,
    user_message: str,
    max_retries: int = 2,
) -> dict[str, Any]:
    """
    Call the Modal-hosted Qwen 2.5 14B endpoint.
    Used only for cinematic big moments (scenario intros, end-game report).
    Slower (~15-25s) but higher quality narrative generation.

    One system message and one user message are POSTed to
    ``<MODAL_ENDPOINT_URL>/v1/chat/completions`` and the reply text is run
    through parse_ai_response(). Every kind of failure (network, HTTP
    status, malformed reply) is retried, with a 1s, 2s, 4s, … pause between
    attempts and no pause after the last one.

    Parameters
    ----------
    system_prompt : str
        Content of the "system" chat message.
    user_message : str
        Content of the "user" chat message.
    max_retries : int
        Maximum number of requests to send.

    Returns
    -------
    dict
        The validated response produced by parse_ai_response().

    Raises
    ------
    RuntimeError
        If no attempt produced a valid response; the last underlying error
        is chained as the cause.
    """
    # Tolerate a base URL configured with a trailing slash, which would
    # otherwise produce ".../​/v1/chat/completions" with a doubled slash.
    url = f"{MODAL_ENDPOINT_URL.rstrip('/')}/v1/chat/completions"

    payload = {
        "model": "dokkaebi",
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ],
        "temperature": 0.8,
        "max_tokens": 600,
        "stream": False,
    }

    headers = {"Content-Type": "application/json"}
    last_error: Exception | None = None

    for attempt in range(max_retries):
        try:
            print(
                f"[Modal] Attempt {attempt + 1}/{max_retries} — "
                f"POST {url}"
            )
            t0 = time.time()
            response = requests.post(
                url,
                json=payload,
                headers=headers,
                timeout=MODAL_TIMEOUT,
            )
            response.raise_for_status()
            elapsed = time.time() - t0

            data = response.json()
            raw_text = data["choices"][0]["message"]["content"]
            print(f"[Modal] Response in {elapsed:.1f}s — {len(raw_text)} chars")

            parsed = parse_ai_response(raw_text)
            print("[Modal] Parsed successfully.")
            return parsed

        except Exception as exc:  # noqa: BLE001
            last_error = exc
            print(f"[Modal] Error attempt {attempt + 1}: {type(exc).__name__}: {exc}")
            # Exponential back-off, skipped once no further attempt remains.
            if attempt < max_retries - 1:
                wait = 2 ** attempt
                print(f"[Modal] Retrying in {wait}s…")
                time.sleep(wait)

    raise RuntimeError(
        f"Modal failed after {max_retries} attempts: {last_error}"
    ) from last_error


# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# PUBLIC API
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━


def call_dokkaebi(
    system_prompt: str,
    user_message: str,
    max_retries: int = 2,
    use_modal: bool = False,
) -> dict[str, Any]:
    """
    Single entry point for every AI request.

    The preferred backend is tried first with ``max_retries`` attempts. If
    it raises, the other backend gets exactly one attempt. If that raises
    too, the static fallback response is returned, so this function never
    propagates a backend failure to the caller.

    Parameters
    ----------
    system_prompt : str
        The full Dokkaebi system prompt (interpolated).
    user_message : str
        The player's action or a trigger phrase.
    max_retries : int
        Attempt budget for the preferred backend.
    use_modal : bool
        True  → prefer Modal (Qwen 2.5 14B), with Groq as the backup.
        False → prefer Groq (Llama 3.1 8B Instant), with Modal as the
        backup. This is the default.

    Returns
    -------
    dict
        The parsed AI response, or get_fallback_response() when both
        backends fail.
    """
    # Decide the order once; everything below is backend-agnostic.
    if use_modal:
        primary, secondary = "Modal", "Groq"
        preferred_call, backup_call = _call_modal, _call_groq
    else:
        primary, secondary = "Groq", "Modal"
        preferred_call, backup_call = _call_groq, _call_modal

    # ── Step 1: preferred backend, full retry budget ─────────────────────
    try:
        return preferred_call(system_prompt, user_message, max_retries)
    except Exception as first_failure:
        print(f"[AI] {primary} failed: {first_failure}")

    # ── Step 2: the other backend, single attempt ────────────────────────
    print(f"[AI] Falling back to {secondary}…")
    try:
        return backup_call(system_prompt, user_message, 1)
    except Exception as second_failure:
        print(f"[AI] {secondary} also failed: {second_failure}")

    # ── Step 3: nothing worked — serve the canned reply ──────────────────
    print("[AI] All backends failed. Using static fallback response.")
    return get_fallback_response()


def call_dokkaebi_cinematic(
    system_prompt: str,
    user_message: str,
) -> dict[str, Any]:
    """
    Shortcut for cinematic "big moment" generation.

    Delegates to call_dokkaebi() with Modal as the preferred backend and a
    retry budget of 2, so Groq is used if Modal fails.

    Intended for:
    - Scenario intro generation (start of each new scenario)
    - End-game report card generation
    """
    cinematic_response = call_dokkaebi(
        system_prompt=system_prompt,
        user_message=user_message,
        max_retries=2,
        use_modal=True,
    )
    return cinematic_response