""" modal_client.py — Dual AI Client: Groq (fast turns) + Modal (big moments) ========================================================================== Architecture: • call_dokkaebi() → Groq API (llama-3.1-8b-instant, 8B) — fast gameplay turns (~2s) • call_dokkaebi_modal() → Modal llama.cpp (Qwen 2.5 14B) — cinematic moments only (scenario intros, end-game report card) Total model parameter budget: 8B + 14B = 22B — well under the 32B hackathon cap. Groq is free, LPU-accelerated, and ~10x faster than Modal for standard turns. Modal is kept for "big moments" to satisfy the Modal prize requirement and to leverage the larger 14B model for high-quality cinematic narrative generation. Part of the ORV (Omniscient Reader's Viewpoint) Scenario Simulator. Build Small Hackathon 2026. """ import json import os import re import time from typing import Any import requests # --------------------------------------------------------------------------- # Load .env if present (for local dev) # --------------------------------------------------------------------------- try: _env_path = os.path.join(os.path.dirname(__file__), ".env") if os.path.isfile(_env_path): with open(_env_path) as _f: for _line in _f: _line = _line.strip() if _line and not _line.startswith("#") and "=" in _line: _k, _v = _line.split("=", 1) os.environ.setdefault(_k.strip(), _v.strip()) except Exception: pass # --------------------------------------------------------------------------- # Configuration # --------------------------------------------------------------------------- # ── Groq (fast gameplay turns) ──────────────────────────────────────────── GROQ_API_KEY: str = os.environ.get("GROQ_API_KEY", "") GROQ_ENDPOINT: str = "https://api.groq.com/openai/v1/chat/completions" # Must be <32B parameters for the hackathon! 
GROQ_MODEL: str = os.environ.get("GROQ_MODEL", "llama-3.1-8b-instant")
GROQ_TIMEOUT: int = 30  # seconds, passed as the requests timeout

# ── Modal (cinematic big moments only) ───────────────────────────────────
MODAL_ENDPOINT_URL: str = os.environ.get(
    "MODAL_ENDPOINT_URL",
    "https://aswinikumary--orv-dokkaebi-server-serve.modal.run",
)
MODAL_TIMEOUT: int = 45  # Reduced: if Modal cold-starts > 45s, fall back to Groq

# Required keys in every valid AI response
_REQUIRED_KEYS: set = {
    "narrative",
    "dokkaebi_comment",
    "stat_changes",
    "suggestions",
    "entertainment_score",
    "constellation_reactions",
}


# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# FALLBACK RESPONSE
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

def get_fallback_response() -> dict[str, Any]:
    """
    Return a safe, in-character fallback response for when ALL AI
    endpoints are unreachable or return invalid data.

    A fresh dict is built on every call, so callers may mutate the result.
    It contains every key listed in ``_REQUIRED_KEYS``.
    """
    return {
        "narrative": (
            "The probability wavers. Something shifts in the fabric of reality. "
            "You feel it — a moment of static, as if the universe itself hesitated."
        ),
        "dokkaebi_comment": "...Technical difficulties. How amusing.",
        "meta_detected": False,
        "meta_reason": None,
        "reality_subversion": None,
        "stat_changes": {
            "hp": 0,
            "coins": 10,
            "meta_exposure": 0,
            "prob_stability": 0,
            "trust": {},
            "constellation_affinity": {},
        },
        "new_title": None,
        "hidden_scenario": None,
        "big_moment": False,
        "suggestions": [
            "Look around carefully",
            "Search for survivors",
            "Do something unexpected",
        ],
        "entertainment_score": 3,
        "constellation_reactions": [
            {
                "modifier": "Prisoner of the Golden Headband",
                "reaction": "Even the system glitches. Amusing.",
                "coins": 20,
            }
        ],
        "scenario_complete": False,
        "scenario_rank": None,
        "dokkaebi_internal": "fallback response used",
    }


# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# RESPONSE PARSER (shared)
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

def parse_ai_response(raw_text: str) -> dict[str, Any]:
    """
    Parse the raw text returned by the AI into a validated dict.

    Handles common LLM output quirks:
      - Strips leading/trailing whitespace
      - Removes markdown code fences
      - Extracts the substring between the first { and last }
      - If that substring is not valid JSON (e.g. the model appended
        trailing text that itself contains a brace), decodes just the
        first complete JSON value starting at the first {
      - Validates that all required keys are present

    Raises
    ------
    ValueError
        If *raw_text* is not a string, contains no JSON object, cannot be
        decoded, decodes to something other than an object, or lacks any
        key from ``_REQUIRED_KEYS``.
    """
    # Guard first: a non-string (e.g. None content) would otherwise raise
    # AttributeError, which callers do not treat as a retryable parse error.
    if not isinstance(raw_text, str):
        raise ValueError(
            f"AI response content must be a string, "
            f"got {type(raw_text).__name__}."
        )

    text = raw_text.strip()

    # Strip markdown code fences if present
    text = re.sub(r"^```(?:json)?\s*", "", text, flags=re.MULTILINE)
    text = re.sub(r"```\s*$", "", text, flags=re.MULTILINE)
    text = text.strip()

    # Find the JSON object boundaries
    first_brace = text.find("{")
    last_brace = text.rfind("}")

    # last_brace == -1 is covered by the <= comparison once first_brace >= 0.
    if first_brace == -1 or last_brace <= first_brace:
        raise ValueError(
            f"No valid JSON object found in AI response. "
            f"Raw text (first 200 chars): {raw_text[:200]}"
        )

    json_str = text[first_brace : last_brace + 1]

    try:
        parsed = json.loads(json_str)
    except json.JSONDecodeError as exc:
        # Second chance: decode only the first JSON value and ignore whatever
        # follows it. Input that json.loads accepted above never reaches here.
        try:
            parsed, _end = json.JSONDecoder().raw_decode(text, first_brace)
        except json.JSONDecodeError:
            raise ValueError(
                f"Failed to parse JSON from AI response: {exc}. "
                f"Extracted text (first 300 chars): {json_str[:300]}"
            ) from exc

    if not isinstance(parsed, dict):
        raise ValueError(
            f"Expected a JSON object (dict), got {type(parsed).__name__}."
        )

    # Validate required keys
    missing = _REQUIRED_KEYS - set(parsed.keys())
    if missing:
        raise ValueError(
            f"AI response is missing required keys: {missing}. "
            f"Present keys: {set(parsed.keys())}"
        )

    return parsed


# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# GROQ CALLER — fast gameplay turns
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

def _call_groq(
    system_prompt: str,
    user_message: str,
    max_retries: int = 2,
) -> dict[str, Any]:
    """
    Call Groq's LPU-accelerated API for fast gameplay turns.
    Typically responds in 1–3 seconds.

    Retry policy per attempt:
      - HTTP 429: wait ``3 * attempt_number`` seconds, then retry
      - any other HTTP error: give up immediately
      - timeout / connection error / unparsable response: wait 1s, retry
      - anything else: give up immediately
    No wait is performed after the final attempt.

    Raises
    ------
    RuntimeError
        If ``GROQ_API_KEY`` is empty, or if no attempt produced a valid
        parsed response.
    """
    if not GROQ_API_KEY:
        raise RuntimeError("GROQ_API_KEY is not set.")

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {GROQ_API_KEY}",
    }
    payload = {
        "model": GROQ_MODEL,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ],
        "temperature": 0.85,
        "max_tokens": 480,  # Reduced for speed — 350-450 tokens typical
        "stream": False,
        "stop": None,
    }

    last_error: Exception | None = None
    attempts_made = 0  # real number of requests sent, for the final message

    for attempt in range(max_retries):
        attempts_made = attempt + 1
        has_attempts_left = attempt < max_retries - 1
        try:
            print(
                f"[Groq] Attempt {attempt + 1}/{max_retries} — "
                f"model={GROQ_MODEL}"
            )
            t0 = time.monotonic()  # monotonic: immune to wall-clock jumps
            response = requests.post(
                GROQ_ENDPOINT,
                json=payload,
                headers=headers,
                timeout=GROQ_TIMEOUT,
            )
            response.raise_for_status()
            elapsed = time.monotonic() - t0

            data = response.json()
            raw_text = data["choices"][0]["message"]["content"]
            print(
                f"[Groq] Response in {elapsed:.1f}s — "
                f"{len(raw_text)} chars"
            )

            parsed = parse_ai_response(raw_text)
            print("[Groq] Parsed successfully.")
            return parsed

        except requests.exceptions.HTTPError as exc:
            last_error = exc
            status = getattr(exc.response, "status_code", "???")
            if status != 429:
                print(f"[Groq] HTTP {status}: {exc}")
                break  # Non-retryable HTTP error
            # 429 = rate limit — wait and retry, but only if a retry follows;
            # sleeping after the last attempt would just delay the fallback.
            if has_attempts_left:
                wait = 3 * (attempt + 1)
                print(f"[Groq] Rate limited. Waiting {wait}s…")
                time.sleep(wait)
            else:
                print("[Groq] Rate limited. No attempts left.")

        except (
            requests.exceptions.Timeout,
            requests.exceptions.ConnectionError,
        ) as exc:
            last_error = exc
            print(f"[Groq] Network error attempt {attempt + 1}: {exc}")
            if has_attempts_left:
                time.sleep(1)

        except (ValueError, KeyError, IndexError) as exc:
            last_error = exc
            print(f"[Groq] Parse/structure error attempt {attempt + 1}: {exc}")
            if has_attempts_left:
                time.sleep(1)

        except Exception as exc:  # noqa: BLE001
            last_error = exc
            print(f"[Groq] Unexpected error: {type(exc).__name__}: {exc}")
            break

    raise RuntimeError(f"Groq failed after {attempts_made} attempts: {last_error}")


# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# MODAL CALLER — cinematic big moments only
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

def _call_modal(
    system_prompt: str,
    user_message: str,
    max_retries: int = 2,
) -> dict[str, Any]:
    """
    Call the Modal-hosted Qwen 2.5 14B endpoint.
    Used only for cinematic big moments (scenario intros, end-game report).
    Slower (~15-25s) but higher quality narrative generation.

    Every failure (network, HTTP status, malformed payload, parse error)
    is retried, waiting ``2 ** attempt`` seconds between attempts.

    Raises
    ------
    RuntimeError
        If no attempt produced a valid parsed response.
    """
    # Tolerate a trailing slash in the configured base URL.
    url = f"{MODAL_ENDPOINT_URL.rstrip('/')}/v1/chat/completions"
    payload = {
        "model": "dokkaebi",
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ],
        "temperature": 0.8,
        "max_tokens": 600,
        "stream": False,
    }
    headers = {"Content-Type": "application/json"}

    last_error: Exception | None = None

    for attempt in range(max_retries):
        try:
            print(
                f"[Modal] Attempt {attempt + 1}/{max_retries} — "
                f"POST {url}"
            )
            t0 = time.monotonic()
            response = requests.post(
                url,
                json=payload,
                headers=headers,
                timeout=MODAL_TIMEOUT,
            )
            response.raise_for_status()
            elapsed = time.monotonic() - t0

            data = response.json()
            raw_text = data["choices"][0]["message"]["content"]
            print(f"[Modal] Response in {elapsed:.1f}s — {len(raw_text)} chars")

            parsed = parse_ai_response(raw_text)
            print("[Modal] Parsed successfully.")
            return parsed

        except Exception as exc:  # noqa: BLE001
            last_error = exc
            print(f"[Modal] Error attempt {attempt + 1}: {type(exc).__name__}: {exc}")
            if attempt < max_retries - 1:
                wait = 2 ** attempt
                print(f"[Modal] Retrying in {wait}s…")
                time.sleep(wait)

    raise RuntimeError(f"Modal failed after {max_retries} attempts: {last_error}")


# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# PUBLIC API
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

def call_dokkaebi(
    system_prompt: str,
    user_message: str,
    max_retries: int = 2,
    use_modal: bool = False,
) -> dict[str, Any]:
    """
    Main entry point for all AI calls.

    Parameters
    ----------
    system_prompt : str
        The full Dokkaebi system prompt (interpolated).
    user_message : str
        The player's action or a trigger phrase.
    max_retries : int
        Max retry attempts for the primary backend. The secondary backend
        is always tried exactly once.
    use_modal : bool
        If True, use Modal (Qwen 2.5 14B) for high-quality cinematic output.
        If False (default), use Groq (Llama 3.1 8B Instant) for fast gameplay.

    Returns
    -------
    dict
        Parsed AI response. Falls back to get_fallback_response() if all
        backends fail, so this function does not raise on backend errors.
    """
    if use_modal:
        primary, primary_call = "Modal", _call_modal
        secondary, secondary_call = "Groq", _call_groq
    else:
        primary, primary_call = "Groq", _call_groq
        secondary, secondary_call = "Modal", _call_modal

    # ── Primary: try the requested backend ───────────────────────────────
    try:
        return primary_call(system_prompt, user_message, max_retries)
    except Exception as primary_exc:  # noqa: BLE001
        print(f"[AI] {primary} failed: {primary_exc}")

    # ── Fallback: try the other backend (single attempt) ─────────────────
    print(f"[AI] Falling back to {secondary}…")
    try:
        return secondary_call(system_prompt, user_message, 1)
    except Exception as secondary_exc:  # noqa: BLE001
        print(f"[AI] {secondary} also failed: {secondary_exc}")

    # ── Last resort: static fallback ─────────────────────────────────────
    print("[AI] All backends failed. Using static fallback response.")
    return get_fallback_response()


def call_dokkaebi_cinematic(
    system_prompt: str,
    user_message: str,
) -> dict[str, Any]:
    """
    Convenience wrapper: always uses Modal for cinematic big moments.
    Falls back to Groq if Modal is unavailable.

    Use this for:
      - Scenario intro generation (start of each new scenario)
      - End-game report card generation
    """
    return call_dokkaebi(
        system_prompt,
        user_message,
        max_retries=2,
        use_modal=True,
    )