| """ |
| modal_client.py — Dual AI Client: Groq (fast turns) + Modal (big moments) |
| ========================================================================== |
| Architecture: |
| • call_dokkaebi() → Groq API (llama-3.1-8b-instant, 8B) — fast gameplay turns (~2s) |
| • call_dokkaebi_cinematic() → Modal llama.cpp (Qwen 2.5 14B) — cinematic moments only |
| (scenario intros, end-game report card) |
| |
| Total model parameter budget: 8B + 14B = 22B — well under the 32B hackathon cap. |
| |
| Groq is free, LPU-accelerated, and ~10x faster than Modal for standard turns. |
| Modal is kept for "big moments" to satisfy the Modal prize requirement and to |
| leverage the larger 14B model for high-quality cinematic narrative generation. |
| |
| Part of the ORV (Omniscient Reader's Viewpoint) Scenario Simulator. |
| Build Small Hackathon 2026. |
| """ |
|
|
| import json |
| import os |
| import re |
| import time |
| from typing import Any |
|
|
| import requests |
|
|
| |
| |
| |
# Best-effort loader for an optional ".env" file located next to this module.
# Every KEY=VALUE line is copied into os.environ. Variables that already exist
# in the real environment win (setdefault), so the file only supplies defaults.
# Loading never raises: problems are reported on stdout and the import goes on.
try:
    _env_path = os.path.join(os.path.dirname(__file__), ".env")
    if os.path.isfile(_env_path):
        # "utf-8-sig" gives platform-independent decoding and drops a leading
        # BOM so it cannot end up glued to the first variable name.
        with open(_env_path, encoding="utf-8-sig") as _f:
            for _line in _f:
                _line = _line.strip()
                # Skip blanks, comments and lines that are not assignments.
                if not _line or _line.startswith("#") or "=" not in _line:
                    continue
                _k, _v = _line.split("=", 1)
                _k = _k.strip()
                if not _k:
                    # "=value" has no variable name; the environment cannot
                    # store it, so skip it instead of aborting the whole file.
                    continue
                try:
                    os.environ.setdefault(_k, _v.strip())
                except (OSError, ValueError) as _exc:
                    # A single unusable entry must not prevent the remaining
                    # entries from being loaded.
                    print(f"[env] Skipping invalid .env entry {_k!r}: {_exc}")
except Exception as _exc:  # import-time boundary: stay best-effort, but say why
    print(f"[env] Could not load .env file: {type(_exc).__name__}: {_exc}")
|
|
| |
| |
| |
|
|
| |
# --- Groq backend configuration (consumed by _call_groq) ---
# API key read from the environment at import time; an empty string means
# "not configured" and makes _call_groq raise RuntimeError.
GROQ_API_KEY: str = os.environ.get("GROQ_API_KEY", "")
GROQ_ENDPOINT: str = "https://api.groq.com/openai/v1/chat/completions"
# Model name sent in the request payload; overridable via the GROQ_MODEL
# environment variable.
GROQ_MODEL: str = os.environ.get("GROQ_MODEL", "llama-3.1-8b-instant")
# Passed as the `timeout` argument of requests.post for Groq calls.
GROQ_TIMEOUT: int = 30
|
|
| |
# --- Modal backend configuration (consumed by _call_modal) ---
# Base URL of the Modal deployment; _call_modal appends "/v1/chat/completions".
# Overridable via the MODAL_ENDPOINT_URL environment variable.
MODAL_ENDPOINT_URL: str = os.environ.get(
    "MODAL_ENDPOINT_URL",
    "https://aswinikumary--orv-dokkaebi-server-serve.modal.run"
)
# Passed as the `timeout` argument of requests.post for Modal calls.
MODAL_TIMEOUT: int = 45
|
|
| |
# Top-level keys that parse_ai_response requires in every AI reply; a reply
# missing any of them is rejected with ValueError.
_REQUIRED_KEYS: set[str] = {
    "narrative",
    "dokkaebi_comment",
    "stat_changes",
    "suggestions",
    "entertainment_score",
    "constellation_reactions",
}
|
|
|
|
| |
| |
| |
|
|
|
|
def get_fallback_response() -> dict[str, Any]:
    """
    Build the canned, in-character reply used when no AI backend produced
    a usable answer.

    A brand-new dictionary (including its nested containers) is created on
    every call, so callers can mutate the result without affecting later
    calls.
    """
    glitch_narrative = (
        "The probability wavers. Something shifts in the fabric of reality. "
        "You feel it — a moment of static, as if the universe itself hesitated."
    )

    # Neutral stat delta: nothing changes except a small coin grant.
    stat_delta: dict[str, Any] = {
        "hp": 0,
        "coins": 10,
        "meta_exposure": 0,
        "prob_stability": 0,
        "trust": {},
        "constellation_affinity": {},
    }

    next_actions = [
        "Look around carefully",
        "Search for survivors",
        "Do something unexpected",
    ]

    watcher_reaction = {
        "modifier": "Prisoner of the Golden Headband",
        "reaction": "Even the system glitches. Amusing.",
        "coins": 20,
    }

    # Key order mirrors the schema order used for regular AI replies.
    fallback: dict[str, Any] = {
        "narrative": glitch_narrative,
        "dokkaebi_comment": "...Technical difficulties. How amusing.",
        "meta_detected": False,
        "meta_reason": None,
        "reality_subversion": None,
        "stat_changes": stat_delta,
        "new_title": None,
        "hidden_scenario": None,
        "big_moment": False,
        "suggestions": next_actions,
        "entertainment_score": 3,
        "constellation_reactions": [watcher_reaction],
        "scenario_complete": False,
        "scenario_rank": None,
        "dokkaebi_internal": "fallback response used",
    }
    return fallback
|
|
|
|
| |
| |
| |
|
|
|
|
def parse_ai_response(raw_text: str) -> dict[str, Any]:
    """
    Parse the raw text returned by the AI into a validated dict.

    Handles common LLM output quirks:
    - Strips leading/trailing whitespace
    - Removes markdown code fences
    - Skips any chatter before the first ``{``
    - Decodes the first complete JSON value starting at that brace, so
      trailing chatter after the object (even chatter containing braces)
      does not break parsing
    - Validates that all required keys are present

    Parameters
    ----------
    raw_text : str
        The message content exactly as returned by the model.

    Returns
    -------
    dict
        The decoded JSON object.

    Raises
    ------
    ValueError
        If ``raw_text`` is not a string, contains no JSON object, the JSON
        is malformed, the decoded value is not an object, or any key listed
        in ``_REQUIRED_KEYS`` is missing.
    """
    # Callers retry on ValueError, so report a non-text payload (e.g. a null
    # "content" field) the same way as any other unusable reply.
    if not isinstance(raw_text, str):
        raise ValueError(
            f"AI response must be a string, got {type(raw_text).__name__}."
        )

    text = raw_text.strip()

    # Drop markdown fences (``` or ```json) wherever they start or end a line.
    text = re.sub(r"^```(?:json)?\s*", "", text, flags=re.MULTILINE)
    text = re.sub(r"```\s*$", "", text, flags=re.MULTILINE)
    text = text.strip()

    first_brace = text.find("{")
    last_brace = text.rfind("}")

    if first_brace == -1 or last_brace == -1 or last_brace <= first_brace:
        raise ValueError(
            f"No valid JSON object found in AI response. "
            f"Raw text (first 200 chars): {raw_text[:200]}"
        )

    json_str = text[first_brace : last_brace + 1]

    try:
        # raw_decode stops at the end of the first JSON value instead of
        # rejecting the input because of whatever follows it.
        parsed, _end = json.JSONDecoder().raw_decode(json_str)
    except json.JSONDecodeError as exc:
        raise ValueError(
            f"Failed to parse JSON from AI response: {exc}. "
            f"Extracted text (first 300 chars): {json_str[:300]}"
        ) from exc

    if not isinstance(parsed, dict):
        raise ValueError(
            f"Expected a JSON object (dict), got {type(parsed).__name__}."
        )

    missing = _REQUIRED_KEYS - set(parsed.keys())
    if missing:
        raise ValueError(
            f"AI response is missing required keys: {missing}. "
            f"Present keys: {set(parsed.keys())}"
        )

    return parsed
|
|
|
|
| |
| |
| |
|
|
|
|
def _call_groq(
    system_prompt: str,
    user_message: str,
    max_retries: int = 2,
) -> dict[str, Any]:
    """
    Call Groq's chat-completions API for fast gameplay turns and return the
    parsed reply.

    Retry policy (at most ``max_retries`` attempts):
    - HTTP 429: wait ``3 * attempt_number`` seconds, then retry. No wait is
      performed after the final attempt.
    - Any other HTTP error: give up immediately.
    - Timeouts / connection errors: retry after 1 second.
    - Malformed payloads (missing keys, non-text content, invalid JSON):
      retry after 1 second.
    - Anything else: give up immediately.

    Parameters
    ----------
    system_prompt : str
        Content of the "system" message.
    user_message : str
        Content of the "user" message.
    max_retries : int
        Maximum number of request attempts.

    Returns
    -------
    dict
        The reply as validated by ``parse_ai_response``.

    Raises
    ------
    RuntimeError
        If ``GROQ_API_KEY`` is empty, or if no attempt produced a valid reply.
    """
    if not GROQ_API_KEY:
        raise RuntimeError("GROQ_API_KEY is not set.")

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {GROQ_API_KEY}",
    }

    payload = {
        "model": GROQ_MODEL,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ],
        "temperature": 0.85,
        "max_tokens": 480,
        "stream": False,
        "stop": None,
    }

    last_error: Exception | None = None

    for attempt in range(max_retries):
        # Sleeping only makes sense when another attempt will follow.
        has_retry_left = attempt < max_retries - 1
        try:
            print(
                f"[Groq] Attempt {attempt + 1}/{max_retries} — "
                f"model={GROQ_MODEL}"
            )
            # Monotonic clock: elapsed time is immune to wall-clock changes.
            t0 = time.monotonic()
            response = requests.post(
                GROQ_ENDPOINT,
                json=payload,
                headers=headers,
                timeout=GROQ_TIMEOUT,
            )
            response.raise_for_status()
            elapsed = time.monotonic() - t0

            data = response.json()
            raw_text = data["choices"][0]["message"]["content"]
            if not isinstance(raw_text, str):
                # e.g. a null "content" field — handled as a structure error
                # so the request is retried instead of aborted.
                raise ValueError(
                    f"Groq returned non-text content: {type(raw_text).__name__}"
                )
            print(
                f"[Groq] Response in {elapsed:.1f}s — "
                f"{len(raw_text)} chars"
            )

            parsed = parse_ai_response(raw_text)
            print("[Groq] Parsed successfully.")
            return parsed

        except requests.exceptions.HTTPError as exc:
            last_error = exc
            status = getattr(exc.response, "status_code", "???")

            if status == 429:
                if has_retry_left:
                    wait = 3 * (attempt + 1)
                    print(f"[Groq] Rate limited. Waiting {wait}s…")
                    time.sleep(wait)
                else:
                    # Waiting here would only delay the caller's fallback.
                    print("[Groq] Rate limited. No attempts left.")
            else:
                print(f"[Groq] HTTP {status}: {exc}")
                break

        except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as exc:
            last_error = exc
            print(f"[Groq] Network error attempt {attempt + 1}: {exc}")
            if has_retry_left:
                time.sleep(1)

        except (ValueError, KeyError, IndexError, TypeError) as exc:
            # TypeError covers payloads whose shape is not subscriptable the
            # way the OpenAI-style schema expects.
            last_error = exc
            print(f"[Groq] Parse/structure error attempt {attempt + 1}: {exc}")
            if has_retry_left:
                time.sleep(1)

        except Exception as exc:
            last_error = exc
            print(f"[Groq] Unexpected error: {type(exc).__name__}: {exc}")
            break

    raise RuntimeError(f"Groq failed after {max_retries} attempts: {last_error}")
|
|
|
|
| |
| |
| |
|
|
|
|
def _call_modal(
    system_prompt: str,
    user_message: str,
    max_retries: int = 2,
) -> dict[str, Any]:
    """
    Call the Modal-hosted chat-completions endpoint and return the parsed
    reply.

    Used for cinematic big moments (scenario intros, end-game report).
    Every failure — network, HTTP status, malformed payload — is retried
    with exponential backoff (1s, 2s, …) until ``max_retries`` attempts have
    been made.

    Parameters
    ----------
    system_prompt : str
        Content of the "system" message.
    user_message : str
        Content of the "user" message.
    max_retries : int
        Maximum number of request attempts.

    Returns
    -------
    dict
        The reply as validated by ``parse_ai_response``.

    Raises
    ------
    RuntimeError
        If no attempt produced a valid reply.
    """
    # The base URL may come from the environment; tolerate a trailing slash so
    # the request path never becomes "//v1/chat/completions".
    url = f"{MODAL_ENDPOINT_URL.rstrip('/')}/v1/chat/completions"

    payload = {
        "model": "dokkaebi",
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ],
        "temperature": 0.8,
        "max_tokens": 600,
        "stream": False,
    }

    headers = {"Content-Type": "application/json"}
    last_error: Exception | None = None

    for attempt in range(max_retries):
        try:
            print(
                f"[Modal] Attempt {attempt + 1}/{max_retries} — "
                f"POST {url}"
            )
            # Monotonic clock: elapsed time is immune to wall-clock changes.
            t0 = time.monotonic()
            response = requests.post(
                url,
                json=payload,
                headers=headers,
                timeout=MODAL_TIMEOUT,
            )
            response.raise_for_status()
            elapsed = time.monotonic() - t0

            data = response.json()
            raw_text = data["choices"][0]["message"]["content"]
            print(f"[Modal] Response in {elapsed:.1f}s — {len(raw_text)} chars")

            parsed = parse_ai_response(raw_text)
            print("[Modal] Parsed successfully.")
            return parsed

        except Exception as exc:
            # Deliberately broad: any failure on this backend is retried.
            last_error = exc
            print(f"[Modal] Error attempt {attempt + 1}: {type(exc).__name__}: {exc}")
            if attempt < max_retries - 1:
                wait = 2 ** attempt
                print(f"[Modal] Retrying in {wait}s…")
                time.sleep(wait)

    raise RuntimeError(f"Modal failed after {max_retries} attempts: {last_error}")
|
|
|
|
| |
| |
| |
|
|
|
|
def call_dokkaebi(
    system_prompt: str,
    user_message: str,
    max_retries: int = 2,
    use_modal: bool = False,
) -> dict[str, Any]:
    """
    Single entry point for AI calls: try the preferred backend, then the
    other one, then a static reply. Never raises because of a backend
    failure.

    Parameters
    ----------
    system_prompt : str
        The full Dokkaebi system prompt (interpolated).
    user_message : str
        The player's action or a trigger phrase.
    max_retries : int
        Attempt budget for the preferred backend. The secondary backend is
        always given a single attempt.
    use_modal : bool
        True  -> Modal first, Groq as the secondary backend.
        False -> Groq first (default), Modal as the secondary backend.

    Returns
    -------
    dict
        The parsed AI response, or the result of get_fallback_response()
        when both backends fail.
    """
    # Labels are only used in log lines.
    if use_modal:
        first_label, second_label = "Modal", "Groq"
    else:
        first_label, second_label = "Groq", "Modal"

    # Preferred backend, full retry budget.
    try:
        preferred = _call_modal if use_modal else _call_groq
        return preferred(system_prompt, user_message, max_retries)
    except Exception as primary_exc:
        print(f"[AI] {first_label} failed: {primary_exc}")

    # Other backend, one attempt only.
    print(f"[AI] Falling back to {second_label}…")
    try:
        alternate = _call_groq if use_modal else _call_modal
        return alternate(system_prompt, user_message, 1)
    except Exception as secondary_exc:
        print(f"[AI] {second_label} also failed: {secondary_exc}")

    # Last resort: canned response so the game can continue.
    print("[AI] All backends failed. Using static fallback response.")
    return get_fallback_response()
|
|
|
|
def call_dokkaebi_cinematic(
    system_prompt: str,
    user_message: str,
) -> dict[str, Any]:
    """
    Shortcut for cinematic big moments: delegates to call_dokkaebi with
    Modal as the preferred backend and two attempts.

    Intended for:
    - Scenario intro generation (start of each new scenario)
    - End-game report card generation

    The Groq fallback and the static fallback are handled inside
    call_dokkaebi.
    """
    cinematic_options = {"max_retries": 2, "use_modal": True}
    return call_dokkaebi(system_prompt, user_message, **cinematic_options)
|
|