Spaces:
Sleeping
Sleeping
| """Resolution + epilogue + interlude LLM calls for The Wizard's Oracles. | |
| `resolve_trial` is THE star call of the demo: it takes an obstacle and an | |
| oracle whose text may be literally anything the player typed (wisdom, | |
| nonsense, a meme, a recipe, a wall of emoji, an empty string) and produces | |
| a narration in which the oracle's words save the hero. | |
| All four public generators (`resolve_trial`, `generate_interlude`, | |
| `generate_dragon_interlude`, `generate_epilogue`) are theme-aware: they | |
| look up the active Theme via `oracles.themes.get_theme` and substitute | |
| its placeholders into the prompt templates. There is no offline / mock | |
| fallback — the Modal-hosted Qwen endpoint is the source of truth. On | |
| LLM failure the generators raise a `RuntimeError` so the caller can show | |
| a clear error in the UI instead of silently rendering fantasy text. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import os | |
| from typing import Optional | |
| from oracles.llm_client import LLMClient | |
| from oracles.state import GameState, Obstacle, Oracle, Resolution | |
| from oracles.themes import Theme, get_theme | |
| # --------------------------------------------------------------------------- | |
| # Prompt-file caching (read once per process) | |
| # --------------------------------------------------------------------------- | |
# Directory holding the prompt templates: ``<parent of this module's
# directory>/prompts``.
_PROMPTS_DIR = os.path.join(
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir)),
    "prompts",
)

# Per-process caches for the prompt files. Each stays ``None`` until the
# matching ``_load_*`` helper reads the file for the first time.
_RESOLUTION_PROMPT: Optional[str] = None
_EPILOGUE_PROMPT: Optional[str] = None
_INTERLUDE_PROMPT: Optional[str] = None
_DRAGON_INTERLUDE_PROMPT: Optional[str] = None
def _model_for_lang(language: str) -> str:
    """Pick the served-model alias for the requested output language.

    Per the project notes, the deployed humor LoRA was distilled on
    English-only data and drifts to English whatever the system prompt
    says, so Chinese requests are routed to the bare base model (alias
    ``llm``), trading humor-mode steering for working localization.

    Returns:
        ``"llm"`` when ``language`` contains "Chinese" or "中文";
        otherwise ``""``, which tells the client to use its default model.
    """
    wants_chinese = bool(language) and any(
        marker in language for marker in ("Chinese", "中文")
    )
    return "llm" if wants_chinese else ""
def _is_chinese(language: str) -> bool:
    """Report whether ``language`` names Chinese.

    True when the (non-empty) language label contains "Chinese" or "中文";
    False for empty / ``None`` labels and everything else.
    """
    if not language:
        return False
    return "Chinese" in language or "中文" in language
def _complete_text_with_retry(
    client,
    system: str,
    base_user: str,
    min_chars: int,
    max_tokens: int,
    temperature: float,
    language: str,
    site: str,
    max_attempts: int = 2,
) -> str:
    """Call ``client.complete_text`` and retry when the reply is too short.

    A reply shorter than ``min_chars`` characters (after stripping) is
    rejected. While attempts remain, the next call re-sends ``base_user``
    plus feedback quoting how short the previous reply was; a Chinese
    restatement of that feedback is appended when ``language`` is Chinese.

    Args:
        client: Object exposing ``complete_text(system, user=..., max_tokens=...,
            temperature=..., model=...)``.
        system: System prompt, passed through unchanged.
        base_user: User message for the first attempt.
        min_chars: Minimum accepted length of the stripped reply.
        max_tokens: Forwarded to the client.
        temperature: Forwarded to the client.
        language: Target language; selects the model alias and the
            optional Chinese feedback.
        site: Caller name used as the prefix of error messages.
        max_attempts: Total number of calls allowed (so retries =
            ``max_attempts - 1``). Values below 1 are treated as 1 so the
            function can never return without having asked the model.

    Returns:
        The stripped reply, at least ``min_chars`` characters long.

    Raises:
        RuntimeError: If the client call raises (not retried — the
            original exception is chained), or if every attempt was too
            short.
    """
    # Guard against max_attempts <= 0, which previously skipped the loop
    # entirely and returned "" — silently violating the min_chars contract.
    total_attempts = max(1, max_attempts)
    user_msg = base_user
    last_short = None  # (preview, length) of the previous rejected reply
    text = ""
    attempt = 0
    for attempt in range(1, total_attempts + 1):
        try:
            text = client.complete_text(
                system, user=user_msg,
                max_tokens=max_tokens,
                temperature=temperature,
                model=_model_for_lang(language),
            )
        except Exception as e:
            # Connection-class errors bubble immediately; retrying against
            # the same network state would not help.
            raise RuntimeError(
                f"{site}: LLM call failed [{type(e).__name__}] {e}"
            ) from e
        text = (text or "").strip()
        if len(text) >= min_chars:
            return text
        if attempt == total_attempts:
            break  # out of retries; report below
        last_short = (text[:120], len(text))
        zh_extra = (
            f"\n\n你上一次的回复太短(只有 {len(text)} 字)。请重写一份"
            f"更详尽的内容,至少 {min_chars} 字。"
        ) if _is_chinese(language) else ""
        user_msg = (
            f"{base_user}\n\n"
            f"YOUR PREVIOUS ATTEMPT WAS REJECTED — TOO SHORT: only "
            f"{len(text)} characters (minimum required: {min_chars}). "
            f"Previous response began: {text[:120]!r}\n"
            f"Write a LONGER, more detailed response this time — at "
            f"least {min_chars} characters." + zh_extra
        )

    # Every attempt was too short: include the last two previews so the
    # caller's error display shows what the model actually produced.
    prior = (
        f", prior_attempt={last_short[1]} chars / preview={last_short[0]!r}"
        if last_short else ""
    )
    raise RuntimeError(
        f"{site}: response too short after {attempt} attempts — "
        f"got {len(text)} chars, need >= {min_chars} "
        f"(lang={language!r}, preview={text[:80]!r}{prior})"
    )
def _wrap_with_language_force(system: str, language: str) -> str:
    """Bracket ``system`` with Chinese-only directives for Chinese output.

    For any non-Chinese ``language`` the prompt is returned unchanged.
    For Chinese, a strongly worded Simplified-Chinese instruction is put
    in front of the prompt and a shorter reminder after it. The stated
    rationale is that the English template and examples otherwise bias
    the model toward English; opening and closing the prompt in Chinese
    is intended to counter that.
    """
    if not _is_chinese(language):
        return system

    opening_directive = (
        "【极其重要】本次任务必须**完全用简体中文**输出 narration "
        "与 tactic 字段,**禁止出现任何英文单词、英文短语或英文标点**。"
        "无论下面的示例与说明使用哪种语言,**最终输出只能是简体中文**。"
        "如出现任何英文字符,则视为完全失败。\n\n"
    )
    closing_directive = (
        "\n\n【重申】narration 与 tactic 两个字段都必须是简体中文。"
        "不要英文。不要中英混杂。**只用简体中文**。"
    )
    return "".join((opening_directive, system, closing_directive))
def _length_units(text: str, language: str) -> int:
    """Count language-appropriate "narration units" in ``text``.

    - Chinese: the number of characters in the range "一".."鿿" (CJK
      ideographs); whitespace splitting is useless there because Chinese
      has no inter-character spaces.
    - Anything else: the number of whitespace-separated words.

    This function only counts; any adjustment of a minimum threshold for
    Chinese versus English is left to the caller.
    """
    if not _is_chinese(language):
        return len(text.split())

    cjk_count = 0
    for ch in text:
        if "一" <= ch <= "鿿":
            cjk_count += 1
    return cjk_count
def _load_resolution_prompt() -> str:
    """Return the resolution system prompt, reading it on first use only.

    The file ``resolution_system.txt`` under ``_PROMPTS_DIR`` is read as
    UTF-8 once and memoized in the module-level ``_RESOLUTION_PROMPT``.
    """
    global _RESOLUTION_PROMPT
    if _RESOLUTION_PROMPT is not None:
        return _RESOLUTION_PROMPT
    prompt_path = os.path.join(_PROMPTS_DIR, "resolution_system.txt")
    with open(prompt_path, "r", encoding="utf-8") as fh:
        _RESOLUTION_PROMPT = fh.read()
    return _RESOLUTION_PROMPT
def _load_epilogue_prompt() -> str:
    """Return the epilogue system prompt, reading it on first use only.

    The file ``epilogue_system.txt`` under ``_PROMPTS_DIR`` is read as
    UTF-8 once and memoized in the module-level ``_EPILOGUE_PROMPT``.
    """
    global _EPILOGUE_PROMPT
    if _EPILOGUE_PROMPT is not None:
        return _EPILOGUE_PROMPT
    prompt_path = os.path.join(_PROMPTS_DIR, "epilogue_system.txt")
    with open(prompt_path, "r", encoding="utf-8") as fh:
        _EPILOGUE_PROMPT = fh.read()
    return _EPILOGUE_PROMPT
def _load_interlude_prompt(dragon: bool = False) -> str:
    """Return the interlude system prompt (regular or dragon variant).

    Args:
        dragon: When true, load ``interlude_dragon_system.txt``;
            otherwise ``interlude_system.txt``.

    Each variant is read from ``_PROMPTS_DIR`` as UTF-8 at most once and
    memoized in its own module-level cache variable.
    """
    global _INTERLUDE_PROMPT, _DRAGON_INTERLUDE_PROMPT

    if dragon:
        if _DRAGON_INTERLUDE_PROMPT is None:
            dragon_path = os.path.join(_PROMPTS_DIR, "interlude_dragon_system.txt")
            with open(dragon_path, "r", encoding="utf-8") as fh:
                _DRAGON_INTERLUDE_PROMPT = fh.read()
        return _DRAGON_INTERLUDE_PROMPT

    if _INTERLUDE_PROMPT is None:
        regular_path = os.path.join(_PROMPTS_DIR, "interlude_system.txt")
        with open(regular_path, "r", encoding="utf-8") as fh:
            _INTERLUDE_PROMPT = fh.read()
    return _INTERLUDE_PROMPT
| # --------------------------------------------------------------------------- | |
| # Helpers for sanitizing weird oracle text | |
| # --------------------------------------------------------------------------- | |
_LONG_INPUT_HARD_CAP = 4000  # absurdly long; protects the prompt budget


def _safe_oracle_for_template(text: str) -> str:
    """Normalize oracle text before it is spliced into a prompt template.

    - ``None`` becomes the empty string; any other non-string is passed
      through ``str()``.
    - NUL bytes are removed.
    - Text longer than ``_LONG_INPUT_HARD_CAP`` characters is cut to that
      length and suffixed with an ellipsis character.
    """
    if text is None:
        cleaned = ""
    elif isinstance(text, str):
        cleaned = text
    else:
        cleaned = str(text)

    cleaned = cleaned.replace("\x00", "")
    if len(cleaned) <= _LONG_INPUT_HARD_CAP:
        return cleaned
    return cleaned[:_LONG_INPUT_HARD_CAP] + "…"
def _apply_theme(template: str, theme: Theme) -> str:
    """Fill every theme placeholder in ``template`` from ``theme``.

    Substitutions are applied one after another in the order listed
    below, using plain ``str.replace``; placeholders not listed here are
    left untouched for the caller to fill.
    """
    substitutions = (
        ("{theme_name}", theme.display_name),
        ("{mentor_archetype}", theme.mentor_archetype),
        ("{mentor_action_verb}", theme.mentor_action_verb),
        ("{oracle_artifact}", theme.oracle_artifact),
        ("{oracle_singular}", theme.oracle_singular),
        ("{goal_verb}", theme.goal_verb),
        ("{finale_descriptor}", theme.finale_descriptor),
        ("{finale_short}", theme.finale_short),
        ("{hero_label}", theme.hero_label),
        ("{village_label}", theme.village_label),
        ("{style_cues}", theme.style_cues),
    )
    rendered = template
    for placeholder, value in substitutions:
        rendered = rendered.replace(placeholder, value)
    return rendered
| # --------------------------------------------------------------------------- | |
| # Public API | |
| # --------------------------------------------------------------------------- | |
def resolve_trial(
    obstacle: Obstacle,
    oracle: Oracle,
    hero_name: str,
    village_name: str,
    client: LLMClient,
    language: str = "English",
    theme: str = "fantasy",
    narration_length: str = "medium",
) -> Resolution:
    """Generate the narration for one trial via the live LLM.

    Args:
        obstacle: The trial's obstacle; its ``setup`` and ``index`` are used.
        oracle: The player's oracle; ``oracle.text`` may be anything
            (or ``oracle`` may be falsy, which is treated as empty text).
        hero_name: Hero name; falls back to "the hero" when empty.
        village_name: Village name; falls back to "his village" when empty.
        client: LLM client exposing ``complete_json``.
        language: Output language label substituted into the prompt.
        theme: Theme key resolved through ``get_theme``.
        narration_length: Key into ``NARRATION_LENGTHS`` (state.py); it
            drives the word range in the prompt and the ``max_tokens``
            cap. Unknown keys fall back to ``"medium"``.

    Returns:
        A ``Resolution`` carrying the stripped narration and tactic; the
        tactic doubles as the image caption and ``image_path`` is empty.

    Raises:
        RuntimeError: If the client is missing / in mock mode, the LLM
            call raises, the reply is not a JSON object, the narration
            is missing, or the narration is still too short after all
            attempts.
    """
    if client is None or getattr(client, "using_mock", True):
        raise RuntimeError(
            "LLM client is not configured. Set MODAL_URL, MODAL_KEY and "
            "MODAL_SECRET so the oracles can be interpreted."
        )

    from oracles.state import NARRATION_LENGTHS as _LENGTHS
    _, n_min, n_max, max_tokens = _LENGTHS.get(
        narration_length, _LENGTHS["medium"]
    )

    template = _load_resolution_prompt()
    safe_oracle_text = _safe_oracle_for_template(oracle.text if oracle else "")
    th = get_theme(theme)
    system = _apply_theme(template, th)
    system = (
        system
        .replace("{hero_name}", hero_name or "the hero")
        .replace("{village_name}", village_name or "his village")
        .replace("{obstacle_setup}", obstacle.setup or "")
        .replace("{language}", language or "English")
        .replace("{narration_min}", str(n_min))
        .replace("{narration_max}", str(n_max))
        # Free-form player text goes in LAST: substituting it earlier let
        # a player who typed e.g. "{language}" or "{narration_min}" have
        # those tokens expanded inside their own oracle text.
        .replace("{oracle_text}", safe_oracle_text)
    )
    # The prompt template + examples are all English, so the model often
    # parrots English even when Chinese was requested. Bracket the prompt
    # with native-Chinese directives in that case (no-op otherwise).
    system = _wrap_with_language_force(system, language)

    # Lower bound for the validator: about a third of the preset's
    # minimum, with a 40-unit absolute floor. Deliberately permissive —
    # the earlier stricter gate was dropping otherwise-fine narrations —
    # while still rejecting near-empty fragments.
    min_floor = max(40, n_min // 3)

    base_user = "Pick one of modes A/B/C and write the resolution now."
    user_msg = base_user
    last_short_attempt = None  # (narration_preview, units) of the prior try
    attempts = 0
    max_attempts = 3  # initial + two retries

    while True:
        attempts += 1
        try:
            raw = client.complete_json(
                system,
                user=user_msg,
                max_tokens=max_tokens,
                temperature=1.05,
                model=_model_for_lang(language),
            )
        except Exception as e:
            # Connection-class errors are NOT retried — same network state
            # would just fail the same way; surface immediately.
            raise RuntimeError(
                f"resolve_trial: LLM call failed [{type(e).__name__}] {e}"
            ) from e

        if not isinstance(raw, dict):
            raise RuntimeError(
                f"resolve_trial: LLM returned non-JSON ({type(raw).__name__}, "
                f"first 200 chars: {str(raw)[:200]!r})"
            )

        narration = raw.get("narration")
        tactic = raw.get("tactic")
        # The fine-tune sometimes omits the tactic key. Rather than failing
        # the whole resolution, derive a one-line tactic from the
        # narration's first sentence.
        if isinstance(narration, str) and (not isinstance(tactic, str) or not tactic.strip()):
            # Treat the CJK full stop like "." so a Chinese narration also
            # yields its first sentence instead of a 120-char mid-sentence
            # slice.
            first_sentence = narration.strip().replace("。", ".").split(".")[0]
            tactic = first_sentence[:120].strip() or "He found a way."
        if not isinstance(narration, str) or not isinstance(tactic, str):
            raise RuntimeError(
                f"resolve_trial: JSON missing narration/tactic keys "
                f"(got keys={list(raw.keys())[:6]})"
            )

        narration = narration.strip()
        tactic = tactic.strip()
        units = _length_units(narration, language)

        if not tactic:
            # Defensive: the fallback above always yields a non-empty
            # tactic, so this is not expected to fire. An empty tactic is
            # not a length problem, so it is surfaced without retrying.
            raise RuntimeError(
                "resolve_trial: LLM omitted the tactic one-liner "
                f"(narration was {units} units, language={language!r})"
            )

        if units >= min_floor:
            break  # PASS

        # Too short. If a retry is left, build a sharpened user message
        # that quotes the previous failure and asks for a longer response.
        if attempts < max_attempts:
            last_short_attempt = (narration[:120], units)
            unit_type = "Chinese characters" if _is_chinese(language) else "English words"
            zh_extra = (
                "\n\n你上一次的回复太短了。请重写一份更长更详尽的 narration,"
                f"至少 {min_floor} 个汉字。先前长度仅 {units} 字。"
            ) if _is_chinese(language) else ""
            user_msg = (
                f"{base_user}\n\n"
                f"YOUR PREVIOUS ATTEMPT WAS REJECTED — TOO SHORT: only "
                f"{units} {unit_type} (minimum required: {min_floor}). "
                f"Previous narration began: {narration[:120]!r}\n"
                f"Write a LONGER, more detailed narration this time — at "
                f"least {min_floor} {unit_type}. Keep the same JSON shape "
                f"{{\"narration\": ..., \"tactic\": ...}}."
                + zh_extra
            )
            continue

        # No retries left — escalate with full diagnostic context.
        unit_type = "Chinese chars" if _is_chinese(language) else "English words"
        req = getattr(client, "last_requested_model", "?")
        got = getattr(client, "last_returned_model", "?")
        prior = (
            f", prior_attempt={last_short_attempt[1]} units / preview="
            f"{last_short_attempt[0]!r}"
            if last_short_attempt else ""
        )
        raise RuntimeError(
            f"resolve_trial: narration too short after {attempts} attempts — "
            f"got {units} {unit_type}, need >= {min_floor} "
            f"(lang={language!r}, model_requested={req!r}, "
            f"model_returned={got!r}, narration_preview="
            f"{narration[:80]!r}{prior})"
        )

    return Resolution(
        trial_index=obstacle.index,
        obstacle=obstacle,
        oracle=oracle,
        narration=narration,
        tactic=tactic,
        image_path="",
        image_caption=tactic,
    )
def generate_epilogue(
    state: GameState,
    client: LLMClient,
    language: str = "English",
    theme: str = "fantasy",
) -> str:
    """Produce the closing epilogue text through the live LLM.

    The epilogue prompt is themed, optionally extended with a
    branching-story ending seed, filled with the hero / village names and
    the recorded tactics, and then sent through
    ``_complete_text_with_retry`` (minimum 250 characters).

    Raises:
        RuntimeError: If the client is missing / in mock mode, or the
            text generation fails or stays too short.
    """
    if client is None or getattr(client, "using_mock", True):
        raise RuntimeError(
            "LLM client is not configured. Set MODAL_URL, MODAL_KEY and "
            "MODAL_SECRET so the wizard's epilogue can be spoken."
        )

    template = _load_epilogue_prompt()

    # One bullet per resolution that actually recorded a tactic.
    recorded = [r.tactic for r in state.resolutions if getattr(r, "tactic", "")]
    tactics_block = (
        "\n".join(f"- {t}" for t in recorded)
        if recorded
        else "- (no tactics recorded)"
    )

    th = get_theme(theme)
    system = _apply_theme(template, th)

    # When the player walked the story tree, the last node on the path
    # may name an ending. Fantasy uses the ending's authored seed in the
    # player's language; every other theme asks the LLM (one short call)
    # to render the ending for that theme first.
    ending_seed = ""
    story_path = getattr(state, "story_path", None) or []
    if story_path:
        from oracles.story_graph import get_node, get_ending, render_themed_ending_seed

        leaf = get_node(story_path[-1])
        if leaf is not None and leaf.ending_id:
            ending = get_ending(leaf.ending_id)
            lang_code = "zh" if _is_chinese(language) else "en"
            if theme == "fantasy":
                ending_seed = ending.seed(lang_code)
            else:
                ending_seed = render_themed_ending_seed(
                    ending, th, client,
                    language=language,
                    hero_name=state.hero_name or "the hero",
                    village_name=state.village_name or "his village",
                )
            # NOTE(review): the source's indentation was lost; the splice
            # is assumed to happen only when an ending was resolved.
            seed_block = (
                "\n\n[Branching-story epilogue seed]\nThe seed below "
                "contains the OUTCOME of this run plus two named beats: "
                "WHY THE BOSS BEHAVED AS IT DID and WHAT THE APPRENTICE "
                "CARRIED HOME. Your 3-paragraph epilogue MUST honor BOTH "
                "of those beats — do not skip either, do not contradict "
                "the seed's tone or outcome. Use the recorded tactics as "
                "flavor when describing what the apprentice carried home. "
                "Write in {language}.\n\n"
                "SEED:\n"
                f"{ending_seed}"
            )
            system += seed_block.replace("{language}", language or "English")

    fills = (
        ("{hero_name}", state.hero_name or "the hero"),
        ("{village_name}", state.village_name or "his village"),
        ("{tactics_block}", tactics_block),
        ("{language}", language or "English"),
    )
    for token, value in fills:
        system = system.replace(token, value)
    system = _wrap_with_language_force(system, language)

    return _complete_text_with_retry(
        client, system,
        base_user="Write the epilogue now — three paragraphs.",
        # 250-char floor so terse one-paragraph replies trigger the
        # retry-with-feedback path.
        min_chars=250,
        max_tokens=900,
        temperature=0.9,
        language=language,
        site="generate_epilogue",
    )
| # --------------------------------------------------------------------------- | |
# Background precomputation — fills state.resolution_cache so trial reveals
# are instant. Per-pair errors never propagate out of the thread: they are
# recorded in state.precompute_failed and logged to stderr, and the main
# render path retries synchronously.
| # --------------------------------------------------------------------------- | |
def precompute_all_resolutions(
    state: GameState,
    client: LLMClient,
    language: str = "English",
    max_workers: int = 8,
) -> None:
    """Spawn a background daemon thread that fills ``state.resolution_cache``
    with a Resolution for every (oracle, obstacle) pair.

    The cache key is ``(oracle.index, obstacle.index)``. The thread writes
    to the cache as each LLM call returns; callers can read partial results.
    Progress is exposed through ``state.precompute_total`` /
    ``state.precompute_done`` / ``state.precompute_in_flight``, and failed
    pairs are recorded in ``state.precompute_failed`` (key -> error text)
    and logged to stderr.

    Does nothing when the client is missing / in mock mode, when there are
    no oracles or obstacles, or when a precompute run is already in flight.

    Args:
        state: Game state to read pairs from and write results into.
        client: LLM client forwarded to ``resolve_trial``.
        language: Output language forwarded to ``resolve_trial``.
        max_workers: Size of the thread pool issuing the LLM calls.
    """
    import sys
    import threading
    from concurrent.futures import ThreadPoolExecutor

    if client is None or getattr(client, "using_mock", True):
        return
    if not state.oracles or not state.obstacles:
        return
    if state.precompute_in_flight:
        return

    pairs = [
        (oracle, obstacle)
        for oracle in state.oracles
        for obstacle in state.obstacles
    ]
    state.precompute_total = len(pairs)
    state.precompute_done = 0
    # Start each run with a clean failure map so entries from an earlier
    # run are not reported against pairs that succeed this time.
    state.precompute_failed = {}
    state.precompute_in_flight = True

    # Several pool workers finish concurrently; ``+= 1`` on the counter and
    # the create-if-missing of the failure map are read-modify-write
    # sequences, so both are serialized through this lock.
    progress_lock = threading.Lock()

    def _one(oracle: Oracle, obstacle: Obstacle) -> None:
        """Resolve a single pair; record the outcome, never raise."""
        key = (oracle.index, obstacle.index)
        try:
            res = resolve_trial(
                obstacle, oracle,
                state.hero_name, state.village_name,
                client, language=language,
                theme=getattr(state, "theme", "fantasy"),
                narration_length=getattr(state, "narration_length", "medium"),
            )
            state.resolution_cache[key] = res
        except Exception as _e:
            # Best-effort: the synchronous render path retries this pair
            # live, so just remember and log the failure.
            with progress_lock:
                failed = getattr(state, "precompute_failed", None)
                if failed is None:
                    failed = {}
                    state.precompute_failed = failed
                failed[key] = f"{type(_e).__name__}: {_e}"
            print(f"[resolution.precompute] pair {key} failed: "
                  f"{type(_e).__name__} {_e}", file=sys.stderr)
        finally:
            with progress_lock:
                state.precompute_done += 1

    def _worker() -> None:
        """Drive the pool to completion and always clear the in-flight flag."""
        try:
            with ThreadPoolExecutor(max_workers=max_workers) as ex:
                list(ex.map(lambda p: _one(*p), pairs))
        except Exception as _e:
            # Still best-effort, but leave a trace instead of vanishing
            # (e.g. an invalid ``max_workers``).
            print(f"[resolution.precompute] worker aborted: "
                  f"{type(_e).__name__} {_e}", file=sys.stderr)
        finally:
            state.precompute_in_flight = False

    threading.Thread(target=_worker, daemon=True).start()
| # --------------------------------------------------------------------------- | |
| # Background interlude / epilogue precompute — fired at trial-reveal time so | |
| # the next Continue / epilogue click is instant. | |
| # --------------------------------------------------------------------------- | |
def kick_background_interlude(
    state: GameState,
    trial_index: int,
    client: LLMClient,
    language: str = "English",
    theme: str = "fantasy",
) -> None:
    """Fire-and-forget generation of the interlude after ``trial_index``.

    A daemon thread generates the text bridging ``trial_index`` and
    ``trial_index + 1`` and stores it in ``state.interludes[trial_index - 1]``:

        slot 0 → after trial 1's resolution, before trial 2's setup
        slot 1 → after trial 2 → before trial 3
        slot 2 → after trial 3 → before trial 4
        slot 3 → after trial 4 → before trial 5 (DRAGON_INTERLUDE shape)

    Returns immediately without starting a thread when ``trial_index`` is
    outside ``1 .. NUM_TRIALS - 1``, the slot does not exist or is
    already filled, there is no previous resolution, or no obstacle has
    index ``trial_index + 1``.
    """
    import threading
    from oracles.state import NUM_TRIALS as _NUM_TRIALS

    if not (1 <= trial_index < _NUM_TRIALS):
        return
    slot = trial_index - 1
    if slot < 0 or slot >= len(state.interludes):
        return
    # Already populated, e.g. the player clicked Continue before the
    # precompute finished and the slot was filled synchronously.
    if (state.interludes[slot] or "").strip():
        return

    prev_res = state.resolutions[-1] if state.resolutions else None
    next_ob = None
    for candidate in state.obstacles:
        if candidate.index == trial_index + 1:
            next_ob = candidate
            break
    if prev_res is None or next_ob is None:
        return

    trials_remaining = _NUM_TRIALS - trial_index

    def _worker() -> None:
        try:
            if next_ob.is_dragon:
                # Tactics are collected when the thread runs, from every
                # resolution recorded so far.
                tactics_so_far = [
                    (r.tactic or "").strip() for r in state.resolutions
                ]
                text = generate_dragon_interlude(
                    prev_res.obstacle, tactics_so_far,
                    state.hero_name, state.village_name,
                    client, language=language, theme=theme,
                )
            else:
                text = generate_interlude(
                    prev_res.obstacle, next_ob,
                    prev_res.tactic or "",
                    state.hero_name, state.village_name,
                    trials_remaining=trials_remaining,
                    client=client, language=language, theme=theme,
                )
            # First writer wins: only fill the slot if it is still empty,
            # so a synchronous fallback that got there first is kept.
            already_filled = (state.interludes[slot] or "").strip()
            if not already_filled:
                state.interludes[slot] = text
        except Exception:
            # Leave the slot empty; handle_continue will retry synchronously
            # and surface its own error message if that also fails.
            pass

    threading.Thread(target=_worker, daemon=True).start()
def kick_background_epilogue(
    state: GameState,
    client: LLMClient,
    language: str = "English",
    theme: str = "fantasy",
) -> None:
    """Fire-and-forget generation of the epilogue into ``state.epilogue``.

    Starts a daemon thread that calls ``generate_epilogue`` so the final
    Continue click does not have to wait. Returns without starting a
    thread when ``state.epilogue`` already holds non-blank text. Any
    exception inside the thread is swallowed and the field is left as is.
    """
    import threading

    already_written = (state.epilogue or "").strip()
    if already_written:
        return

    def _worker() -> None:
        try:
            text = generate_epilogue(
                state, client, language=language, theme=theme,
            )
        except Exception:
            return
        # Only fill the field if nobody else wrote it in the meantime.
        if (state.epilogue or "").strip():
            return
        try:
            state.epilogue = text
        except Exception:
            pass

    threading.Thread(target=_worker, daemon=True).start()
| # --------------------------------------------------------------------------- | |
| # Interludes — short journeying narrative between consecutive trials. | |
| # --------------------------------------------------------------------------- | |
def generate_interlude(
    prev_obstacle: Obstacle,
    next_obstacle: Obstacle,
    prev_tactic: str,
    hero_name: str,
    village_name: str,
    trials_remaining: int,
    client: LLMClient,
    language: str = "English",
    theme: str = "fantasy",
) -> str:
    """Write the short bridging interlude between two trials via the live LLM.

    The regular interlude prompt is themed, filled with the previous and
    next obstacle setups, the previous tactic, the names and the number
    of trials remaining, then sent through ``_complete_text_with_retry``
    (minimum 60 characters).

    Raises:
        RuntimeError: If the client is missing / in mock mode, or the
            generation fails or stays too short.
    """
    if client is None or getattr(client, "using_mock", True):
        raise RuntimeError(
            "LLM client is not configured. Cannot bridge the trials."
        )

    system = _apply_theme(_load_interlude_prompt(dragon=False), get_theme(theme))
    fills = (
        ("{hero_name}", hero_name or "the hero"),
        ("{village_name}", village_name or "his village"),
        ("{prev_obstacle_setup}", prev_obstacle.setup or ""),
        ("{prev_tactic}", prev_tactic or ""),
        ("{next_obstacle_setup}", next_obstacle.setup or ""),
        ("{trials_remaining}", str(trials_remaining)),
        ("{language}", language or "English"),
    )
    for token, value in fills:
        system = system.replace(token, value)
    system = _wrap_with_language_force(system, language)

    return _complete_text_with_retry(
        client, system,
        base_user="Write the interlude now.",
        min_chars=60,
        max_tokens=400,
        temperature=0.85,
        language=language,
        site="generate_interlude",
    )
def generate_dragon_interlude(
    prev_obstacle: Obstacle,
    four_tactics: list[str],
    hero_name: str,
    village_name: str,
    client: LLMClient,
    language: str = "English",
    theme: str = "fantasy",
) -> str:
    """Write the climactic lead-in to the final trial via the live LLM.

    The dragon interlude prompt is themed, filled with the previous
    obstacle's setup, the names and a bullet list of the non-blank
    entries of ``four_tactics``, then sent through
    ``_complete_text_with_retry`` (minimum 80 characters).

    Raises:
        RuntimeError: If the client is missing / in mock mode, or the
            generation fails or stays too short.
    """
    if client is None or getattr(client, "using_mock", True):
        raise RuntimeError(
            "LLM client is not configured. Cannot approach the finale."
        )

    template = _load_interlude_prompt(dragon=True)
    th = get_theme(theme)

    # Blank / None tactics are skipped; an all-blank list gets a stand-in.
    bullet_lines = [f" - {t}" for t in four_tactics if (t or "").strip()]
    tactics_block = "\n".join(bullet_lines) or " - (none recorded)"

    system = _apply_theme(template, th)
    fills = (
        ("{hero_name}", hero_name or "the hero"),
        ("{village_name}", village_name or "his village"),
        ("{prev_obstacle_setup}", prev_obstacle.setup or ""),
        ("{four_tactics}", tactics_block),
        ("{language}", language or "English"),
    )
    for token, value in fills:
        system = system.replace(token, value)
    system = _wrap_with_language_force(system, language)

    return _complete_text_with_retry(
        client, system,
        base_user="Write the lead-in now.",
        min_chars=80,
        max_tokens=500,
        temperature=0.9,
        language=language,
        site="generate_dragon_interlude",
    )