"""Live game runtime: lazily builds the in-process llama.cpp backend, generates cases,
and holds live ``Session`` objects per run.
Single-flight is MANDATORY: ``llama_cpp.Llama`` is not thread-safe, so every model call
(generation + interrogation) runs under one lock - never concurrently, on any machine.
Background case generation runs on EVERY box, but it can never make a player wait:
each generation call holds the lock for just that one call (never the whole pipeline),
and on a small box it streams with an interrupt check between tokens - the moment a
player asks a question the in-flight generation aborts within a token, the lock frees,
and the turn runs. Generation resumes once the table has been idle for a while. Fresh
AI cases land in the buffer (served on the very next New Case) AND join the shuffled
rotation, so the pool of mysteries grows for as long as the Space stays up.
"""
from __future__ import annotations

import logging
import random
import threading
import time
import uuid
from collections.abc import Iterator
from dataclasses import dataclass

from ..config import effective_cpus, get_settings
from ..engine.session import Session
from ..generator.pipeline import generate_case
from ..llm.backend import GenParams, LLMBackend, LLMError, make_backend
from ..persistence.case_store import load_case, save_runtime_case
from ..persistence.paths import prebaked_cases_dir, runtime_cases_dir
from ..schemas.accusation import Accusation
from ..schemas.case import CaseFile
from ..schemas.enums import Relevance
from .case_adapter import casefile_to_public
from .public_view import PublicCase
# How long the table must be quiet (no interrogation turn) before a small box starts -
# or resumes - a background generation. Value is in seconds; the background worker
# compares it against the time elapsed since the last player turn.
_IDLE_SECS: float = 90.0
class _SharedLockBackend:
    """Per-call single-flight wrapper. Each model call holds the runtime lock for JUST
    that call, so a player's turn waits behind at most one in-flight call - never a
    whole multi-call generation. With an ``interrupt`` event the call streams internally
    and aborts between tokens the moment a player shows up, freeing the lock at once.

    Args:
        inner: the real backend every call is forwarded to.
        lock: the runtime-wide lock that serialises all model calls.
        interrupt: optional event; when given, ``generate`` streams token by token and
            raises ``LLMError`` as soon as the event is observed set.
    """

    def __init__(self, inner: LLMBackend, lock: threading.Lock,
                 interrupt: threading.Event | None = None) -> None:
        self._inner = inner
        self._lock = lock
        self._interrupt = interrupt

    def _raise_if_interrupted(self) -> None:
        """Raise ``LLMError`` when the interrupt event exists and is set."""
        if self._interrupt is not None and self._interrupt.is_set():
            raise LLMError("generation interrupted by player")

    def generate(self, prompt: str, params: GenParams) -> str:
        """Run one completion under the lock and return the full text.

        Without an interrupt event this is a plain ``inner.generate`` call. With one,
        the text is assembled from ``inner.stream`` so the event can be checked before
        the first token and after every token.

        Raises:
            LLMError: the interrupt event was set before or during the call.
        """
        with self._lock:
            if self._interrupt is None:
                return self._inner.generate(prompt, params)
            self._raise_if_interrupted()
            parts: list[str] = []
            deltas = self._inner.stream(prompt, params)
            try:
                for delta in deltas:
                    parts.append(delta)
                    self._raise_if_interrupted()
            finally:
                # Finalize the inner stream while we still own the lock, instead of
                # leaving an abandoned iterator for the garbage collector to close
                # at some later point (possibly after another call has started).
                close = getattr(deltas, "close", None)
                if close is not None:
                    close()
            return "".join(parts)

    def stream(self, prompt: str, params: GenParams) -> Iterator[str]:
        """Yield the inner backend's stream under the lock.

        The lock is acquired on the first ``next()`` and stays held until the
        generator is exhausted or closed, so callers must consume or close it.
        """
        with self._lock:
            yield from self._inner.stream(prompt, params)
@dataclass
class LiveRun:
    """Everything the runtime keeps for one registered play-through of a case."""

    # Opaque identifier handed back to the client (a uuid4 hex string).
    run_id: str
    # The full case file this run is played against.
    case: CaseFile
    # Live engine session created from ``case`` and the shared backend.
    session: Session
    # Player-facing projection of ``case`` produced by ``casefile_to_public``.
    public: PublicCase
    # Suspect id -> baseline suspicion, captured from ``public`` at registration.
    baselines: dict[str, int]
class GameRuntime:
    """Owns the shared LLM backend, the pool of cases and every live run.

    All model calls are serialised through ``_lock``. A single background thread
    keeps one freshly generated case buffered; on a small box it waits for an idle
    table and is interrupted between tokens whenever a player takes a turn.
    """

    _log = logging.getLogger(__name__)

    def __init__(self) -> None:
        self._lock = threading.Lock()  # MANDATORY single-flight over all model calls
        self._backend: LLMBackend | None = None
        self._backend_failed = False
        # Guards lazy backend construction so two threads never load the model twice.
        self._backend_lock = threading.Lock()
        self._runs: dict[str, LiveRun] = {}
        self._buffer: CaseFile | None = None
        # Guards ``_buffer`` and the pre-baked pool (list, index, loaded flag).
        self._buffer_lock = threading.Lock()
        self._seed = int(time.time()) % 900_000 + 1000
        self._rng = random.Random(self._seed)
        # Pre-baked pool: full, model-authored cases shipped with the Space, served instantly
        # on New Case so nobody waits ~2 min for live generation. Interrogation is still live.
        self._prebaked: list[CaseFile] = []
        self._prebaked_idx = 0
        self._prebaked_loaded = False
        # Background generation: a fast box generates immediately; a small box (the 2-vCPU
        # Space) waits for an idle table and aborts between tokens when a player shows up.
        self._fast_box = effective_cpus() > 4
        self._gen_interrupt = threading.Event()
        self._gen_running = False
        self._gen_flag_lock = threading.Lock()
        # Monotonic timestamp of the last player turn; -inf means "idle since start".
        self._last_player_ts = float("-inf")
        # Number of player turns that asked for the table and have not yet taken it.
        self._players_lock = threading.Lock()
        self._players_pending = 0

    # ---- backend ----
    def _get_backend(self) -> LLMBackend | None:
        """Return the shared backend, building it on first use.

        Returns ``None`` once construction has failed with ``LLMError``; the failure
        is remembered so the build is not retried.
        """
        if self._backend is None and not self._backend_failed:
            with self._backend_lock:
                # Re-check under the lock: another thread may have finished the build.
                if self._backend is None and not self._backend_failed:
                    try:
                        self._backend = make_backend(get_settings())
                    except LLMError:
                        self._backend_failed = True
        return self._backend

    def available(self) -> bool:
        """Report whether a backend exists (building it if necessary)."""
        return self._get_backend() is not None

    def _next_seed(self) -> int:
        """Advance and return the generation seed."""
        self._seed += 1
        return self._seed

    # ---- generation ----
    def _generate(self, seed: int, *, interruptible: bool = False) -> CaseFile:
        """Generate, persist and return one case.

        Args:
            seed: seed passed to ``generate_case``.
            interruptible: when true, model calls abort once a player takes a turn.

        Raises:
            LLMError: no backend is available (or raised by the pipeline itself).
        """
        backend = self._get_backend()
        if backend is None:
            raise LLMError("no backend")
        interrupt = self._gen_interrupt if interruptible else None
        wrapped = _SharedLockBackend(backend, self._lock, interrupt)
        result = generate_case(wrapped, seed=seed)
        save_runtime_case(result.case)
        return result.case

    def _prebuild(self) -> None:
        """Generate ONE fresh AI case in the background, yielding to players. On a small
        box: wait until the table is idle, abort between tokens if a player interrupts,
        then wait for idle again and retry. The finished case is served on the very next
        New Case and joins the rotation for good."""
        interruptible = not self._fast_box
        try:
            for _ in range(8):
                try:
                    if interruptible:
                        # Monotonic clock: wall-clock adjustments must not shorten
                        # or stretch the idle window.
                        while time.monotonic() - self._last_player_ts < _IDLE_SECS:
                            time.sleep(5)
                    case = self._generate(self._next_seed(), interruptible=interruptible)
                except LLMError:
                    continue  # interrupted by a player, or malformed output - try again
                except Exception:
                    self._log.exception("background case generation failed")
                    break
                with self._buffer_lock:
                    self._buffer = case
                    self._prebaked.append(case)
                break
        finally:
            with self._gen_flag_lock:
                self._gen_running = False

    def _spawn_gen(self) -> None:
        """Start the background generator unless one is running or no backend exists."""
        if not self.available():
            return
        with self._gen_flag_lock:
            if self._gen_running:
                return
            self._gen_running = True
        try:
            threading.Thread(target=self._prebuild, daemon=True).start()
        except RuntimeError:
            # The thread never ran, so its ``finally`` cannot reset the flag for us.
            with self._gen_flag_lock:
                self._gen_running = False
            self._log.exception("could not start background case generation")

    def _load_prebaked(self) -> None:
        """Load and shuffle the shipped case pool once; unreadable files are skipped."""
        with self._buffer_lock:
            if self._prebaked_loaded:
                return
            self._prebaked_loaded = True
            pool_dir = prebaked_cases_dir()
            if not pool_dir.is_dir():
                return
            for path in sorted(pool_dir.glob("*.json")):
                try:
                    self._prebaked.append(load_case(path))
                except Exception:
                    self._log.warning("skipping unreadable pre-baked case %s", path,
                                      exc_info=True)
            # Shuffle per process (the seed is time-based) so the FIRST case of every fresh
            # visit/restart is randomized - never the same opening mystery twice in a row.
            self._rng.shuffle(self._prebaked)

    def start_buffer(self) -> None:
        """Make the first New Case instant (shipped pool, shuffled) and start growing the
        pool with a fresh AI-generated case in the background."""
        self._load_prebaked()
        self._spawn_gen()

    def _take_buffered(self) -> CaseFile | None:
        """Pop and return the buffered fresh case, or ``None`` when there is none."""
        with self._buffer_lock:
            case, self._buffer = self._buffer, None
        return case

    def _take_prebaked(self) -> CaseFile | None:
        """Return the next case of the rotation, reshuffling after a full lap."""
        self._load_prebaked()
        with self._buffer_lock:
            if not self._prebaked:
                return None
            if self._prebaked_idx >= len(self._prebaked):
                # Bag exhausted: reshuffle for a fresh order on the next lap.
                self._rng.shuffle(self._prebaked)
                self._prebaked_idx = 0
            case = self._prebaked[self._prebaked_idx]
            self._prebaked_idx += 1
            return case

    def _maybe_refill(self) -> None:
        """Keep one fresh AI case cooking in the background whenever the buffer is empty."""
        if self._buffer is None:
            self._spawn_gen()

    def new_generated_run(self) -> tuple[PublicCase, str] | None:
        """Register a run on a new case and return ``(public_case, run_id)``.

        Returns ``None`` when no backend is available or synchronous generation fails.
        """
        if not self.available():
            return None
        # Prefer a freshly generated case if one is ready; else serve the pre-baked pool
        # instantly; only with neither do we generate synchronously (first run, no pool).
        case = self._take_buffered()
        if case is None:
            case = self._take_prebaked()
        if case is None:
            try:
                case = self._generate(self._next_seed())
            except Exception:
                self._log.exception("synchronous case generation failed")
                return None
        self._maybe_refill()
        return self._register(case)

    @staticmethod
    def _is_safe_case_id(case_id: str) -> bool:
        """Accept only ids that cannot step outside the case directories."""
        if not isinstance(case_id, str) or not case_id:
            return False
        return not any(ch in case_id for ch in "/\\\0")

    def load_generated_run(self, case_id: str) -> tuple[PublicCase, str] | None:
        """Register a run on the case named ``case_id``.

        The in-memory pool is searched first, then ``<case_id>.json`` in the pre-baked
        and runtime directories. Returns ``None`` when no backend is available, the id
        contains a path separator, or no loadable case is found.
        """
        if not self.available():
            return None
        self._load_prebaked()
        with self._buffer_lock:
            pool = list(self._prebaked)
        case = next((c for c in pool if c.case_id == case_id), None)
        if case is None:
            # ``case_id`` is caller-supplied: never let it address another directory.
            if not self._is_safe_case_id(case_id):
                return None
            for directory in (prebaked_cases_dir(), runtime_cases_dir()):
                path = directory / f"{case_id}.json"
                if not path.exists():
                    continue
                try:
                    case = load_case(path)
                except Exception:
                    self._log.warning("could not load case file %s", path, exc_info=True)
                    continue  # a broken copy here must not hide a good one elsewhere
                break
        if case is None:
            return None
        return self._register(case)

    def _register(self, case: CaseFile) -> tuple[PublicCase, str]:
        """Create a session for ``case``, store it as a live run, return view and id."""
        public = casefile_to_public(case)
        session = Session(case, self._get_backend())  # type: ignore[arg-type]
        run_id = uuid.uuid4().hex
        baselines = {s.id: s.baseline_suspicion for s in public.suspects}
        # NOTE(review): runs are never evicted, so this dict grows for the life of
        # the process - consider a cap or expiry if memory becomes a concern.
        self._runs[run_id] = LiveRun(run_id, case, session, public, baselines)
        return public, run_id

    def get(self, run_id: str) -> LiveRun | None:
        """Look up a live run by id; ``None`` when unknown."""
        return self._runs.get(run_id)

    # ---- live turn / verdict ----
    def _suspicion(self, run: LiveRun, sus_id: str) -> int:
        """Return the suspect's suspicion score clamped to 0..100.

        Baseline (25 when unknown) plus stress scaled by 55, plus 20 once any lie
        has been broken.
        """
        st = run.session.state.state_for(sus_id)
        base = run.baselines.get(sus_id, 25)
        val = base + round(st.stress * 55) + (20 if st.broken_lie_ids else 0)
        return max(0, min(100, val))

    def interrogate_live(
        self, run: LiveRun, sus_id: str, question: str, clue_id: str | None
    ) -> dict:
        """Run one interrogation turn under the model lock and return the reply payload.

        Returns a dict with ``reply``, ``suspicionDelta``, ``suspicion`` and ``flags``.
        """
        prev = self._suspicion(run, sus_id)
        # Tell any in-flight background generation to yield the lock NOW (it aborts
        # between tokens), then take the table.
        with self._players_lock:
            self._players_pending += 1
            self._gen_interrupt.set()
        self._last_player_ts = time.monotonic()
        with self._lock:
            with self._players_lock:
                self._players_pending -= 1
                # Only the last waiting player lowers the flag; otherwise a second
                # player still queued on the lock would lose their interrupt request.
                if self._players_pending == 0:
                    self._gen_interrupt.clear()
            final = None
            for ev in run.session.interrogate(sus_id, question, presented_clue_id=clue_id):
                if ev.final is not None:
                    final = ev.final
        # Measure idleness from the END of the turn, not its start.
        self._last_player_ts = time.monotonic()
        reply = final.turn.spoken if final is not None else "…I have nothing to say to that."
        after = self._suspicion(run, sus_id)
        adj = final.adjudication if final is not None else None
        rattled = adj is not None and adj.relevance in (Relevance.DIRECT, Relevance.BREAKING)
        cornered = adj is not None and bool(adj.is_contradiction)
        return {
            "reply": reply,
            "suspicionDelta": after - prev,
            "suspicion": after,
            "flags": {"rattled": rattled, "contradictionExposed": cornered, "cornered": cornered},
        }

    def accuse_live(self, run: LiveRun, suspect_id: str, motive_id: str, evidence_ids: list[str]) -> dict:
        """Submit the player's accusation and return the verdict payload.

        Returns a dict with ``correct``, ``verdict``, ``score`` and ``stats``.
        """
        # NOTE(review): this call does not take the model lock - confirm that
        # ``Session.accuse`` never calls the backend.
        verdict = run.session.accuse(
            Accusation(accused_sus_id=suspect_id, motive_id=motive_id, cited_clue_ids=tuple(evidence_ids))
        )
        culprit_id = run.case.culprit.sus_id
        killer = run.case.suspect(culprit_id)
        if verdict.culprit_correct:
            truth = verdict.rationale or run.case.culprit.method_narrative
        else:
            known = any(s.sus_id == suspect_id for s in run.case.suspects)
            accused = run.case.suspect(suspect_id).name if known else "the accused"
            truth = (
                f"You charged {accused}. The case held for a night - but the evidence led past "
                f"them to {killer.name}, who walked out into the rain."
            )
        return {
            "correct": verdict.culprit_correct,
            "verdict": {
                "stamp": "CASE CLOSED" if verdict.culprit_correct else "MISTRIAL",
                "killerId": culprit_id,
                "killerName": killer.name,
                "truth": truth,
            },
            "score": {
                "points": verdict.score,
                "max": 100,
                "killerCorrect": verdict.culprit_correct,
                "motiveCorrect": verdict.motive_correct,
                # Counts the clues cited, as submitted by the player.
                "evidenceHits": len(evidence_ids),
            },
            "stats": [],
        }
# Module-level shared instance: importers use this one runtime so that every model
# call goes through the same lock.
RUNTIME = GameRuntime()