case0 / src /case_zero /ui /blocks.py
HusseinEid's picture
Case Zero - initial public release (fully local: Qwen2.5-1.5B via llama.cpp + Supertonic, custom pixel-noir SPA via gradio.Server)
414dc55
raw
history blame
23 kB
"""The Gradio Blocks UI: a scene-based, animated pixel detective game.
Single-player: one GameController lives in gr.State per session. The interrogation
stage (animated suspect sprite in a room), the notebook, and the scenery are
deterministic CSS-animated graphics. Suspect dialogue streams into a visual-novel box;
hidden state never reaches the browser. SFX/music play client-side from data URIs.
"""
from __future__ import annotations
import random
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
import gradio as gr
from ..config import get_settings
from ..schemas.enums import MotiveCategory
from ..suspects.scrub import scrub_spoken
from .app_state import (
_TTS_DIR,
GameController,
_load_audio,
_obtain_shared_backend,
_obtain_shared_tts,
note_interaction,
start_case_buffer,
)
from .formatters import (
briefing_html,
dialogue_html,
evidence_html,
how_to_play_html,
notebook_html,
stage_html,
verdict_html,
)
from .theme import build_css
# Wrong-answer weapons listed next to the real murder weapon in the accusation form.
_WEAPON_DECOYS = (
    "Poison",
    "Strangulation",
    "A fall",
)

# Status lines cycled (with a trailing "...") in the loading overlay's flavor text.
_FLAVOR_LINES = (
    "Opening the precinct",
    "Dusting for fingerprints",
    "Rounding up the usual suspects",
    "Brewing the detective's coffee",
    "Reviewing the case files",
    "Polishing the interrogation lamp",
    "Chasing a lead down the alley",
    "Cataloguing the evidence",
    "Tuning the suspects' alibis",
)
def _audio_setup_js(sfx: dict[str, str], music: str) -> str:
    """Return the page-load JavaScript that sets up audio and sprite animation.

    Gradio strips <script> from gr.HTML, so this load-event JS is how audio/animation
    actually work. The generated arrow function runs its body once per page (guarded
    by ``window.czAudioReady``) and:

    * appends a looping ``<audio id="cz-bgm">`` for the music and one
      ``<audio id="cz-sfx-<event>">`` per entry of ``sfx``;
    * defines ``window.czSfx``, ``window.czMusicOn``, ``window.czMusicOff`` and
      ``window.czTalk``;
    * cycles the flavor lines into ``#cz-flavor`` every 2800 ms;
    * tries to start the music and marks ``#czmusicbtn`` with the ``cz-on`` class.

    Args:
        sfx: Mapping of sound-effect event name to the audio source to load for it.
        music: Audio source of the background track.

    Returns:
        JavaScript source of a zero-argument arrow function.
    """
    # Local import so this block stays self-contained. json.dumps emits literals that
    # are valid JavaScript for every input (quotes, backslashes, non-ASCII, None);
    # Python's repr() only happens to look like JS for simple ASCII strings.
    import json

    makers = [f"mk('cz-bgm',{json.dumps(music)},true);"]
    makers.extend(
        f"mk({json.dumps('cz-sfx-' + event)},{json.dumps(uri)},false);"
        for event, uri in sfx.items()
    )
    flavor_js = json.dumps(list(_FLAVOR_LINES))

    parts = [
        "() => {",
        # Run-once guard: the load event can fire again on re-render.
        "if(window.czAudioReady)return; window.czAudioReady=true;",
        "const mk=(id,src,loop)=>{const a=document.createElement('audio');a.id=id;a.src=src;",
        "a.preload='auto';if(loop)a.loop=true;document.body.appendChild(a);};",
        *makers,
        "window.czSfx=function(n){var a=document.getElementById('cz-sfx-'+n);",
        "if(a){try{a.currentTime=0;a.volume=(n==='page')?0.22:0.45;a.play();}catch(e){}}};",
        "window.czMusicOn=function(){var m=document.getElementById('cz-bgm');",
        "if(m){m.volume=0.3;m.play().catch(function(){});}};",
        "window.czMusicOff=function(){var m=document.getElementById('cz-bgm');if(m)m.pause();};",
        "window.czTalk=function(){var s=document.getElementById('cz-sprite');",
        "if(s){s.classList.add('talking');setTimeout(function(){s.classList.remove('talking');},3000);}};",
        f"var fl={flavor_js}; var i=0;",
        "var tick=function(){var e=document.getElementById('cz-flavor');",
        "if(e){e.textContent=fl[i%fl.length]+'...'; i++;}};tick();setInterval(tick,2800);",
        "try{czMusicOn();var b=document.getElementById('czmusicbtn');",
        "if(b)b.classList.add('cz-on');}catch(e){}",
        "}",
    ]
    return "".join(parts)
def _next_sentence(text: str, start: int) -> tuple[str, int] | None:
    """Find the next finished sentence in ``text`` at or after index ``start``.

    A sentence is finished once a run of terminators (``.``, ``!``, ``?``) is
    immediately followed by a whitespace character. A terminator run that ends the
    text, or is followed by anything else, does not finish a sentence, so partially
    streamed replies yield ``None`` until more text arrives.

    Returns:
        ``(sentence, new_cursor)`` where ``sentence`` is stripped and ``new_cursor``
        is the index just past the terminator run, or ``None`` if no sentence is
        complete yet.
    """
    terminators = ".!?"
    length = len(text)

    # Skip leading whitespace so the sentence starts on a real character.
    begin = start
    while begin < length and text[begin].isspace():
        begin += 1

    pos = begin
    while pos < length:
        if text[pos] not in terminators:
            pos += 1
            continue
        # Swallow the whole terminator run ("...", "?!") as one sentence ending.
        end = pos + 1
        while end < length and text[end] in terminators:
            end += 1
        if end < length and text[end].isspace():
            return text[begin:end].strip(), end
        # Not a sentence boundary (e.g. "3.5" or end of stream): keep scanning.
        pos = end
    return None
def _stage(controller: GameController, sus_id: str) -> str:
    """Render the interrogation stage HTML for the suspect ``sus_id``."""
    # The suspect is always questioned in a neutral interrogation room - the stage never
    # implies where they "were", so searching a room can no longer contradict their alibi.
    backdrop = controller.interrogation_uri()
    portrait = controller.portrait_sheet_uri(sus_id)
    suspect_name = controller.case.suspect(sus_id).name
    return stage_html(backdrop, portrait, suspect_name, "Interrogation Room")
def _evidence_items(controller: GameController) -> list[tuple[str, str, str]]:
    """List ``(name, reveal_text, prop_uri)`` for each clue in the session's evidence."""
    items: list[tuple[str, str, str]] = []
    for clue in controller.session.evidence():
        items.append((clue.name, clue.reveal_text, controller.prop_uri(clue)))
    return items
def _full_state(controller: GameController):
    """Build the 14 values that show a freshly started case in the UI.

    Order of the returned tuple: controller, briefing HTML, roster update, stage HTML,
    opening dialogue HTML, location dropdown update, evidence dropdown update (emptied),
    evidence panel HTML (empty), notebook HTML, accused dropdown update, weapon dropdown
    update, motive dropdown update, cited-evidence update (emptied), verdict update
    (cleared and hidden).
    """
    case = controller.case
    opener = case.suspects[0]
    motive_choices = [m.value for m in MotiveCategory]

    briefing = briefing_html(controller.session.player_view)
    roster = gr.update(value=controller.roster())
    stage = _stage(controller, opener.sus_id)
    dialogue = dialogue_html(opener.name, "I suppose you have questions. Get on with it.")
    locations = gr.update(choices=controller.location_choices(), value=None)
    present_evidence = gr.update(choices=[], value=None)
    evidence_panel = evidence_html([])
    notebook = notebook_html(controller.session.state, case)
    accused = gr.update(choices=[s.name for s in case.suspects], value=None)

    # The real weapon comes first, followed by every decoy that is not the real weapon.
    weapon_choices = [case.weapon.name]
    weapon_choices.extend(d for d in _WEAPON_DECOYS if d != case.weapon.name)
    weapon = gr.update(choices=weapon_choices, value=None)

    motive = gr.update(choices=motive_choices, value=None)
    cited = gr.update(choices=[], value=[])
    # verdict: hidden until an accusation is made
    verdict = gr.update(value="", visible=False)

    return (
        controller, briefing, roster, stage, dialogue, locations, present_evidence,
        evidence_panel, notebook, accused, weapon, motive, cited, verdict,
    )
def _new_case(controller: GameController | None):
    """Start a new case, yielding (overlay update, *full UI state) tuples.

    When a background-generated case is already buffered, a single tuple is yielded
    with the overlay hidden. Otherwise the overlay is shown first (leaving the other
    13 components untouched), a case is generated live with a random seed, and the
    full state is yielded afterwards.
    """
    if not controller:
        controller = GameController()

    # Fast path: a freshly-generated case is already waiting in the background buffer -
    # reveal it instantly, no overlay. (The worker immediately starts the next one.)
    served_from_buffer = controller.start_buffered(wait_secs=0.0)
    if not served_from_buffer:
        # Otherwise generate live behind the overlay (buffer not ready yet).
        show_overlay = gr.update(value=_overlay_html("generate"), visible=True)
        untouched = [gr.update() for _ in range(13)]
        yield (show_overlay, controller, *untouched)
        controller.start("generate", seed=random.randint(1, 999_999))

    yield (gr.update(visible=False), *_full_state(controller))
def _select_suspect(controller: GameController | None, evt: gr.SelectData):
    """Switch to the suspect picked in the roster; return (stage HTML, dialogue HTML).

    The dialogue shows the suspect's last transcript answer, or a stock opener when
    that suspect has no transcript yet. With no active session both outputs are left
    unchanged.
    """
    if controller is None or controller.session is None:
        return gr.update(), gr.update()

    controller.select_by_index(evt.index)
    sus_id = controller.current_sus
    history = controller.session.state.state_for(sus_id).transcript
    if history:
        line = history[-1].answer
    else:
        line = "Well? Ask your questions."
    stage_markup = _stage(controller, sus_id)
    return stage_markup, dialogue_html(controller.current_name(), line)
def _ask(controller: GameController | None, question: str, evidence_name: str | None):
"""Stream one interrogation turn for the currently selected suspect.

Generator event handler. Every yield is a 5-tuple in the order the handler is wired
in build_app: (dialogue HTML, evidence dropdown update, notebook HTML, voice chunk
for the hidden cz-tts textbox, evidence box update). The last slot is always an
untouched gr.update().

Without a controller/session it yields an empty dialogue once; for a blank question
it yields a stock prompt line once. Otherwise it iterates
controller.session.interrogate(...), accumulates the streamed text, reveals only
scrubbed complete sentences, submits each revealed sentence to controller.speak on a
single-worker thread pool when TTS is available, and finally yields the finished
line together with the refreshed notebook and a cleared evidence selection.
"""
if controller is None or controller.session is None:
yield dialogue_html("", ""), gr.update(), gr.update(), gr.update(), gr.update()
return
name = controller.current_name()
# NOTE(review): assumes question is always a str; a None value would raise on
# .strip() here - confirm the Textbox never delivers None.
if not question.strip():
yield (dialogue_html(name, "Ask me something, Detective."),
gr.update(), gr.update(), gr.update(), gr.update())
return
sus_id = controller.current_sus
clue_id = controller.clue_id_for_name(evidence_name)
breaking = controller.relevance_breaking(clue_id)
note_interaction() # tell the background case generator to yield CPU to this reply
# Voice is synthesized sentence-by-sentence in a background worker WHILE the LLM keeps
# streaming (both release the GIL), so the first words are spoken almost immediately
# instead of after the whole reply. Chunks play in order via a JS queue.
voice_on = bool(getattr(controller.tts, "available", False)) and bool(sus_id)
# max_workers=1 means submitted sentences are synthesized one at a time, in order.
pool = ThreadPoolExecutor(max_workers=1) if voice_on else None
futures: list = []
fptr = 0 # index of the next future whose audio has not been emitted yet
raw = "" # full raw stream (kept only for the cursor; never shown directly)
shown = "" # scrubbed, sentence-complete text the player actually sees + hears
cursor = 0 # position in raw up to which complete sentences have been consumed
final = None
try:
for event in controller.session.interrogate(sus_id, question, clue_id):
if event.spoken_delta:
raw += event.spoken_delta
note_interaction() # keep the generator backed off for the whole reply
# Reveal (and voice) only COMPLETE, scrubbed sentences. A confession can
# never flash on screen or be spoken, and the text appears in step with
# the voice rather than racing ahead of it.
while (nxt := _next_sentence(raw, cursor)) is not None:
sentence, cursor = nxt
clean = scrub_spoken(sentence, breaking=breaking)
if not clean:
continue
shown = f"{shown} {clean}".strip()
if pool is not None:
futures.append(pool.submit(controller.speak, clean))
uri = gr.update() # emit at most one ready audio chunk per streamed tick
# Only consume the next future when it is already done, so streaming never blocks
# on synthesis; result() re-raises anything controller.speak raised.
if fptr < len(futures) and futures[fptr].done():
got = futures[fptr].result()
fptr += 1
if got:
uri = got
yield (dialogue_html(name, shown, streaming=True),
gr.update(), gr.update(), uri, gr.update())
if event.final is not None:
final = event.final
# Flush the trailing partial sentence (the last line rarely ends with whitespace).
tail = scrub_spoken(raw[cursor:].strip(), breaking=breaking)
if tail:
shown = f"{shown} {tail}".strip()
if pool is not None:
futures.append(pool.submit(controller.speak, tail))
# The engine's final spoken line is the authoritative (already-scrubbed) text.
line = final.turn.spoken if final else (shown or "...")
note = notebook_html(controller.session.state, controller.case)
# Clear the evidence selection so it is presented ONLY on the turn it was chosen
# (otherwise it would silently re-present every subsequent question).
clear_ev = gr.update(value=None)
# Drain any remaining audio chunks in order, one yield each so every chunk plays.
# Here result() blocks until each remaining chunk has been synthesized.
while fptr < len(futures):
got = futures[fptr].result()
fptr += 1
yield (dialogue_html(name, line), clear_ev, note, (got or gr.update()), gr.update())
yield (dialogue_html(name, line), clear_ev, note, gr.update(), gr.update())
finally:
# NOTE(review): wait=False returns immediately but does not cancel synth jobs that
# are still queued if this generator is closed early; cancel_futures=True
# (Python 3.9+) would drop them - confirm which behavior is intended.
if pool is not None:
pool.shutdown(wait=False)
def _search(controller: GameController | None, loc_name: str | None):
    """Search ``loc_name`` and refresh the evidence-related components.

    Returns a 4-tuple: (evidence panel HTML, present-evidence dropdown update,
    notebook HTML, cited-evidence choices update). With no active session or no
    location chosen, all four components are left unchanged.
    """
    # Searching reveals a room's evidence; it does NOT move the suspect (the stage stays in
    # the interrogation room), so it can never imply a false whereabouts for them.
    ready = controller is not None and controller.session is not None and bool(loc_name)
    if not ready:
        return tuple(gr.update() for _ in range(4))

    controller.search(loc_name)
    evidence_panel = evidence_html(_evidence_items(controller))
    present_choices = gr.update(choices=controller.evidence_choices())
    notebook_panel = notebook_html(controller.session.state, controller.case)
    cite_choices = gr.update(choices=controller.evidence_choices())
    return evidence_panel, present_choices, notebook_panel, cite_choices
def _add_note(controller: GameController | None, text: str):
    """Append a player note; return (notebook HTML, "") so the input box is cleared.

    With no active session or a blank note the notebook is left unchanged (the input
    is still cleared).
    """
    has_session = controller is not None and controller.session is not None
    if not has_session or not text.strip():
        return gr.update(), ""

    controller.add_note(text)
    refreshed = notebook_html(controller.session.state, controller.case)
    return refreshed, ""
def _accuse(controller: GameController | None, accused: str | None, weapon: str | None,
            motive: str | None, cited: list[str] | None):
    """Resolve the player's accusation and return the update that reveals the verdict.

    The weapon and motive picks are compared against the case's weapon name and the
    culprit's true motive category value; the booleans are passed to
    ``controller.accuse`` along with the cited evidence (an empty list when none).
    Without an active session or an accused name, a prompt is shown instead.
    """
    can_accuse = controller is not None and controller.session is not None and bool(accused)
    if not can_accuse:
        prompt = "<div class='cz-verdict'>Name who you are accusing first.</div>"
        return gr.update(value=prompt, visible=True)

    case = controller.case
    weapon_ok = weapon == case.weapon.name
    motive_ok = motive == case.culprit.true_motive.category.value
    evidence = cited or []
    verdict = controller.accuse(accused, weapon_ok, motive_ok, evidence)
    # The verdict panel is hidden until now (no empty box before the player accuses).
    return gr.update(value=verdict_html(controller.case, verdict), visible=True)
# Music toggle is driven entirely by the actual <audio> element state (no race with a
# Python state flip). A 'cz-on' class lights the button when the track is playing.

# Shared JS snippet: bind the music button element to ``b``.
_MUSIC_BTN_FIND = "var b=document.getElementById('czmusicbtn');"

# Click handler: play the track if it is paused, pause it otherwise, then sync the
# button's 'cz-on' class with the element's paused flag.
_MUSIC_TOGGLE_JS = "".join((
    "() => { var m=document.getElementById('cz-bgm'); if(!m)return;",
    "if(m.paused){m.volume=0.3;m.play().catch(function(){});}else{m.pause();}",
    _MUSIC_BTN_FIND,
    "if(b)b.classList.toggle('cz-on', !m.paused); }",
))

# Handler that starts the music (when the helper exists) and lights the button.
_MUSIC_ON_JS = "".join((
    "() => { if(window.czMusicOn) czMusicOn();",
    _MUSIC_BTN_FIND,
    "if(b)b.classList.add('cz-on'); }",
))
def _overlay_html(mode: str) -> str:
    """Full-screen loading overlay, shared by model warmup and case generation.

    ``mode == "generate"`` renders the new-case variant (fixed headline, no extras);
    any other value renders the warmup variant (no headline, how-to-play appended).
    """
    if mode == "generate":
        head_html = "<div class='cz-overlay-head'>A NEW CASE LANDS ON YOUR DESK</div>"
        sub = ("Give it a minute or two, detective - the case file is being assembled: the "
               "victim, the suspects, their alibis, and the evidence.")
        extra = ""
    else:
        # no fixed header on warmup; "Opening the precinct" cycles as a flavor line
        head_html = ""
        sub = ("Hang tight for a minute or two, detective - first boot warms up the case room, "
               "the suspects, and the interrogation lamp.")
        extra = how_to_play_html()

    pieces = [
        "<div class='cz-overlay'><div class='cz-overlay-inner'>",
        "<div class='cz-bigtitle'>CASE&nbsp;ZERO</div>",
        head_html,
        "<div id='cz-flavor' class='cz-flavor'>Opening the precinct...</div>",
        "<div class='cz-loadbar'><span></span></div>",
        f"<div class='cz-muted' style='text-align:center;margin-bottom:12px'>{sub}</div>",
        f"{extra}</div></div>",
    ]
    return "".join(pieces)
def _warmup(controller: GameController | None):
    """Behind the full-screen overlay: warm the models (cold-start) and serve the first
    case from the background buffer (it has been generating since startup). Every case is
    live-generated and unique; the buffer just lets us start without a second wait.

    Generator event handler. It yields once: (overlay hidden, game column shown,
    *full UI state for the started case). Model warm-up runs on a daemon thread and is
    best-effort: a failure there is logged and never blocks or fails the first case.
    """
    import logging
    import threading

    log = logging.getLogger(__name__)
    settings = get_settings()
    start_case_buffer(settings)  # ensure background generation is running
    tts = _obtain_shared_tts(settings)

    # Warm the interrogation backend + TTS in the background so the briefing reveals as
    # soon as the first case is ready (and the first question/voice are responsive).
    def _warm() -> None:
        try:
            from ..llm.backend import GenParams
            _obtain_shared_backend(settings).generate("ok", GenParams(max_tokens=1, temperature=0.0))
        except Exception:
            # Still best-effort, but leave a trace instead of hiding a broken backend.
            log.warning("Interrogation backend warm-up failed", exc_info=True)
        try:
            from ..schemas.suspect import VoiceAssignment
            warm = tts.synth_to_file("Ready.", VoiceAssignment(engine="supertonic", speaker_id=0),
                                     _TTS_DIR / "warm.wav")
            if warm:
                # The warm-up clip is only a side effect of priming the engine.
                Path(warm).unlink(missing_ok=True)
        except Exception:
            log.warning("TTS warm-up failed", exc_info=True)

    threading.Thread(target=_warm, daemon=True).start()

    controller = controller or GameController()
    # Wait for the first background-generated case; fall back to live generation if needed.
    if not controller.start_buffered(wait_secs=600.0):
        controller.start("generate", seed=random.randint(1, 999_999))
    yield (gr.update(visible=False), gr.update(visible=True), *_full_state(controller))
# Sentence-chunked playback: each synthesized chunk is queued and played in order, and
# the suspect's mouth moves while the queue is draining (stops when it empties).
#
# ``nx`` is registered for 'ended', for 'error', and as the rejection handler of
# play(). A chunk that fails to load triggers both the 'error' event and the play()
# rejection, so without the ``done`` guard the queue would advance twice and two
# chunks would play on top of each other.
_TTS_PLAY_JS = (
    "(u)=>{ if(!u) return; try{"
    "window.czQ = window.czQ || [];"
    "var s=document.getElementById('cz-sprite');"
    "window.czQ.push(u);"
    "if(window.czPlaying) return;"
    "var play=function(){"
    " if(!window.czQ.length){ window.czPlaying=false; if(s)s.classList.remove('talking'); return; }"
    " window.czPlaying=true; if(s)s.classList.add('talking');"
    " var a=new Audio(window.czQ.shift()); a.volume=0.95; window.czCur=a;"
    " var done=false;"
    " var nx=function(){ if(done) return; done=true; play(); };"
    " a.addEventListener('ended',nx); a.addEventListener('error',nx);"
    " a.play().catch(nx);"
    "}; play();"
    "}catch(e){} }"
)
def build_app() -> gr.Blocks:
"""Assemble the Case Zero Blocks app: layout, per-session state and event wiring.

Loads the audio assets via _load_audio(), builds the components, attaches every
handler defined in this module, and returns the gr.Blocks object. Nothing is
launched here.
"""
sfx, music = _load_audio()
with gr.Blocks(css=build_css(), title="Case Zero", theme=gr.themes.Base(),
analytics_enabled=False) as demo:
# Per-session value; starts as None and is filled in by _warmup / _new_case.
state = gr.State(None)
# JS-only load listener: creates the <audio> elements and the czSfx/czMusic helpers.
demo.load(None, None, None, js=_audio_setup_js(sfx, music))
# Music toggle floats above everything (reachable on the loading overlay too).
# A compact note icon - small and elegant on phones (lights amber when playing).
music_btn = gr.Button("♪", elem_id="czmusicbtn", elem_classes=["cz-music"])
# Full-screen overlay (warmup + live generation). Its content is position:fixed, so
# its Gradio wrapper is collapsed (cz-overlay-host) to avoid an empty box in flow.
overlay = gr.HTML(value=_overlay_html("warmup"), visible=True,
elem_classes=["cz-overlay-host"])
# Title + credit live INSIDE the game column so the loading screen shows only the
# overlay (no stray header/footer boxes during warmup).
with gr.Column(visible=False) as game:
gr.HTML("<div id='cz-title'>CASE&nbsp;ZERO</div>"
"<div id='cz-subtitle'>interrogate &middot; investigate &middot; accuse</div>")
with gr.Row():
new_btn = gr.Button("New Case")
briefing = gr.HTML(value=how_to_play_html())
with gr.Row(equal_height=False):
with gr.Column(scale=3):
gr.HTML("<h3>Suspects</h3>")
roster = gr.Gallery(columns=2, height=330, show_label=False, allow_preview=False,
object_fit="contain", elem_classes="cz-panel")
gr.HTML("<h3>Investigate</h3>")
loc_dd = gr.Dropdown(label="Search a room", choices=[])
search_btn = gr.Button("Search Room")
with gr.Column(scale=6):
stage = gr.HTML()
dialogue = gr.HTML()
evidence_dd = gr.Dropdown(label="Present evidence (search rooms first)",
choices=[])
question = gr.Textbox(label="Ask the suspect",
placeholder="Where were you that night?", lines=1)
ask_btn = gr.Button("Ask")
# Hidden textbox used as a channel: its change event hands each voice chunk to JS.
tts_audio = gr.Textbox(visible=False, elem_id="cz-tts")
with gr.Column(scale=3):
gr.HTML("<h3>Evidence</h3>")
evidence_box = gr.HTML()
gr.HTML("<h3>Notebook</h3>")
notebook = gr.HTML()
note_in = gr.Textbox(placeholder="Jot a note to yourself...", lines=1,
show_label=False, elem_classes="cz-note-in")
note_btn = gr.Button("Add Note")
gr.HTML("<h3>Make Your Accusation</h3>")
with gr.Row():
accused_dd = gr.Dropdown(label="The killer is", choices=[])
weapon_dd = gr.Dropdown(label="With the", choices=[])
motive_dd = gr.Dropdown(label="Because of", choices=[])
cited_cg = gr.CheckboxGroup(label="Citing this evidence", choices=[])
accuse_btn = gr.Button("Accuse", elem_classes="cz-accuse")
verdict = gr.HTML(visible=False) # appears only after an accusation is made
gr.HTML(
"<div class='cz-credit'>Case Zero &middot; Story &amp; suspects: Qwen2.5-1.5B "
"(local, via llama.cpp) &middot; Voices: Supertonic (local on-device TTS) "
"&middot; Music: \"Backbay Lounge\" by Kevin MacLeod (incompetech.com), licensed "
"under Creative Commons Attribution 4.0 (CC BY 4.0 - free to use with credit)</div>"
)
# Warm the models AND serve the first case from the buffer behind the overlay.
# 16 outputs: overlay + game column + the 14 values produced by _full_state.
warm_outputs = [overlay, game, state, briefing, roster, stage, dialogue, loc_dd,
evidence_dd, evidence_box, notebook, accused_dd, weapon_dd, motive_dd,
cited_cg, verdict]
demo.load(_warmup, [state], warm_outputs)
# 15 outputs: overlay + the 14 values produced by _full_state (state comes first).
new_outputs = [overlay, state, briefing, roster, stage, dialogue, loc_dd, evidence_dd,
evidence_box, notebook, accused_dd, weapon_dd, motive_dd, cited_cg, verdict]
new_btn.click(_new_case, [state], new_outputs)
new_btn.click(None, None, None, js=_MUSIC_ON_JS)
music_btn.click(None, None, None, js=_MUSIC_TOGGLE_JS)
roster.select(_select_suspect, [state], [stage, dialogue])
roster.select(None, None, None, js="() => { if(window.czSfx) czSfx('select'); }")
# Order matches the 5-tuples yielded by _ask.
ask_outputs = [dialogue, evidence_dd, notebook, tts_audio, evidence_box]
# Stop any leftover voice and clear the queue before a new reply starts streaming.
ask_js = ("() => { try{ if(window.czCur){window.czCur.pause();} window.czQ=[];"
" window.czPlaying=false; var s=document.getElementById('cz-sprite');"
" if(s)s.classList.remove('talking'); }catch(e){}"
" if(window.czSfx) czSfx('select'); }")
# The Ask button and Enter in the textbox are wired identically: the Python
# handler, the JS-only voice reset, and a listener that empties the question box.
ask_btn.click(_ask, [state, question, evidence_dd], ask_outputs)
ask_btn.click(None, None, None, js=ask_js)
ask_btn.click(lambda: "", None, question) # clear the question box after asking
question.submit(_ask, [state, question, evidence_dd], ask_outputs)
question.submit(None, None, None, js=ask_js)
question.submit(lambda: "", None, question)
# Speak the line and sync the mouth to the actual audio playback.
tts_audio.change(None, [tts_audio], None, js=_TTS_PLAY_JS)
# Order matches the 4-tuple returned by _search.
search_outputs = [evidence_box, evidence_dd, notebook, cited_cg]
search_btn.click(_search, [state, loc_dd], search_outputs)
search_btn.click(None, None, None, js="() => { if(window.czSfx) czSfx('page'); }")
# Let the player add their own notebook entries.
note_btn.click(_add_note, [state, note_in], [notebook, note_in])
note_in.submit(_add_note, [state, note_in], [notebook, note_in])
accuse_btn.click(_accuse, [state, accused_dd, weapon_dd, motive_dd, cited_cg], [verdict])
accuse_btn.click(None, None, None, js="() => { if(window.czSfx) czSfx('accuse'); }")
return demo