Spaces:
Running
Running
Case Zero - initial public release (fully local: Qwen2.5-1.5B via llama.cpp + Supertonic, custom pixel-noir SPA via gradio.Server)
414dc55 | """The Gradio Blocks UI: a scene-based, animated pixel detective game. | |
| Single-player: one GameController lives in gr.State per session. The interrogation | |
| stage (animated suspect sprite in a room), the notebook, and the scenery are | |
| deterministic CSS-animated graphics. Suspect dialogue streams into a visual-novel box; | |
| hidden state never reaches the browser. SFX/music play client-side from data URIs. | |
| """ | |
| from __future__ import annotations | |
| import random | |
| from concurrent.futures import ThreadPoolExecutor | |
| from pathlib import Path | |
| import gradio as gr | |
| from ..config import get_settings | |
| from ..schemas.enums import MotiveCategory | |
| from ..suspects.scrub import scrub_spoken | |
| from .app_state import ( | |
| _TTS_DIR, | |
| GameController, | |
| _load_audio, | |
| _obtain_shared_backend, | |
| _obtain_shared_tts, | |
| note_interaction, | |
| start_case_buffer, | |
| ) | |
| from .formatters import ( | |
| briefing_html, | |
| dialogue_html, | |
| evidence_html, | |
| how_to_play_html, | |
| notebook_html, | |
| stage_html, | |
| verdict_html, | |
| ) | |
| from .theme import build_css | |
# Wrong-answer weapons listed next to the case's real weapon in the accusation dropdown
# (a decoy whose name equals the real weapon is dropped by _full_state).
_WEAPON_DECOYS = ("Poison", "Strangulation", "A fall")
# Status lines the loading overlay cycles through; the load-event JS appends "..." to
# each one and advances to the next every 2.8 seconds.
_FLAVOR_LINES = (
    "Opening the precinct", "Dusting for fingerprints", "Rounding up the usual suspects",
    "Brewing the detective's coffee", "Reviewing the case files", "Polishing the interrogation lamp",
    "Chasing a lead down the alley", "Cataloguing the evidence", "Tuning the suspects' alibis",
)
def _audio_setup_js(sfx: dict[str, str], music: str) -> str:
    """Build the load-event JS that wires up audio and animation in the browser.

    The script creates the <audio> elements, defines the ``czSfx`` / ``czMusicOn`` /
    ``czMusicOff`` / ``czTalk`` helpers on ``window``, cycles the loading-screen flavor
    text, and tries to start the music. Gradio strips <script> from gr.HTML, so this
    load-event JS is how audio/animation actually work.

    Args:
        sfx: Sound-effect sources keyed by event name; each one becomes an <audio>
            element with id ``cz-sfx-<event>``.
        music: Source of the looping background track (element id ``cz-bgm``).

    Returns:
        The source text of a JavaScript arrow function.
    """
    import json  # local import keeps this block self-contained

    # json.dumps (not repr) produces the JS literals: Python's repr can emit escapes
    # that JavaScript does not understand, and raw interpolation of the event name would
    # break the script if a name ever contained a quote.
    makers = [f"mk('cz-bgm',{json.dumps(music)},true);"]
    makers.extend(
        f"mk({json.dumps('cz-sfx-' + event)},{json.dumps(uri)},false);"
        for event, uri in sfx.items()
    )
    flavor = json.dumps(list(_FLAVOR_LINES))
    return (
        "() => {"
        # Guard: the load event may fire more than once; set everything up only once.
        "if(window.czAudioReady)return; window.czAudioReady=true;"
        "const mk=(id,src,loop)=>{const a=document.createElement('audio');a.id=id;a.src=src;"
        "a.preload='auto';if(loop)a.loop=true;document.body.appendChild(a);};"
        + "".join(makers)
        + "window.czSfx=function(n){var a=document.getElementById('cz-sfx-'+n);"
        "if(a){try{a.currentTime=0;a.volume=(n==='page')?0.22:0.45;a.play();}catch(e){}}};"
        "window.czMusicOn=function(){var m=document.getElementById('cz-bgm');"
        "if(m){m.volume=0.3;m.play().catch(function(){});}};"
        "window.czMusicOff=function(){var m=document.getElementById('cz-bgm');if(m)m.pause();};"
        "window.czTalk=function(){var s=document.getElementById('cz-sprite');"
        "if(s){s.classList.add('talking');setTimeout(function(){s.classList.remove('talking');},3000);}};"
        f"var fl={flavor}; var i=0;"
        "var tick=function(){var e=document.getElementById('cz-flavor');"
        "if(e){e.textContent=fl[i%fl.length]+'...'; i++;}};tick();setInterval(tick,2800);"
        # Autoplay without a user gesture is commonly rejected by browsers, so the music
        # button is lit only once playback has actually started.
        "try{var m=document.getElementById('cz-bgm');var b=document.getElementById('czmusicbtn');"
        "if(m){m.volume=0.3;var p=m.play();"
        "if(p&&p.then){p.then(function(){if(b)b.classList.add('cz-on');}).catch(function(){});}"
        "else if(b&&!m.paused){b.classList.add('cz-on');}}"
        "}catch(e){}"
        "}"
    )
def _next_sentence(text: str, start: int) -> tuple[str, int] | None:
    """Find the next COMPLETE sentence in ``text`` at or after index ``start``.

    A sentence is complete once a run of terminators (``.``, ``!``, ``?``) is directly
    followed by a whitespace character. Returns ``(sentence, new_cursor)`` where
    ``new_cursor`` is the index of that whitespace, or ``None`` while the sentence is
    still being streamed. Used to synthesize a reply sentence-by-sentence as tokens
    arrive.
    """
    terminators = ".!?"
    size = len(text)

    # Skip the whitespace separating this sentence from the previous one.
    head = start
    while head < size and text[head].isspace():
        head += 1

    scan = head
    while scan < size:
        if text[scan] not in terminators:
            scan += 1
            continue
        # Swallow the whole terminator run ("...", "?!") before checking what follows.
        stop = scan + 1
        while stop < size and text[stop] in terminators:
            stop += 1
        if stop < size and text[stop].isspace():
            return text[head:stop].strip(), stop
        # No whitespace after the run (e.g. "3.5", or end of buffer): keep looking.
        scan = stop
    return None
def _stage(controller: GameController, sus_id: str) -> str:
    """Render the stage HTML for suspect ``sus_id`` in the interrogation room."""
    # The suspect is always questioned in a neutral interrogation room - the stage never
    # implies where they "were", so searching a room can no longer contradict their alibi.
    room_uri = controller.interrogation_uri()
    sheet_uri = controller.portrait_sheet_uri(sus_id)
    suspect_name = controller.case.suspect(sus_id).name
    return stage_html(room_uri, sheet_uri, suspect_name, "Interrogation Room")
def _evidence_items(controller: GameController) -> list[tuple[str, str, str]]:
    """Return one ``(name, reveal_text, prop_uri)`` triple per clue in the session's evidence."""
    triples = []
    for clue in controller.session.evidence():
        triples.append((clue.name, clue.reveal_text, controller.prop_uri(clue)))
    return triples
def _full_state(controller: GameController):
    """Build the values that put a freshly started case on screen.

    Returns a 14-tuple: the controller followed by 13 component values, in the order
    briefing, roster, stage, dialogue, room dropdown, evidence dropdown, evidence box,
    notebook, accused dropdown, weapon dropdown, motive dropdown, cited-evidence
    checkboxes, verdict. The first suspect is put on stage with an opening line;
    evidence and the verdict start empty/hidden.
    """
    case = controller.case
    first = case.suspects[0]
    motives = [m.value for m in MotiveCategory]
    # Sort the weapon options so the real weapon's position in the list never gives the
    # answer away (it used to always be the first entry). The set also drops a decoy
    # that happens to share the real weapon's name.
    weapon_choices = sorted({case.weapon.name, *_WEAPON_DECOYS})
    return (
        controller,
        briefing_html(controller.session.player_view),
        gr.update(value=controller.roster()),
        _stage(controller, first.sus_id),
        dialogue_html(first.name, "I suppose you have questions. Get on with it."),
        gr.update(choices=controller.location_choices(), value=None),
        gr.update(choices=[], value=None),
        evidence_html([]),
        notebook_html(controller.session.state, case),
        gr.update(choices=[s.name for s in case.suspects], value=None),
        gr.update(choices=weapon_choices, value=None),
        gr.update(choices=motives, value=None),
        gr.update(choices=[], value=[]),
        gr.update(value="", visible=False),  # verdict: hidden until an accusation is made
    )
def _new_case(controller: GameController | None):
    """Start a new case, yielding UI updates (overlay first, then session state + 13 panels).

    Creates a GameController when the session does not have one yet.
    """
    game = controller if controller else GameController()
    # Fast path: a freshly-generated case is already waiting in the background buffer -
    # reveal it instantly, no overlay. (The worker immediately starts the next one.)
    if not game.start_buffered(wait_secs=0.0):
        # Otherwise generate live behind the overlay (buffer not ready yet). The 13
        # no-op updates leave every game panel untouched while the overlay is showing.
        untouched = [gr.update() for _ in range(13)]
        yield (gr.update(value=_overlay_html("generate"), visible=True), game, *untouched)
        game.start("generate", seed=random.randint(1, 999_999))
    # Both paths end the same way: hide the overlay and show the new case.
    yield (gr.update(visible=False), *_full_state(game))
def _select_suspect(controller: GameController | None, evt: gr.SelectData):
    """Switch to the suspect picked in the roster; return ``(stage, dialogue)`` values.

    The dialogue box shows the suspect's most recent answer, or a stock prompt when
    they have not been questioned yet. With no active session both outputs are left
    unchanged.
    """
    if controller is None or controller.session is None:
        return gr.update(), gr.update()

    controller.select_by_index(evt.index)
    sus_id = controller.current_sus
    history = controller.session.state.state_for(sus_id).transcript
    if history:
        line = history[-1].answer
    else:
        line = "Well? Ask your questions."
    return _stage(controller, sus_id), dialogue_html(controller.current_name(), line)
def _ask(controller: GameController | None, question: str, evidence_name: str | None):
    """Stream the current suspect's reply to ``question`` as a series of UI updates.

    Generator used as a Gradio event handler. Every yield is a 5-tuple matching the
    outputs wired in build_app: (dialogue, evidence dropdown, notebook, TTS audio value,
    evidence box). The evidence box is never changed here.

    Args:
        controller: The session's game controller, or None before a case has started.
        question: The text typed by the player; blank input gets a stock prompt.
        evidence_name: Name of the evidence being presented with the question, if any.
    """
    # No active case: blank the dialogue box and leave everything else alone.
    if controller is None or controller.session is None:
        yield dialogue_html("", ""), gr.update(), gr.update(), gr.update(), gr.update()
        return
    name = controller.current_name()
    if not question.strip():
        yield (dialogue_html(name, "Ask me something, Detective."),
               gr.update(), gr.update(), gr.update(), gr.update())
        return
    sus_id = controller.current_sus
    clue_id = controller.clue_id_for_name(evidence_name)
    breaking = controller.relevance_breaking(clue_id)
    note_interaction()  # tell the background case generator to yield CPU to this reply
    # Voice is synthesized sentence-by-sentence in a background worker WHILE the LLM keeps
    # streaming (both release the GIL), so the first words are spoken almost immediately
    # instead of after the whole reply. Chunks play in order via a JS queue.
    voice_on = bool(getattr(controller.tts, "available", False)) and bool(sus_id)
    # A single worker, so chunks are synthesized one at a time in submission order.
    pool = ThreadPoolExecutor(max_workers=1) if voice_on else None
    futures: list = []  # one future per submitted sentence, in spoken order
    fptr = 0  # index of the next future whose audio has not been emitted yet
    raw = ""  # full raw stream (kept only for the cursor; never shown directly)
    shown = ""  # scrubbed, sentence-complete text the player actually sees + hears
    cursor = 0  # index into ``raw`` up to which complete sentences have been consumed
    final = None  # the engine's final event payload, once it arrives
    try:
        for event in controller.session.interrogate(sus_id, question, clue_id):
            if event.spoken_delta:
                raw += event.spoken_delta
                note_interaction()  # keep the generator backed off for the whole reply
                # Reveal (and voice) only COMPLETE, scrubbed sentences. A confession can
                # never flash on screen or be spoken, and the text appears in step with
                # the voice rather than racing ahead of it.
                while (nxt := _next_sentence(raw, cursor)) is not None:
                    sentence, cursor = nxt
                    clean = scrub_spoken(sentence, breaking=breaking)
                    if not clean:
                        continue
                    shown = f"{shown} {clean}".strip()
                    if pool is not None:
                        futures.append(pool.submit(controller.speak, clean))
                uri = gr.update()  # emit at most one ready audio chunk per streamed tick
                # Non-blocking: only take the next chunk if its synthesis has finished.
                if fptr < len(futures) and futures[fptr].done():
                    got = futures[fptr].result()
                    fptr += 1
                    if got:
                        uri = got
                yield (dialogue_html(name, shown, streaming=True),
                       gr.update(), gr.update(), uri, gr.update())
            if event.final is not None:
                final = event.final
        # Flush the trailing partial sentence (the last line rarely ends with whitespace).
        tail = scrub_spoken(raw[cursor:].strip(), breaking=breaking)
        if tail:
            shown = f"{shown} {tail}".strip()
            if pool is not None:
                futures.append(pool.submit(controller.speak, tail))
        # The engine's final spoken line is the authoritative (already-scrubbed) text.
        line = final.turn.spoken if final else (shown or "...")
        note = notebook_html(controller.session.state, controller.case)
        # Clear the evidence selection so it is presented ONLY on the turn it was chosen
        # (otherwise it would silently re-present every subsequent question).
        clear_ev = gr.update(value=None)
        # Drain any remaining audio chunks in order, one yield each so every chunk plays.
        # ``result()`` blocks here until the chunk is ready.
        while fptr < len(futures):
            got = futures[fptr].result()
            fptr += 1
            yield (dialogue_html(name, line), clear_ev, note, (got or gr.update()), gr.update())
        # Final update: the complete line, with no streaming flag and no new audio.
        yield (dialogue_html(name, line), clear_ev, note, gr.update(), gr.update())
    finally:
        # Runs even when the generator is closed early; does not wait for pending work.
        if pool is not None:
            pool.shutdown(wait=False)
def _search(controller: GameController | None, loc_name: str | None):
    """Search the room ``loc_name`` and refresh the evidence-related panels.

    Returns values for (evidence box, evidence dropdown, notebook, cited-evidence
    checkboxes); all four are left unchanged when there is no session or no room chosen.
    """
    # Searching reveals a room's evidence; it does NOT move the suspect (the stage stays in
    # the interrogation room), so it can never imply a false whereabouts for them.
    has_session = controller is not None and controller.session is not None
    if not (has_session and loc_name):
        return gr.update(), gr.update(), gr.update(), gr.update()

    controller.search(loc_name)
    found = _evidence_items(controller)
    evidence_panel = evidence_html(found)
    present_choices = gr.update(choices=controller.evidence_choices())
    notebook_panel = notebook_html(controller.session.state, controller.case)
    cite_choices = gr.update(choices=controller.evidence_choices())
    return evidence_panel, present_choices, notebook_panel, cite_choices
def _add_note(controller: GameController | None, text: str):
    """Add the player's note to the notebook; return ``(notebook value, cleared input)``.

    Blank notes, or a missing session, leave the notebook unchanged. The input box is
    cleared in every case.
    """
    if controller is not None and controller.session is not None and text.strip():
        controller.add_note(text)
        return notebook_html(controller.session.state, controller.case), ""
    return gr.update(), ""
def _accuse(controller: GameController | None, accused: str | None, weapon: str | None,
            motive: str | None, cited: list[str] | None):
    """Submit the player's accusation and return the update that reveals the verdict panel.

    When no suspect is named (or no session exists) the panel shows a reminder instead.
    The weapon and motive picks are compared with the case's true values here and passed
    to the controller as booleans.
    """
    nothing_to_judge = controller is None or controller.session is None or not accused
    if nothing_to_judge:
        reminder = "<div class='cz-verdict'>Name who you are accusing first.</div>"
        return gr.update(value=reminder, visible=True)

    true_weapon = controller.case.weapon.name
    weapon_ok = weapon == true_weapon
    true_motive = controller.case.culprit.true_motive.category.value
    motive_ok = motive == true_motive
    evidence_cited = cited or []
    verdict = controller.accuse(accused, weapon_ok, motive_ok, evidence_cited)
    # The verdict panel is hidden until now (no empty box before the player accuses).
    panel = verdict_html(controller.case, verdict)
    return gr.update(value=panel, visible=True)
# Music toggle is driven entirely by the actual <audio> element state (no race with a
# Python state flip). A 'cz-on' class lights the button when the track is playing.
# Shared JS fragment: binds ``b`` to the music button element (null when it is absent).
_MUSIC_BTN_FIND = "var b=document.getElementById('czmusicbtn');"
# Click handler for the music button: play the background track if it is paused,
# otherwise pause it, then set 'cz-on' on the button to match the element's paused flag.
_MUSIC_TOGGLE_JS = (
    "() => { var m=document.getElementById('cz-bgm'); if(!m)return;"
    "if(m.paused){m.volume=0.3;m.play().catch(function(){});}else{m.pause();}"
    + _MUSIC_BTN_FIND + "if(b)b.classList.toggle('cz-on', !m.paused); }"
)
# Handler that starts the music through window.czMusicOn (when defined) and lights the
# button; build_app attaches it to the "New Case" button.
_MUSIC_ON_JS = (
    "() => { if(window.czMusicOn) czMusicOn();" + _MUSIC_BTN_FIND + "if(b)b.classList.add('cz-on'); }"
)
def _overlay_html(mode: str) -> str:
    """Return the full-screen loading overlay markup.

    ``mode == "generate"`` gives the case-generation overlay (fixed header, no
    instructions); any other mode gives the warmup overlay, which has no header and
    appends the how-to-play panel.
    """
    if mode == "generate":
        head_html = "<div class='cz-overlay-head'>A NEW CASE LANDS ON YOUR DESK</div>"
        sub = ("Give it a minute or two, detective - the case file is being assembled: the "
               "victim, the suspects, their alibis, and the evidence.")
        extra = ""
    else:
        # No fixed header on warmup; "Opening the precinct" cycles as a flavor line.
        head_html = ""
        sub = ("Hang tight for a minute or two, detective - first boot warms up the case room, "
               "the suspects, and the interrogation lamp.")
        extra = how_to_play_html()

    pieces = [
        "<div class='cz-overlay'><div class='cz-overlay-inner'>",
        "<div class='cz-bigtitle'>CASE ZERO</div>",
        head_html,
        "<div id='cz-flavor' class='cz-flavor'>Opening the precinct...</div>",
        "<div class='cz-loadbar'><span></span></div>",
        f"<div class='cz-muted' style='text-align:center;margin-bottom:12px'>{sub}</div>",
        f"{extra}</div></div>",
    ]
    return "".join(pieces)
def _warmup(controller: GameController | None):
    """Behind the full-screen overlay: warm the models (cold-start) and serve the first
    case from the background buffer (it has been generating since startup). Every case is
    live-generated and unique; the buffer just lets us start without a second wait.

    Generator used as the page-load handler. It yields once: hide the overlay, show the
    game column, then the session state and 13 panel values from ``_full_state``.
    Creates a GameController when the session does not have one yet.
    """
    import logging
    import threading

    log = logging.getLogger(__name__)
    settings = get_settings()
    start_case_buffer(settings)  # ensure background generation is running
    tts = _obtain_shared_tts(settings)

    # Warm the interrogation backend + TTS in the background so the briefing reveals as
    # soon as the first case is ready (and the first question/voice are responsive).
    def _warm() -> None:
        # Warmup is best-effort: a failure must never block the game, but it is logged
        # (instead of silently swallowed) so a broken model setup can be diagnosed.
        try:
            from ..llm.backend import GenParams
            _obtain_shared_backend(settings).generate("ok", GenParams(max_tokens=1, temperature=0.0))
        except Exception as exc:
            log.warning("LLM warmup failed (the first reply may be slow): %r", exc)
        try:
            from ..schemas.suspect import VoiceAssignment
            warm = tts.synth_to_file("Ready.", VoiceAssignment(engine="supertonic", speaker_id=0),
                                     _TTS_DIR / "warm.wav")
            if warm:
                # The synthesized file is only a side effect of warming up; remove it.
                Path(warm).unlink(missing_ok=True)
        except Exception as exc:
            log.warning("TTS warmup failed (voice may be slow or unavailable): %r", exc)

    threading.Thread(target=_warm, daemon=True).start()
    controller = controller or GameController()
    # Wait for the first background-generated case; fall back to live generation if needed.
    if not controller.start_buffered(wait_secs=600.0):
        controller.start("generate", seed=random.randint(1, 999_999))
    yield (gr.update(visible=False), gr.update(visible=True), *_full_state(controller))
# Sentence-chunked playback: each synthesized chunk is queued and played in order, and
# the suspect's mouth moves while the queue is draining (stops when it empties).
# The handler receives the new value ``u`` of the hidden TTS textbox and pushes it onto
# window.czQ. If nothing is playing it starts a player that shifts one entry at a time,
# plays it, and advances on 'ended', on 'error', or when play() is rejected.
# window.czPlaying marks a running player; window.czCur holds the current Audio object
# so a new question can pause it.
_TTS_PLAY_JS = (
    "(u)=>{ if(!u) return; try{"
    "window.czQ = window.czQ || [];"
    "var s=document.getElementById('cz-sprite');"
    "window.czQ.push(u);"
    "if(window.czPlaying) return;"
    "var play=function(){"
    " if(!window.czQ.length){ window.czPlaying=false; if(s)s.classList.remove('talking'); return; }"
    " window.czPlaying=true; if(s)s.classList.add('talking');"
    " var a=new Audio(window.czQ.shift()); a.volume=0.95; window.czCur=a;"
    " var nx=function(){ play(); };"
    " a.addEventListener('ended',nx); a.addEventListener('error',nx);"
    " a.play().catch(nx);"
    "}; play();"
    "}catch(e){} }"
)
def build_app() -> gr.Blocks:
    """Assemble the Gradio Blocks app: layout, per-session state, and event wiring.

    The page starts with only the loading overlay visible. The page-load handler
    (``_warmup``) hides the overlay and reveals the game column once a case is ready.
    Sound effects and music are driven by client-side JS attached next to the Python
    handlers.

    Returns:
        The constructed ``gr.Blocks`` app (not launched here).
    """
    sfx, music = _load_audio()
    with gr.Blocks(css=build_css(), title="Case Zero", theme=gr.themes.Base(),
                   analytics_enabled=False) as demo:
        # Per-session GameController; None until _warmup / _new_case creates one.
        state = gr.State(None)
        # JS-only load handler: creates the audio elements and the window.cz* helpers.
        demo.load(None, None, None, js=_audio_setup_js(sfx, music))
        # Music toggle floats above everything (reachable on the loading overlay too).
        # A compact note icon - small and elegant on phones (lights amber when playing).
        music_btn = gr.Button("♪", elem_id="czmusicbtn", elem_classes=["cz-music"])
        # Full-screen overlay (warmup + live generation). Its content is position:fixed, so
        # its Gradio wrapper is collapsed (cz-overlay-host) to avoid an empty box in flow.
        overlay = gr.HTML(value=_overlay_html("warmup"), visible=True,
                          elem_classes=["cz-overlay-host"])
        # Title + credit live INSIDE the game column so the loading screen shows only the
        # overlay (no stray header/footer boxes during warmup).
        with gr.Column(visible=False) as game:
            gr.HTML("<div id='cz-title'>CASE ZERO</div>"
                    "<div id='cz-subtitle'>interrogate · investigate · accuse</div>")
            with gr.Row():
                new_btn = gr.Button("New Case")
            briefing = gr.HTML(value=how_to_play_html())
            with gr.Row(equal_height=False):
                # Left column: suspect roster and room search.
                with gr.Column(scale=3):
                    gr.HTML("<h3>Suspects</h3>")
                    roster = gr.Gallery(columns=2, height=330, show_label=False, allow_preview=False,
                                        object_fit="contain", elem_classes="cz-panel")
                    gr.HTML("<h3>Investigate</h3>")
                    loc_dd = gr.Dropdown(label="Search a room", choices=[])
                    search_btn = gr.Button("Search Room")
                # Centre column: the stage, the dialogue box, and the question controls.
                with gr.Column(scale=6):
                    stage = gr.HTML()
                    dialogue = gr.HTML()
                    evidence_dd = gr.Dropdown(label="Present evidence (search rooms first)",
                                              choices=[])
                    question = gr.Textbox(label="Ask the suspect",
                                          placeholder="Where were you that night?", lines=1)
                    ask_btn = gr.Button("Ask")
                    # Hidden textbox: each new value triggers _TTS_PLAY_JS via .change.
                    tts_audio = gr.Textbox(visible=False, elem_id="cz-tts")
                # Right column: collected evidence and the notebook.
                with gr.Column(scale=3):
                    gr.HTML("<h3>Evidence</h3>")
                    evidence_box = gr.HTML()
                    gr.HTML("<h3>Notebook</h3>")
                    notebook = gr.HTML()
                    note_in = gr.Textbox(placeholder="Jot a note to yourself...", lines=1,
                                         show_label=False, elem_classes="cz-note-in")
                    note_btn = gr.Button("Add Note")
            gr.HTML("<h3>Make Your Accusation</h3>")
            with gr.Row():
                accused_dd = gr.Dropdown(label="The killer is", choices=[])
                weapon_dd = gr.Dropdown(label="With the", choices=[])
                motive_dd = gr.Dropdown(label="Because of", choices=[])
            cited_cg = gr.CheckboxGroup(label="Citing this evidence", choices=[])
            accuse_btn = gr.Button("Accuse", elem_classes="cz-accuse")
            verdict = gr.HTML(visible=False)  # appears only after an accusation is made
            gr.HTML(
                "<div class='cz-credit'>Case Zero · Story &amp; suspects: Qwen2.5-1.5B "
                "(local, via llama.cpp) · Voices: Supertonic (local on-device TTS) "
                "· Music: \"Backbay Lounge\" by Kevin MacLeod (incompetech.com), licensed "
                "under Creative Commons Attribution 4.0 (CC BY 4.0 - free to use with credit)</div>"
            )
        # Warm the models AND serve the first case from the buffer behind the overlay.
        # Order must match _warmup's yield: overlay, game, then _full_state's 14 values.
        warm_outputs = [overlay, game, state, briefing, roster, stage, dialogue, loc_dd,
                        evidence_dd, evidence_box, notebook, accused_dd, weapon_dd, motive_dd,
                        cited_cg, verdict]
        demo.load(_warmup, [state], warm_outputs)
        # Same as warm_outputs without the game column (it is already visible by then).
        new_outputs = [overlay, state, briefing, roster, stage, dialogue, loc_dd, evidence_dd,
                       evidence_box, notebook, accused_dd, weapon_dd, motive_dd, cited_cg, verdict]
        new_btn.click(_new_case, [state], new_outputs)
        new_btn.click(None, None, None, js=_MUSIC_ON_JS)
        music_btn.click(None, None, None, js=_MUSIC_TOGGLE_JS)
        roster.select(_select_suspect, [state], [stage, dialogue])
        roster.select(None, None, None, js="() => { if(window.czSfx) czSfx('select'); }")
        # Order must match the 5-tuples yielded by _ask.
        ask_outputs = [dialogue, evidence_dd, notebook, tts_audio, evidence_box]
        # Stop any leftover voice and clear the queue before a new reply starts streaming.
        ask_js = ("() => { try{ if(window.czCur){window.czCur.pause();} window.czQ=[];"
                  " window.czPlaying=false; var s=document.getElementById('cz-sprite');"
                  " if(s)s.classList.remove('talking'); }catch(e){}"
                  " if(window.czSfx) czSfx('select'); }")
        # The Ask button and pressing Enter in the question box are wired identically.
        ask_btn.click(_ask, [state, question, evidence_dd], ask_outputs)
        ask_btn.click(None, None, None, js=ask_js)
        ask_btn.click(lambda: "", None, question)  # clear the question box after asking
        question.submit(_ask, [state, question, evidence_dd], ask_outputs)
        question.submit(None, None, None, js=ask_js)
        question.submit(lambda: "", None, question)
        # Speak the line and sync the mouth to the actual audio playback.
        tts_audio.change(None, [tts_audio], None, js=_TTS_PLAY_JS)
        # Order must match the 4-tuple returned by _search.
        search_outputs = [evidence_box, evidence_dd, notebook, cited_cg]
        search_btn.click(_search, [state, loc_dd], search_outputs)
        search_btn.click(None, None, None, js="() => { if(window.czSfx) czSfx('page'); }")
        # Let the player add their own notebook entries.
        note_btn.click(_add_note, [state, note_in], [notebook, note_in])
        note_in.submit(_add_note, [state, note_in], [notebook, note_in])
        accuse_btn.click(_accuse, [state, accused_dd, weapon_dd, motive_dd, cited_cg], [verdict])
        accuse_btn.click(None, None, None, js="() => { if(window.czSfx) czSfx('accuse'); }")
    return demo