Spaces:

build-small-hackathon
/

case0

Running

Case Zero - initial public release (fully local: Qwen2.5-1.5B via llama.cpp + Supertonic, custom pixel-noir SPA via gradio.Server)

414dc55 3 days ago

raw

history blame

23 kB

	"""The Gradio Blocks UI: a scene-based, animated pixel detective game.

	Single-player: one GameController lives in gr.State per session. The interrogation
	stage (animated suspect sprite in a room), the notebook, and the scenery are
	deterministic CSS-animated graphics. Suspect dialogue streams into a visual-novel box;
	hidden state never reaches the browser. SFX/music play client-side from data URIs.
	"""

	from __future__ import annotations

	import random
	from concurrent.futures import ThreadPoolExecutor
	from pathlib import Path

	import gradio as gr

	from ..config import get_settings
	from ..schemas.enums import MotiveCategory
	from ..suspects.scrub import scrub_spoken
	from .app_state import (
	_TTS_DIR,
	GameController,
	_load_audio,
	_obtain_shared_backend,
	_obtain_shared_tts,
	note_interaction,
	start_case_buffer,
	)
	from .formatters import (
	briefing_html,
	dialogue_html,
	evidence_html,
	how_to_play_html,
	notebook_html,
	stage_html,
	verdict_html,
	)
	from .theme import build_css

	# Fixed wrong-answer weapons offered next to the real weapon in the accusation form.
	_WEAPON_DECOYS = (
	    "Poison",
	    "Strangulation",
	    "A fall",
	)


	# Status lines the load-event JS cycles through in the loading overlay's flavor slot.
	_FLAVOR_LINES = (
	    "Opening the precinct",
	    "Dusting for fingerprints",
	    "Rounding up the usual suspects",
	    "Brewing the detective's coffee",
	    "Reviewing the case files",
	    "Polishing the interrogation lamp",
	    "Chasing a lead down the alley",
	    "Cataloguing the evidence",
	    "Tuning the suspects' alibis",
	)


	def _audio_setup_js(sfx: dict[str, str], music: str) -> str:
	    """Build the load-event JS that sets up audio and the loading-screen text.

	    The returned arrow function (guarded by ``window.czAudioReady`` so it runs once)
	    creates one ``<audio>`` element for the music and one per sound effect, defines the
	    ``czSfx`` / ``czMusicOn`` / ``czMusicOff`` / ``czTalk`` helpers, cycles the
	    ``cz-flavor`` text every 2.8 s, and tries to start the music. Gradio strips
	    <script> from gr.HTML, so this load-event JS is how audio/animation actually work.

	    Args:
	        sfx: Maps an effect name to its audio source; each becomes ``cz-sfx-<name>``.
	        music: Audio source for the looping ``cz-bgm`` element.

	    Returns:
	        JavaScript source of a zero-argument arrow function.
	    """
	    import json  # function-local so the block does not rely on a new file-level import

	    # json.dumps emits valid JS string/array literals for any input; Python's repr()
	    # (the previous ``!r``) is not a JS serializer and the event name was unescaped.
	    makers = [f"mk('cz-bgm',{json.dumps(music)},true);"]
	    for event, uri in sfx.items():
	        element_id = json.dumps("cz-sfx-" + event)
	        makers.append(f"mk({element_id},{json.dumps(uri)},false);")
	    flavor = json.dumps(list(_FLAVOR_LINES))
	    return (
	        "() => {"
	        "if(window.czAudioReady)return; window.czAudioReady=true;"
	        "const mk=(id,src,loop)=>{const a=document.createElement('audio');a.id=id;a.src=src;"
	        "a.preload='auto';if(loop)a.loop=true;document.body.appendChild(a);};"
	        + "".join(makers)
	        + "window.czSfx=function(n){var a=document.getElementById('cz-sfx-'+n);"
	        "if(a){try{a.currentTime=0;a.volume=(n==='page')?0.22:0.45;a.play();}catch(e){}}};"
	        "window.czMusicOn=function(){var m=document.getElementById('cz-bgm');"
	        "if(m){m.volume=0.3;m.play().catch(function(){});}};"
	        "window.czMusicOff=function(){var m=document.getElementById('cz-bgm');if(m)m.pause();};"
	        "window.czTalk=function(){var s=document.getElementById('cz-sprite');"
	        "if(s){s.classList.add('talking');setTimeout(function(){s.classList.remove('talking');},3000);}};"
	        f"var fl={flavor}; var i=0;"
	        "var tick=function(){var e=document.getElementById('cz-flavor');"
	        "if(e){e.textContent=fl[i%fl.length]+'...'; i++;}};tick();setInterval(tick,2800);"
	        "try{czMusicOn();var b=document.getElementById('czmusicbtn');"
	        "if(b)b.classList.add('cz-on');}catch(e){}"
	        "}"
	    )


	def _next_sentence(text: str, start: int) -> tuple[str, int] | None:
	    """Return the next COMPLETE sentence in ``text`` at or after ``start``.

	    A sentence is complete once a run of terminators (``.``, ``!``, ``?``) is followed
	    by whitespace. A terminator followed by a non-space character (e.g. the dot in
	    ``3.14``) or by the end of the text does not end a sentence, so a reply that is
	    still being streamed yields nothing for its unfinished tail.
	    Lets us synth a suspect's reply sentence-by-sentence as the tokens arrive.

	    Args:
	        text: The text accumulated so far.
	        start: Index to resume scanning from; leading whitespace is skipped.

	    Returns:
	        ``(sentence, new_cursor)`` where ``sentence`` is stripped and ``new_cursor``
	        is the index of the whitespace right after the terminators, or ``None`` when
	        no complete sentence is available yet.
	    """
	    terminators = ".!?"
	    n = len(text)
	    begin = start
	    while begin < n and text[begin].isspace():
	        begin += 1
	    pos = begin
	    while pos < n:
	        if text[pos] not in terminators:
	            pos += 1
	            continue
	        # Swallow the whole run so "..." or "?!" counts as a single sentence end.
	        end = pos + 1
	        while end < n and text[end] in terminators:
	            end += 1
	        if end < n and text[end].isspace():
	            return text[begin:end].strip(), end
	        pos = end
	    return None


	def _stage(controller: GameController, sus_id: str) -> str:
	    """Render the stage HTML for suspect ``sus_id`` in the interrogation room."""
	    # The suspect is always questioned in a neutral interrogation room - the stage never
	    # implies where they "were", so searching a room can no longer contradict their alibi.
	    room_uri = controller.interrogation_uri()
	    portrait_uri = controller.portrait_sheet_uri(sus_id)
	    suspect_name = controller.case.suspect(sus_id).name
	    return stage_html(room_uri, portrait_uri, suspect_name, "Interrogation Room")


	def _evidence_items(controller: GameController) -> list[tuple[str, str, str]]:
	    """List ``(name, reveal_text, prop_uri)`` for each clue in the session's evidence."""
	    items: list[tuple[str, str, str]] = []
	    for clue in controller.session.evidence():
	        items.append((clue.name, clue.reveal_text, controller.prop_uri(clue)))
	    return items


	def _full_state(controller: GameController):
	    """Return the complete UI refresh for the controller's current case.

	    The result is a 14-tuple: the controller itself, followed by one value or
	    ``gr.update`` per refreshed widget, in the order briefing, roster, stage, dialogue,
	    room dropdown, evidence dropdown, evidence panel, notebook, accused dropdown,
	    weapon dropdown, motive dropdown, cited-evidence checkboxes, verdict panel.
	    The stage and dialogue start on the first suspect of the case.
	    """
	    case = controller.case
	    first = case.suspects[0]
	    motives = [m.value for m in MotiveCategory]
	    # Sort the weapon options: listing the real weapon first would let the player
	    # read the answer from its position. The set also drops a decoy that happens to
	    # equal the real weapon.
	    weapons = sorted({case.weapon.name, *_WEAPON_DECOYS})
	    return (
	        controller,
	        briefing_html(controller.session.player_view),
	        gr.update(value=controller.roster()),
	        _stage(controller, first.sus_id),
	        dialogue_html(first.name, "I suppose you have questions. Get on with it."),
	        gr.update(choices=controller.location_choices(), value=None),
	        gr.update(choices=[], value=None),
	        evidence_html([]),
	        notebook_html(controller.session.state, case),
	        gr.update(choices=[s.name for s in case.suspects], value=None),
	        gr.update(choices=weapons, value=None),
	        gr.update(choices=motives, value=None),
	        gr.update(choices=[], value=[]),
	        gr.update(value="", visible=False),  # verdict: hidden until an accusation is made
	    )


	def _new_case(controller: GameController | None):
	    """Start a new case, showing the loading overlay only when generation is live.

	    Generator handler for the "New Case" button. Each yield is a 15-tuple: the overlay
	    update, then the controller, then the 13 widget values produced by ``_full_state``
	    (or 13 no-op ``gr.update()`` placeholders while the overlay is up).

	    Args:
	        controller: The session's controller, or ``None`` to create a fresh one.
	    """
	    controller = controller or GameController()
	    # Fast path: a freshly-generated case is already waiting in the background buffer -
	    # reveal it instantly, no overlay. (The worker immediately starts the next one.)
	    if controller.start_buffered(wait_secs=0.0):
	        yield (gr.update(visible=False), *_full_state(controller))
	        return
	    # Otherwise generate live behind the overlay (buffer not ready yet). The 13
	    # placeholders leave every game widget untouched until the case exists.
	    untouched = [gr.update()] * 13
	    yield (gr.update(value=_overlay_html("generate"), visible=True), controller, *untouched)
	    controller.start("generate", seed=random.randint(1, 999_999))
	    yield (gr.update(visible=False), *_full_state(controller))


	def _select_suspect(controller: GameController | None, evt: gr.SelectData):
	    """Switch the stage and dialogue box to the suspect picked in the roster.

	    Args:
	        controller: The session's controller; ``None`` (or no session) is a no-op.
	        evt: Selection event; its ``index`` is passed to ``select_by_index``.

	    Returns:
	        ``(stage_html, dialogue_html)`` for the newly selected suspect, or two no-op
	        ``gr.update()`` values when there is no active game. The dialogue shows the
	        last answer in that suspect's transcript, or a default prompt when the
	        transcript is empty.
	    """
	    if controller is None or controller.session is None:
	        return gr.update(), gr.update()
	    controller.select_by_index(evt.index)
	    sus_id = controller.current_sus
	    transcript = controller.session.state.state_for(sus_id).transcript
	    last = transcript[-1].answer if transcript else "Well? Ask your questions."
	    return _stage(controller, sus_id), dialogue_html(controller.current_name(), last)


	def _ask(controller: GameController | None, question: str, evidence_name: str | None):
	"""Stream one interrogation reply for the currently selected suspect.

	Generator handler for the Ask button and the question box's submit event. Every
	yield is a 5-tuple in the order (dialogue HTML, evidence dropdown update, notebook
	HTML, TTS value, evidence panel update); positions this handler does not change
	carry a bare ``gr.update()``.

	Args:
	controller: The session's controller; ``None`` (or no session) yields an empty
	dialogue box and stops.
	question: The player's question; a blank one yields a prompt line and stops.
	evidence_name: Name chosen in the evidence dropdown, or ``None``; resolved to a
	clue id via ``controller.clue_id_for_name``.
	"""
	if controller is None or controller.session is None:
	yield dialogue_html("", ""), gr.update(), gr.update(), gr.update(), gr.update()
	return
	name = controller.current_name()
	if not question.strip():
	yield (dialogue_html(name, "Ask me something, Detective."),
	gr.update(), gr.update(), gr.update(), gr.update())
	return

	sus_id = controller.current_sus
	clue_id = controller.clue_id_for_name(evidence_name)
	# ``breaking`` is forwarded to scrub_spoken for every sentence of this reply.
	breaking = controller.relevance_breaking(clue_id)
	note_interaction() # tell the background case generator to yield CPU to this reply
	# Voice is synthesized sentence-by-sentence in a background worker WHILE the LLM keeps
	# streaming (both release the GIL), so the first words are spoken almost immediately
	# instead of after the whole reply. Chunks play in order via a JS queue.
	voice_on = bool(getattr(controller.tts, "available", False)) and bool(sus_id)
	# A single worker keeps the synthesized chunks in submission order.
	pool = ThreadPoolExecutor(max_workers=1) if voice_on else None
	futures: list = []
	# Index of the next future in ``futures`` whose audio has not been emitted yet.
	fptr = 0
	raw = "" # full raw stream (kept only for the cursor; never shown directly)
	shown = "" # scrubbed, sentence-complete text the player actually sees + hears
	# Position in ``raw`` up to which complete sentences have already been consumed.
	cursor = 0
	final = None
	try:
	for event in controller.session.interrogate(sus_id, question, clue_id):
	if event.spoken_delta:
	raw += event.spoken_delta
	note_interaction() # keep the generator backed off for the whole reply
	# Reveal (and voice) only COMPLETE, scrubbed sentences. A confession can
	# never flash on screen or be spoken, and the text appears in step with
	# the voice rather than racing ahead of it.
	while (nxt := _next_sentence(raw, cursor)) is not None:
	sentence, cursor = nxt
	clean = scrub_spoken(sentence, breaking=breaking)
	if not clean:
	continue
	shown = f"{shown} {clean}".strip()
	if pool is not None:
	futures.append(pool.submit(controller.speak, clean))
	uri = gr.update() # emit at most one ready audio chunk per streamed tick
	if fptr < len(futures) and futures[fptr].done():
	got = futures[fptr].result()
	fptr += 1
	if got:
	uri = got
	yield (dialogue_html(name, shown, streaming=True),
	gr.update(), gr.update(), uri, gr.update())
	if event.final is not None:
	final = event.final

	# Flush the trailing partial sentence (the last line rarely ends with whitespace).
	tail = scrub_spoken(raw[cursor:].strip(), breaking=breaking)
	if tail:
	shown = f"{shown} {tail}".strip()
	if pool is not None:
	futures.append(pool.submit(controller.speak, tail))

	# The engine's final spoken line is the authoritative (already-scrubbed) text.
	line = final.turn.spoken if final else (shown or "...")
	note = notebook_html(controller.session.state, controller.case)
	# Clear the evidence selection so it is presented ONLY on the turn it was chosen
	# (otherwise it would silently re-present every subsequent question).
	clear_ev = gr.update(value=None)
	# Drain any remaining audio chunks in order, one yield each so every chunk plays.
	while fptr < len(futures):
	got = futures[fptr].result()
	fptr += 1
	yield (dialogue_html(name, line), clear_ev, note, (got or gr.update()), gr.update())
	yield (dialogue_html(name, line), clear_ev, note, gr.update(), gr.update())
	finally:
	# Release the worker without blocking on any synthesis still in flight.
	if pool is not None:
	pool.shutdown(wait=False)


	def _search(controller: GameController | None, loc_name: str | None):
	    """Search a room and refresh every widget that lists evidence.

	    Args:
	        controller: The session's controller; ``None`` (or no session) is a no-op.
	        loc_name: The room chosen in the dropdown; empty/``None`` is a no-op.

	    Returns:
	        A 4-tuple for (evidence panel HTML, evidence dropdown update, notebook HTML,
	        cited-evidence checkbox update); four no-op ``gr.update()`` values when
	        there is nothing to search.
	    """
	    # Searching reveals a room's evidence; it does NOT move the suspect (the stage stays in
	    # the interrogation room), so it can never imply a false whereabouts for them.
	    if controller is None or controller.session is None or not loc_name:
	        return gr.update(), gr.update(), gr.update(), gr.update()
	    controller.search(loc_name)
	    items = _evidence_items(controller)
	    # Ask the controller once; both the dropdown and the checkbox group list the same names.
	    choices = controller.evidence_choices()
	    return (
	        evidence_html(items),
	        gr.update(choices=choices),
	        notebook_html(controller.session.state, controller.case),
	        gr.update(choices=choices),
	    )


	def _add_note(controller: GameController | None, text: str):
	    """Append a player-written note to the notebook and clear the note input.

	    Args:
	        controller: The session's controller; ``None`` (or no session) is a no-op.
	        text: The note text; blank or ``None`` is ignored.

	    Returns:
	        ``(notebook, "")`` - the re-rendered notebook HTML (or a no-op
	        ``gr.update()`` when nothing was added) and an empty string for the input box.
	    """
	    # ``text or ""`` keeps a None value from raising on .strip().
	    if controller is None or controller.session is None or not (text or "").strip():
	        return gr.update(), ""
	    controller.add_note(text)
	    return notebook_html(controller.session.state, controller.case), ""


	def _accuse(controller: GameController | None, accused: str | None, weapon: str | None,
	            motive: str | None, cited: list[str] | None):
	    """Submit the player's accusation and reveal the verdict panel.

	    Args:
	        controller: The session's controller, or ``None`` before a game exists.
	        accused: Name picked in the "killer" dropdown.
	        weapon: Weapon picked; compared with the case's weapon name.
	        motive: Motive picked; compared with the culprit's true motive category value.
	        cited: Evidence names ticked by the player; ``None`` is treated as empty.

	    Returns:
	        A ``gr.update`` that makes the verdict panel visible, holding either the
	        rendered verdict or a reminder to name a suspect first.
	    """
	    if controller is None or controller.session is None or not accused:
	        return gr.update(value="<div class='cz-verdict'>Name who you are accusing first.</div>",
	                         visible=True)
	    weapon_ok = weapon == controller.case.weapon.name
	    motive_ok = motive == controller.case.culprit.true_motive.category.value
	    verdict = controller.accuse(accused, weapon_ok, motive_ok, cited or [])
	    # The verdict panel is hidden until now (no empty box before the player accuses).
	    return gr.update(value=verdict_html(controller.case, verdict), visible=True)


	# The music toggle reads the real <audio> element's paused flag instead of mirroring it
	# in Python state, so the two cannot drift apart. The 'cz-on' class marks the button
	# while the track is playing.
	_MUSIC_BTN_FIND = "var b=document.getElementById('czmusicbtn');"
	_MUSIC_TOGGLE_JS = "".join((
	    "() => { var m=document.getElementById('cz-bgm'); if(!m)return;",
	    "if(m.paused){m.volume=0.3;m.play().catch(function(){});}else{m.pause();}",
	    _MUSIC_BTN_FIND,
	    "if(b)b.classList.toggle('cz-on', !m.paused); }",
	))
	_MUSIC_ON_JS = "".join((
	    "() => { if(window.czMusicOn) czMusicOn();",
	    _MUSIC_BTN_FIND,
	    "if(b)b.classList.add('cz-on'); }",
	))


	def _overlay_html(mode: str) -> str:
	    """Render the full-screen loading overlay used for warmup and case generation.

	    ``mode == "generate"`` shows a fixed headline and the case-assembly blurb; any
	    other mode is the warmup variant, which has no headline and appends the
	    how-to-play panel.
	    """
	    if mode == "generate":
	        head = "A NEW CASE LANDS ON YOUR DESK"
	        sub = ("Give it a minute or two, detective - the case file is being assembled: the "
	               "victim, the suspects, their alibis, and the evidence.")
	        extra = ""
	    else:
	        head = ""  # no fixed header on warmup; "Opening the precinct" cycles as a flavor line
	        sub = ("Hang tight for a minute or two, detective - first boot warms up the case room, "
	               "the suspects, and the interrogation lamp.")
	        extra = how_to_play_html()

	    pieces = [
	        "<div class='cz-overlay'><div class='cz-overlay-inner'>",
	        "<div class='cz-bigtitle'>CASE ZERO</div>",
	    ]
	    if head:
	        pieces.append(f"<div class='cz-overlay-head'>{head}</div>")
	    pieces.extend((
	        "<div id='cz-flavor' class='cz-flavor'>Opening the precinct...</div>",
	        "<div class='cz-loadbar'><span></span></div>",
	        f"<div class='cz-muted' style='text-align:center;margin-bottom:12px'>{sub}</div>",
	        f"{extra}</div></div>",
	    ))
	    return "".join(pieces)


	def _warmup(controller: GameController | None):
	    """Behind the full-screen overlay: warm the models (cold-start) and serve the first
	    case from the background buffer (it has been generating since startup). Every case is
	    live-generated and unique; the buffer just lets us start without a second wait.

	    Generator handler for the page-load event. Its single yield is a 16-tuple: hide
	    the overlay, show the game column, then the 14 values from ``_full_state``.

	    Args:
	        controller: The session's controller, or ``None`` to create a fresh one.
	    """
	    import logging
	    import threading

	    log = logging.getLogger(__name__)
	    settings = get_settings()
	    start_case_buffer(settings)  # ensure background generation is running
	    tts = _obtain_shared_tts(settings)

	    # Warm the interrogation backend + TTS in the background so the briefing reveals as
	    # soon as the first case is ready (and the first question/voice are responsive).
	    def _warm() -> None:
	        # Both steps are best-effort: a failure is logged, never raised, so a broken
	        # warm-up cannot block the first case from being shown.
	        try:
	            from ..llm.backend import GenParams

	            _obtain_shared_backend(settings).generate(
	                "ok", GenParams(max_tokens=1, temperature=0.0))
	        except Exception:
	            log.warning("LLM warm-up failed", exc_info=True)
	        try:
	            from ..schemas.suspect import VoiceAssignment

	            warm = tts.synth_to_file("Ready.", VoiceAssignment(engine="supertonic", speaker_id=0),
	                                     _TTS_DIR / "warm.wav")
	            if warm:
	                # The warm-up clip is throwaway; remove it once synthesized.
	                Path(warm).unlink(missing_ok=True)
	        except Exception:
	            log.warning("TTS warm-up failed", exc_info=True)

	    threading.Thread(target=_warm, daemon=True).start()
	    controller = controller or GameController()
	    # Wait for the first background-generated case; fall back to live generation if needed.
	    if not controller.start_buffered(wait_secs=600.0):
	        controller.start("generate", seed=random.randint(1, 999_999))
	    yield (gr.update(visible=False), gr.update(visible=True), *_full_state(controller))


	# Sentence-chunked playback: each synthesized chunk is queued and played in order, and
	# the suspect's mouth moves while the queue is draining (stops when it empties).
	# A failed clip fires both the 'error' event and the rejected play() promise, so the
	# per-clip `done` flag makes sure the queue advances only once per clip.
	_TTS_PLAY_JS = (
	    "(u)=>{ if(!u) return; try{"
	    "window.czQ = window.czQ || [];"
	    "var s=document.getElementById('cz-sprite');"
	    "window.czQ.push(u);"
	    "if(window.czPlaying) return;"
	    "var play=function(){"
	    " if(!window.czQ.length){ window.czPlaying=false; if(s)s.classList.remove('talking'); return; }"
	    " window.czPlaying=true; if(s)s.classList.add('talking');"
	    " var a=new Audio(window.czQ.shift()); a.volume=0.95; window.czCur=a;"
	    " var done=false;"
	    " var nx=function(){ if(done) return; done=true; play(); };"
	    " a.addEventListener('ended',nx); a.addEventListener('error',nx);"
	    " a.play().catch(nx);"
	    "}; play();"
	    "}catch(e){} }"
	)


	def build_app() -> gr.Blocks:
	"""Build the Case Zero ``gr.Blocks`` app: create the widgets, wire their event
	handlers, and return the Blocks object. Nothing is launched here; that is left to
	the caller.

	Several events are registered twice: once with a Python handler and once with a
	JS-only listener (``fn=None``) that plays a sound effect or drives audio in the
	browser.
	"""
	# Sound-effect sources (keyed by event name) and the background-music source.
	sfx, music = _load_audio()

	with gr.Blocks(css=build_css(), title="Case Zero", theme=gr.themes.Base(),
	analytics_enabled=False) as demo:
	# Per-session slot for the GameController; starts empty and is filled by _warmup.
	state = gr.State(None)

	# JS-only load listener: creates the audio elements and helper functions.
	demo.load(None, None, None, js=_audio_setup_js(sfx, music))

	# Music toggle floats above everything (reachable on the loading overlay too).
	# A compact note icon - small and elegant on phones (lights amber when playing).
	music_btn = gr.Button("♪", elem_id="czmusicbtn", elem_classes=["cz-music"])

	# Full-screen overlay (warmup + live generation). Its content is position:fixed, so
	# its Gradio wrapper is collapsed (cz-overlay-host) to avoid an empty box in flow.
	overlay = gr.HTML(value=_overlay_html("warmup"), visible=True,
	elem_classes=["cz-overlay-host"])

	# Title + credit live INSIDE the game column so the loading screen shows only the
	# overlay (no stray header/footer boxes during warmup).
	with gr.Column(visible=False) as game:
	gr.HTML("<div id='cz-title'>CASE ZERO</div>"
	"<div id='cz-subtitle'>interrogate · investigate · accuse</div>")
	with gr.Row():
	new_btn = gr.Button("New Case")
	briefing = gr.HTML(value=how_to_play_html())
	with gr.Row(equal_height=False):
	with gr.Column(scale=3):
	gr.HTML("<h3>Suspects</h3>")
	roster = gr.Gallery(columns=2, height=330, show_label=False, allow_preview=False,
	object_fit="contain", elem_classes="cz-panel")
	gr.HTML("<h3>Investigate</h3>")
	loc_dd = gr.Dropdown(label="Search a room", choices=[])
	search_btn = gr.Button("Search Room")
	with gr.Column(scale=6):
	stage = gr.HTML()
	dialogue = gr.HTML()
	evidence_dd = gr.Dropdown(label="Present evidence (search rooms first)",
	choices=[])
	question = gr.Textbox(label="Ask the suspect",
	placeholder="Where were you that night?", lines=1)
	ask_btn = gr.Button("Ask")
	# Hidden textbox that carries each synthesized voice chunk to the browser; its
	# change event (wired below) hands the value to the JS playback queue.
	tts_audio = gr.Textbox(visible=False, elem_id="cz-tts")
	with gr.Column(scale=3):
	gr.HTML("<h3>Evidence</h3>")
	evidence_box = gr.HTML()
	gr.HTML("<h3>Notebook</h3>")
	notebook = gr.HTML()
	note_in = gr.Textbox(placeholder="Jot a note to yourself...", lines=1,
	show_label=False, elem_classes="cz-note-in")
	note_btn = gr.Button("Add Note")

	gr.HTML("<h3>Make Your Accusation</h3>")
	with gr.Row():
	accused_dd = gr.Dropdown(label="The killer is", choices=[])
	weapon_dd = gr.Dropdown(label="With the", choices=[])
	motive_dd = gr.Dropdown(label="Because of", choices=[])
	cited_cg = gr.CheckboxGroup(label="Citing this evidence", choices=[])
	accuse_btn = gr.Button("Accuse", elem_classes="cz-accuse")
	verdict = gr.HTML(visible=False) # appears only after an accusation is made

	gr.HTML(
	"<div class='cz-credit'>Case Zero · Story & suspects: Qwen2.5-1.5B "
	"(local, via llama.cpp) · Voices: Supertonic (local on-device TTS) "
	"· Music: \"Backbay Lounge\" by Kevin MacLeod (incompetech.com), licensed "
	"under Creative Commons Attribution 4.0 (CC BY 4.0 - free to use with credit)</div>"
	)

	# Warm the models AND serve the first case from the buffer behind the overlay.
	# Order must match _warmup's yield: overlay, game, then the 14 _full_state values.
	warm_outputs = [overlay, game, state, briefing, roster, stage, dialogue, loc_dd,
	evidence_dd, evidence_box, notebook, accused_dd, weapon_dd, motive_dd,
	cited_cg, verdict]
	demo.load(_warmup, [state], warm_outputs)

	# Same as warm_outputs minus the game column; order must match _new_case's yields.
	new_outputs = [overlay, state, briefing, roster, stage, dialogue, loc_dd, evidence_dd,
	evidence_box, notebook, accused_dd, weapon_dd, motive_dd, cited_cg, verdict]
	new_btn.click(_new_case, [state], new_outputs)
	new_btn.click(None, None, None, js=_MUSIC_ON_JS)
	music_btn.click(None, None, None, js=_MUSIC_TOGGLE_JS)

	roster.select(_select_suspect, [state], [stage, dialogue])
	roster.select(None, None, None, js="() => { if(window.czSfx) czSfx('select'); }")

	# Order must match the 5-tuples yielded by _ask.
	ask_outputs = [dialogue, evidence_dd, notebook, tts_audio, evidence_box]
	# Stop any leftover voice and clear the queue before a new reply starts streaming.
	ask_js = ("() => { try{ if(window.czCur){window.czCur.pause();} window.czQ=[];"
	" window.czPlaying=false; var s=document.getElementById('cz-sprite');"
	" if(s)s.classList.remove('talking'); }catch(e){}"
	" if(window.czSfx) czSfx('select'); }")
	# The Ask button and pressing Enter in the question box are wired identically.
	ask_btn.click(_ask, [state, question, evidence_dd], ask_outputs)
	ask_btn.click(None, None, None, js=ask_js)
	ask_btn.click(lambda: "", None, question) # clear the question box after asking
	question.submit(_ask, [state, question, evidence_dd], ask_outputs)
	question.submit(None, None, None, js=ask_js)
	question.submit(lambda: "", None, question)
	# Speak the line and sync the mouth to the actual audio playback.
	tts_audio.change(None, [tts_audio], None, js=_TTS_PLAY_JS)

	# Order must match the 4-tuple returned by _search.
	search_outputs = [evidence_box, evidence_dd, notebook, cited_cg]
	search_btn.click(_search, [state, loc_dd], search_outputs)
	search_btn.click(None, None, None, js="() => { if(window.czSfx) czSfx('page'); }")

	# Let the player add their own notebook entries.
	note_btn.click(_add_note, [state, note_in], [notebook, note_in])
	note_in.submit(_add_note, [state, note_in], [notebook, note_in])

	accuse_btn.click(_accuse, [state, accused_dd, weapon_dd, motive_dd, cited_cg], [verdict])
	accuse_btn.click(None, None, None, js="() => { if(window.czSfx) czSfx('accuse'); }")

	return demo