case0/src/case_zero/api/tts_service.py
HusseinEid's picture
Case Zero - initial public release (fully local: Qwen2.5-1.5B via llama.cpp + Supertonic, custom pixel-noir SPA via gradio.Server)
414dc55
"""On-device Supertonic voice synthesis for suspect replies.
Lazily loads the provider (or degrades to silent), synthesizes a WAV per reply, and
caches it on disk keyed by (voice, text) so re-asks are instant. Single-flight: synthesis
runs under a lock so it never oversubscribes the CPU it shares with the LLM.
"""
from __future__ import annotations

import hashlib
import logging
import threading
from pathlib import Path

from ..config import get_settings
from ..constants import PROJECT_ROOT
from ..schemas.suspect import VoiceAssignment
from ..tts.provider import make_tts_provider
# Directory under the project root where synthesized WAV files are cached,
# one "<key>.wav" file per cache key.
_CACHE_DIR = PROJECT_ROOT / ".cache" / "tts"
class TtsService:
    """Lazy, disk-cached front end to the TTS provider.

    The provider is built on first use with ``make_tts_provider(get_settings())``.
    If construction raises, the error is logged, the failure is remembered, and
    the service reports itself unavailable from then on instead of retrying.
    Provider loading and synthesis both run under one lock, so at most one of
    them is in progress at a time.
    """

    def __init__(self) -> None:
        self._provider = None  # built lazily by _get()
        self._failed = False  # set once provider construction has raised
        self._lock = threading.Lock()  # guards provider loading and synthesis

    def _get(self):
        """Return the provider, building it on first call.

        Returns None when construction has failed (now or on an earlier call).
        """
        # Fast path: provider already built, or already known to be broken.
        if self._provider is not None or self._failed:
            return self._provider
        with self._lock:
            # Re-check under the lock: a concurrent caller may have finished
            # (or failed) loading while this thread was waiting, and the
            # provider must not be built twice.
            if self._provider is None and not self._failed:
                try:
                    self._provider = make_tts_provider(get_settings())
                except Exception:
                    # Best-effort by design: degrade to silence rather than
                    # crash, but leave a trace so the cause can be diagnosed.
                    self._failed = True
                    logging.getLogger(__name__).warning(
                        "TTS provider failed to load; voice synthesis disabled",
                        exc_info=True,
                    )
        return self._provider

    def available(self) -> bool:
        """Return True when a provider is loaded and its ``available`` flag is truthy."""
        p = self._get()
        return bool(p and getattr(p, "available", False))

    def synth(self, text: str, voice: VoiceAssignment | None) -> Path | None:
        """Synthesize ``text`` to a cached WAV file and return its path.

        Args:
            text: The reply to speak. Blank or whitespace-only text is skipped.
            voice: Voice to use. When falsy, speaker id 0 and length scale 1.0
                are used for the cache key and ``voice`` is passed to the
                provider unchanged.

        Returns:
            The path of the cached file if one already exists, otherwise
            whatever the provider's ``synth_to_file`` returns. None when the
            provider is missing or unavailable, or when ``text`` is blank.
        """
        p = self._get()
        if not p or not getattr(p, "available", False) or not text.strip():
            return None
        sid = voice.speaker_id if voice else 0
        scale = voice.length_scale if voice else 1.0
        # Cache key covers everything that changes the audio: speaker, pace, text.
        key = hashlib.sha256(f"{sid}|{scale}|{text}".encode()).hexdigest()[:16]
        out = _CACHE_DIR / f"{key}.wav"
        # Cheap check first so cache hits never wait on a running synthesis.
        if out.exists():
            return out
        with self._lock:
            # Another caller may have produced this file while we waited.
            if out.exists():
                return out
            # NOTE(review): assumes synth_to_file creates the cache directory
            # if it is missing -- nothing in this class does; confirm.
            return p.synth_to_file(text, voice, out)
# Shared module-level service instance. Constructing it does not load the
# provider; that happens on the first available()/synth() call.
TTS = TtsService()
def voice_seed(sus_id: str, *, female: bool | None = None) -> VoiceAssignment:
    """Derive a deterministic VoiceAssignment from a suspect id.

    Intended for cases where no CaseFile suspect is available (e.g. the golden
    case). The same ``sus_id`` and ``female`` always give the same voice.

    Args:
        sus_id: Suspect identifier; the first four bytes of its SHA-256 digest
            form the seed.
        female: ``True`` picks a speaker from 5-9, ``False`` from 0-4, and any
            other value (including the default ``None``) from all ten, 0-9.

    Returns:
        A "supertonic" VoiceAssignment whose length scale lies between 0.95
        and 1.14 in steps of 0.01.
    """
    digest = hashlib.sha256(sus_id.encode()).digest()
    seed = int.from_bytes(digest[:4], "big")

    # (first speaker id, number of speakers) for the requested voice pool.
    if female is True:
        first, count = 5, 5
    elif female is False:
        first, count = 0, 5
    else:
        first, count = 0, 10

    pace = round(0.95 + (seed % 20) / 100.0, 3)
    return VoiceAssignment(
        engine="supertonic",
        speaker_id=first + (seed % count),
        length_scale=pace,
    )