File size: 2,396 Bytes
414dc55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
"""On-device Supertonic voice synthesis for suspect replies.

Lazily loads the provider (or degrades to silent), synthesizes a WAV per reply, and
caches it on disk keyed by (voice, text) so re-asks are instant. Single-flight: synthesis
runs under a lock so it never oversubscribes the CPU it shares with the LLM.
"""

from __future__ import annotations

import hashlib
import logging
import threading
from pathlib import Path

from ..config import get_settings
from ..constants import PROJECT_ROOT
from ..schemas.suspect import VoiceAssignment
from ..tts.provider import make_tts_provider

# Directory holding synthesized replies, one "<key>.wav" file per cache key.
_CACHE_DIR = PROJECT_ROOT / ".cache" / "tts"


class TtsService:
    """Lazy, disk-cached, single-flight front end to the TTS provider.

    The provider is built on first use with ``make_tts_provider(get_settings())``.
    If that raises, the failure is logged, remembered, and never retried: from
    then on the service reports itself unavailable and ``synth`` returns ``None``.
    """

    def __init__(self) -> None:
        self._provider = None  # built lazily by _get()
        self._failed = False  # set once provider construction has raised
        self._lock = threading.Lock()  # one synthesis at a time
        self._init_lock = threading.Lock()  # one provider construction at a time

    def _get(self):
        """Return the provider, building it on first use.

        Returns ``None`` when construction failed (now or on an earlier call).
        """
        if self._provider is None and not self._failed:
            with self._init_lock:
                # Re-check under the lock: a concurrent caller may have finished
                # (or failed) the build while this thread was waiting.
                if self._provider is None and not self._failed:
                    try:
                        self._provider = make_tts_provider(get_settings())
                    except Exception:
                        # Best-effort by design: no voice must not break the caller.
                        # Record why, so a broken install is diagnosable.
                        logging.getLogger(__name__).warning(
                            "TTS provider could not be loaded; continuing without voice",
                            exc_info=True,
                        )
                        self._failed = True
        return self._provider

    def available(self) -> bool:
        """Return True when a provider is loaded and its ``available`` attribute is truthy."""
        provider = self._get()
        return bool(provider and getattr(provider, "available", False))

    def synth(self, text: str, voice: VoiceAssignment | None) -> Path | None:
        """Synthesize *text* to a cached WAV file and return its path.

        The cache key is derived from the voice's ``speaker_id`` and
        ``length_scale`` (``0`` and ``1.0`` when *voice* is ``None``) plus the
        text, so repeated requests reuse the file already on disk.

        Returns ``None`` when there is no usable provider, when *text* is blank,
        or when the provider returns no output. Exceptions raised by the
        provider propagate to the caller.
        """
        provider = self._get()
        if not provider or not getattr(provider, "available", False) or not text.strip():
            return None
        sid = voice.speaker_id if voice else 0
        scale = voice.length_scale if voice else 1.0
        key = hashlib.sha256(f"{sid}|{scale}|{text}".encode()).hexdigest()[:16]
        out = _CACHE_DIR / f"{key}.wav"
        if out.exists():
            return out
        with self._lock:
            # Another caller may have produced the file while we waited.
            if out.exists():
                return out
            out.parent.mkdir(parents=True, exist_ok=True)
            # Write to a sibling temp file and rename on success, so the
            # lock-free exists() check above never sees a half-written WAV and
            # a failed synthesis cannot leave a corrupt cache entry behind.
            # The temp name keeps the ".wav" suffix in case the provider looks at it.
            # NOTE(review): assumes synth_to_file writes to the path it is given
            # and returns that path (or None) - confirm against the provider.
            partial = out.with_name(f"{key}.part.wav")
            try:
                produced = provider.synth_to_file(text, voice, partial)
                if not produced:
                    return None
                Path(produced).replace(out)
                return out
            finally:
                # No-op after a successful rename; removes debris otherwise.
                partial.unlink(missing_ok=True)


# Module-level shared service instance; its provider is only built on first use.
TTS = TtsService()


def voice_seed(sus_id: str, *, female: bool | None = None) -> VoiceAssignment:
    """Derive a deterministic VoiceAssignment from a suspect id.

    Intended for cases where no CaseFile suspect is available (e.g. the golden
    case). The first four bytes of the id's SHA-256 digest seed both the
    speaker and the length scale, so the same id always yields the same voice.

    ``female=True`` picks from speakers 5-9, ``female=False`` from 0-4, and any
    other value (including the default ``None``) from all 10 voices.
    """
    digest = hashlib.sha256(sus_id.encode()).digest()
    seed = int.from_bytes(digest[:4], "big")

    # (first speaker id, number of speakers) for the requested gender pool.
    if female is True:
        offset, pool = 5, 5
    elif female is False:
        offset, pool = 0, 5
    else:
        offset, pool = 0, 10

    step = seed % 20  # 0..19 hundredths added on top of the 0.95 base
    return VoiceAssignment(
        engine="supertonic",
        speaker_id=offset + seed % pool,
        length_scale=round(0.95 + step / 100.0, 3),
    )