# the-echo / echo /tools /voice.py
# frankyy03's picture
# Deploy The Echo (MockLLM path): Gradio app + echo package
# 897d5bd verified
"""
echo/tools/voice.py
-------------------
The voice tool gives each echo a spoken line — the same "you" at different ages
and emotional registers. Hearing an alternate self speak is the visceral beat
that text can't reach.
VoiceTool interface:
  * MockVoice  — writes a tiny placeholder text file and returns its path;
                 no audio deps (testing).
  * PiperVoice — wraps a local TTS (Piper/Coqui) at deploy time, with the
                 WorldState's voice_hint shaping pace/warmth, and an optional
                 pitch shift so each branch's voice differs subtly.
"""
from __future__ import annotations
import os
from abc import ABC, abstractmethod
from ..core.world_state import WorldState # module import (not the core package) avoids a cycle
class VoiceTool(ABC):
    """Abstract interface implemented by every voice backend in this module."""

    @abstractmethod
    def speak(self, state: WorldState, out_dir: str) -> str:
        """Render ``state.voice_line`` into *out_dir* and return the file path.

        Concrete backends decide what kind of file is produced; callers only
        rely on getting back the path of whatever was written.
        """
class MockVoice(VoiceTool):
    """Offline stand-in that writes a text marker instead of real audio.

    It needs no audio dependencies, so the pipeline can run in tests.
    """

    def speak(self, state: WorldState, out_dir: str) -> str:
        """Write a placeholder marker for ``state.voice_line``.

        Args:
            state: Supplies ``node_id`` (used in the file name) plus
                ``tone.voice_hint`` and ``voice_line`` (written to the file).
            out_dir: Target directory; created if it does not exist yet.

        Returns:
            Path of the ``voice_<node_id>.txt`` marker file that was written.
        """
        os.makedirs(out_dir, exist_ok=True)
        path = os.path.join(out_dir, f"voice_{state.node_id}.txt")
        # Explicit UTF-8: the spoken line is free text, and the platform
        # default encoding (e.g. cp1252 on Windows) cannot represent every
        # character, which would raise UnicodeEncodeError here.
        with open(path, "w", encoding="utf-8") as f:
            f.write(f"[VOICE hint={state.tone.voice_hint!r}] {state.voice_line}")
        return path
class PiperVoice(VoiceTool):
    """
    Deploy-time TTS. Lazy-imports the synth backend. A small pitch offset keyed
    to the branch makes parallel selves sound like the same person tuned
    differently (older/wearier/brighter).

    If the backend cannot be imported, a text marker is written instead of
    audio so the demo keeps running.
    """

    def __init__(self, model_path: str, base_pitch: float = 1.0):
        """Store the model location and the neutral pitch factor.

        Args:
            model_path: Path handed to the backend's ``load`` call.
            base_pitch: Pitch used when ``state.tone.valence`` is zero.
        """
        self.model_path = model_path
        self.base_pitch = base_pitch

    def speak(self, state: WorldState, out_dir: str) -> str:
        """Synthesize ``state.voice_line`` and return the file actually written.

        Args:
            state: Supplies ``node_id`` (file name), ``voice_line`` (text),
                ``tone.valence`` (pitch offset) and ``tone.voice_hint``.
            out_dir: Target directory; created if it does not exist yet.

        Returns:
            Path of ``voice_<node_id>.wav`` when audio was produced, or of the
            ``.wav.txt`` marker when the TTS backend is unavailable.
        """
        os.makedirs(out_dir, exist_ok=True)
        path = os.path.join(out_dir, f"voice_{state.node_id}.wav")
        # pitch nudged by emotional valence: down when struggling, up when light
        pitch = self.base_pitch + 0.04 * state.tone.valence
        written = self._synthesize(
            state.voice_line, path, pitch, state.tone.voice_hint
        )
        # Return what exists on disk. Falling back to `path` keeps subclasses
        # whose _synthesize override still returns None working.
        return written or path

    def _synthesize(self, text: str, path: str, pitch: float, hint: str) -> str:
        """Write audio for *text* to *path*, or a marker if TTS is missing.

        Returns:
            The path of the file that was written.
        """
        # Lazy import keeps the package importable without TTS installed.
        try:
            from piper import PiperVoice as _Piper  # type: ignore
        except Exception:
            # Deliberate best-effort: any import-time failure degrades to a
            # marker file instead of crashing the demo.
            marker_path = path + ".txt"
            with open(marker_path, "w", encoding="utf-8") as f:
                f.write(f"[TTS pitch={pitch:.2f} hint={hint!r}] {text}")
            return marker_path

        import wave

        voice = _Piper.load(self.model_path)
        # Open the output through the wave module rather than as a raw binary
        # file, so the backend receives a WAV writer that can emit the header.
        # NOTE(review): assumes the piper-tts API where synthesize() takes a
        # wave.Wave_write plus these kwargs; confirm against the installed version.
        # NOTE: `pitch` and `hint` are not passed to the backend in this call;
        # they are only recorded in the fallback marker above.
        with wave.open(path, "wb") as wav_file:
            voice.synthesize(
                text, wav_file, length_scale=1.0, sentence_silence=0.3
            )
        return path