Spaces:

onitsche
/

talk

Running

App Files Files Community

onitsche committed 16 days ago

Commit

6fba46b

1 Parent(s): cbd73b1

Use piper-tts neural TTS for natural German voice

Browse files

Files changed (3) hide show

.gitignore +1 -0
pyproject.toml +1 -0
talk/tts.py +64 -33

.gitignore CHANGED Viewed

@@ -1,3 +1,4 @@
 __pycache__/
 *.egg-info/
 build/

 __pycache__/
 *.egg-info/
 build/
+talk/models/

pyproject.toml CHANGED Viewed

@@ -11,6 +11,7 @@ readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
     "reachy-mini",
 ]
 keywords = ["reachy-mini-app", "reachy-mini"]

 requires-python = ">=3.10"
 dependencies = [
     "reachy-mini",
+    "piper-tts",
 ]
 keywords = ["reachy-mini-app", "reachy-mini"]

talk/tts.py CHANGED Viewed

@@ -1,53 +1,88 @@
-"""Text-to-speech via espeak-ng (or espeak) → WAV → Reachy Mini audio."""
 import logging
 import os
-import shutil
-import subprocess
-import tempfile
 import time
 from typing import Optional
 logger = logging.getLogger(__name__)
-_ESPEAK_CMD: Optional[str] = None
-_ESPEAK_CHECKED = False
-def _find_espeak() -> Optional[str]:
-    global _ESPEAK_CMD, _ESPEAK_CHECKED
-    if not _ESPEAK_CHECKED:
-        _ESPEAK_CMD = shutil.which("espeak-ng") or shutil.which("espeak")
-        if _ESPEAK_CMD:
-            logger.info("TTS engine: %s", _ESPEAK_CMD)
-        else:
-            logger.warning(
-                "No TTS engine found. Install with: sudo apt-get install espeak-ng"
-            )
-        _ESPEAK_CHECKED = True
-    return _ESPEAK_CMD
 def speak(text: str, reachy_mini, words_per_minute: int = 120, lang: str = "de") -> None:
-    """Synthesize *text* in the given language and play it through the robot's speakers.
     Blocks until playback should be complete.
     """
-    cmd = _find_espeak()
-    if cmd is None:
-        return
     wav_path: Optional[str] = None
     try:
         with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
             wav_path = f.name
-        subprocess.run(
-            [cmd, "-v", lang, "-s", str(words_per_minute), "-w", wav_path, "--", text],
-            check=True,
-            timeout=15,
-            capture_output=True,
-        )
         try:
             reachy_mini.media.play_sound(wav_path)
@@ -55,15 +90,11 @@ def speak(text: str, reachy_mini, words_per_minute: int = 120, lang: str = "de")
             logger.warning("play_sound failed: %s", exc)
             return
-        # play_sound() is async — wait for playback to finish.
         wps = words_per_minute / 60.0
         estimated = len(text.split()) / wps + 1.0
         time.sleep(max(estimated, 1.5))
-    except subprocess.CalledProcessError as exc:
-        logger.warning("espeak failed: %s", exc.stderr.decode(errors="replace"))
-    except subprocess.TimeoutExpired:
-        logger.warning("espeak timed out synthesising: %r", text)
     except Exception as exc:
         logger.warning("TTS error: %s", exc)
     finally:

+"""Text-to-speech via piper-tts (neural, offline) → WAV → Reachy Mini audio.
+The German model (de_DE-thorsten-high, ~65 MB) is downloaded from Hugging Face
+on first run and cached in talk/models/. Fully offline thereafter.
+Falls back to espeak-ng (or espeak) if piper-tts is not installed or the model fails to load.
+"""
 import logging
 import os
 import time
+import wave
+from pathlib import Path
 from typing import Optional
 logger = logging.getLogger(__name__)
+_MODELS_DIR = Path(__file__).parent / "models"
+_MODEL_NAME = "de_DE-thorsten-high"
+_MODEL_BASE_URL = (
+    "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0"
+    "/de/de_DE/thorsten/high/"
+)
+_voice = None
+_voice_loaded = False
+def _load_voice():
+    global _voice, _voice_loaded
+    if _voice_loaded:
+        return _voice
+    _voice_loaded = True
+    try:
+        import urllib.request
+        from piper.voice import PiperVoice
+        _MODELS_DIR.mkdir(exist_ok=True)
+        onnx_path = _MODELS_DIR / f"{_MODEL_NAME}.onnx"
+        json_path = _MODELS_DIR / f"{_MODEL_NAME}.onnx.json"
+        if not onnx_path.exists():
+            logger.info("Downloading piper model %s (~65 MB) …", _MODEL_NAME)
+            urllib.request.urlretrieve(_MODEL_BASE_URL + f"{_MODEL_NAME}.onnx", onnx_path)
+            urllib.request.urlretrieve(_MODEL_BASE_URL + f"{_MODEL_NAME}.onnx.json", json_path)
+            logger.info("Piper model downloaded.")
+        _voice = PiperVoice.load(str(onnx_path), config_path=str(json_path))
+        logger.info("Piper TTS ready (%s)", _MODEL_NAME)
+    except ImportError:
+        logger.warning("piper-tts not installed — falling back to espeak-ng")
+    except Exception as exc:
+        logger.warning("Failed to load piper: %s", exc)
+    return _voice
 def speak(text: str, reachy_mini, words_per_minute: int = 120, lang: str = "de") -> None:
+    """Synthesize *text* and play it through the robot's speakers.
+    Uses piper-tts (neural) when available, espeak-ng otherwise.
     Blocks until playback should be complete.
     """
+    import tempfile
+    voice = _load_voice()
     wav_path: Optional[str] = None
     try:
         with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
             wav_path = f.name
+        if voice is not None:
+            with wave.open(wav_path, "wb") as wav_file:
+                voice.synthesize(text, wav_file)
+        else:
+            import shutil
+            import subprocess
+            cmd = shutil.which("espeak-ng") or shutil.which("espeak")
+            if cmd is None:
+                logger.warning("No TTS engine available. Install piper-tts or espeak-ng.")
+                return
+            subprocess.run(
+                [cmd, "-v", lang, "-s", str(words_per_minute), "-w", wav_path, "--", text],
+                check=True, timeout=15, capture_output=True,
+            )
         try:
             reachy_mini.media.play_sound(wav_path)
             logger.warning("play_sound failed: %s", exc)
             return
+        # play_sound() is async — wait for estimated playback duration.
         wps = words_per_minute / 60.0
         estimated = len(text.split()) / wps + 1.0
         time.sleep(max(estimated, 1.5))
     except Exception as exc:
         logger.warning("TTS error: %s", exc)
     finally: