onitsche committed on
Commit
6fba46b
·
1 Parent(s): cbd73b1

Use piper-tts neural TTS for natural German voice

Browse files
Files changed (3) hide show
  1. .gitignore +1 -0
  2. pyproject.toml +1 -0
  3. talk/tts.py +64 -33
.gitignore CHANGED
@@ -1,3 +1,4 @@
1
  __pycache__/
2
  *.egg-info/
3
  build/
 
 
1
  __pycache__/
2
  *.egg-info/
3
  build/
4
+ talk/models/
pyproject.toml CHANGED
@@ -11,6 +11,7 @@ readme = "README.md"
11
  requires-python = ">=3.10"
12
  dependencies = [
13
  "reachy-mini",
 
14
  ]
15
  keywords = ["reachy-mini-app", "reachy-mini"]
16
 
 
11
  requires-python = ">=3.10"
12
  dependencies = [
13
  "reachy-mini",
14
+ "piper-tts",
15
  ]
16
  keywords = ["reachy-mini-app", "reachy-mini"]
17
 
talk/tts.py CHANGED
@@ -1,53 +1,88 @@
1
- """Text-to-speech via espeak-ng (or espeak) → WAV → Reachy Mini audio."""
 
 
 
 
 
 
2
 
3
  import logging
4
  import os
5
- import shutil
6
- import subprocess
7
- import tempfile
8
  import time
 
 
9
  from typing import Optional
10
 
11
  logger = logging.getLogger(__name__)
12
 
13
- _ESPEAK_CMD: Optional[str] = None
14
- _ESPEAK_CHECKED = False
 
 
 
 
15
 
 
 
16
 
17
- def _find_espeak() -> Optional[str]:
18
- global _ESPEAK_CMD, _ESPEAK_CHECKED
19
- if not _ESPEAK_CHECKED:
20
- _ESPEAK_CMD = shutil.which("espeak-ng") or shutil.which("espeak")
21
- if _ESPEAK_CMD:
22
- logger.info("TTS engine: %s", _ESPEAK_CMD)
23
- else:
24
- logger.warning(
25
- "No TTS engine found. Install with: sudo apt-get install espeak-ng"
26
- )
27
- _ESPEAK_CHECKED = True
28
- return _ESPEAK_CMD
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
 
31
  def speak(text: str, reachy_mini, words_per_minute: int = 120, lang: str = "de") -> None:
32
- """Synthesize *text* in the given language and play it through the robot's speakers.
33
 
 
34
  Blocks until playback should be complete.
35
  """
36
- cmd = _find_espeak()
37
- if cmd is None:
38
- return
39
 
 
40
  wav_path: Optional[str] = None
 
41
  try:
42
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
43
  wav_path = f.name
44
 
45
- subprocess.run(
46
- [cmd, "-v", lang, "-s", str(words_per_minute), "-w", wav_path, "--", text],
47
- check=True,
48
- timeout=15,
49
- capture_output=True,
50
- )
 
 
 
 
 
 
 
 
51
 
52
  try:
53
  reachy_mini.media.play_sound(wav_path)
@@ -55,15 +90,11 @@ def speak(text: str, reachy_mini, words_per_minute: int = 120, lang: str = "de")
55
  logger.warning("play_sound failed: %s", exc)
56
  return
57
 
58
- # play_sound() is async — wait for playback to finish.
59
  wps = words_per_minute / 60.0
60
  estimated = len(text.split()) / wps + 1.0
61
  time.sleep(max(estimated, 1.5))
62
 
63
- except subprocess.CalledProcessError as exc:
64
- logger.warning("espeak failed: %s", exc.stderr.decode(errors="replace"))
65
- except subprocess.TimeoutExpired:
66
- logger.warning("espeak timed out synthesising: %r", text)
67
  except Exception as exc:
68
  logger.warning("TTS error: %s", exc)
69
  finally:
 
1
+ """Text-to-speech via piper-tts (neural, offline) → WAV → Reachy Mini audio.
2
+
3
+ The German model (de_DE-thorsten-high, ~65 MB) is downloaded from Hugging Face
4
+ on first run and cached in talk/models/. Fully offline thereafter.
5
+
6
+ Falls back to espeak-ng if piper-tts is not installed.
7
+ """
8
 
9
  import logging
10
  import os
 
 
 
11
  import time
12
+ import wave
13
+ from pathlib import Path
14
  from typing import Optional
15
 
16
  logger = logging.getLogger(__name__)
17
 
18
+ _MODELS_DIR = Path(__file__).parent / "models"
19
+ _MODEL_NAME = "de_DE-thorsten-high"
20
+ _MODEL_BASE_URL = (
21
+ "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0"
22
+ "/de/de_DE/thorsten/high/"
23
+ )
24
 
25
+ _voice = None
26
+ _voice_loaded = False
27
 
28
+
29
+ def _load_voice():
30
+ global _voice, _voice_loaded
31
+ if _voice_loaded:
32
+ return _voice
33
+ _voice_loaded = True
34
+ try:
35
+ import urllib.request
36
+ from piper.voice import PiperVoice
37
+
38
+ _MODELS_DIR.mkdir(exist_ok=True)
39
+ onnx_path = _MODELS_DIR / f"{_MODEL_NAME}.onnx"
40
+ json_path = _MODELS_DIR / f"{_MODEL_NAME}.onnx.json"
41
+
42
+ if not onnx_path.exists():
43
+ logger.info("Downloading piper model %s (~65 MB) …", _MODEL_NAME)
44
+ urllib.request.urlretrieve(_MODEL_BASE_URL + f"{_MODEL_NAME}.onnx", onnx_path)
45
+ urllib.request.urlretrieve(_MODEL_BASE_URL + f"{_MODEL_NAME}.onnx.json", json_path)
46
+ logger.info("Piper model downloaded.")
47
+
48
+ _voice = PiperVoice.load(str(onnx_path), config_path=str(json_path))
49
+ logger.info("Piper TTS ready (%s)", _MODEL_NAME)
50
+ except ImportError:
51
+ logger.warning("piper-tts not installed — falling back to espeak-ng")
52
+ except Exception as exc:
53
+ logger.warning("Failed to load piper: %s", exc)
54
+ return _voice
55
 
56
 
57
  def speak(text: str, reachy_mini, words_per_minute: int = 120, lang: str = "de") -> None:
58
+ """Synthesize *text* and play it through the robot's speakers.
59
 
60
+ Uses piper-tts (neural) when available, espeak-ng otherwise.
61
  Blocks until playback should be complete.
62
  """
63
+ import tempfile
 
 
64
 
65
+ voice = _load_voice()
66
  wav_path: Optional[str] = None
67
+
68
  try:
69
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
70
  wav_path = f.name
71
 
72
+ if voice is not None:
73
+ with wave.open(wav_path, "wb") as wav_file:
74
+ voice.synthesize(text, wav_file)
75
+ else:
76
+ import shutil
77
+ import subprocess
78
+ cmd = shutil.which("espeak-ng") or shutil.which("espeak")
79
+ if cmd is None:
80
+ logger.warning("No TTS engine available. Install piper-tts or espeak-ng.")
81
+ return
82
+ subprocess.run(
83
+ [cmd, "-v", lang, "-s", str(words_per_minute), "-w", wav_path, "--", text],
84
+ check=True, timeout=15, capture_output=True,
85
+ )
86
 
87
  try:
88
  reachy_mini.media.play_sound(wav_path)
 
90
  logger.warning("play_sound failed: %s", exc)
91
  return
92
 
93
+ # play_sound() is async — wait for estimated playback duration.
94
  wps = words_per_minute / 60.0
95
  estimated = len(text.split()) / wps + 1.0
96
  time.sleep(max(estimated, 1.5))
97
 
 
 
 
 
98
  except Exception as exc:
99
  logger.warning("TTS error: %s", exc)
100
  finally: