Oliver Nitsche Claude Sonnet 4.6 committed on
Commit
be17ec0
·
1 Parent(s): f7c60d9

Make TTS robust: fallback to espeak, catch play_sound exceptions

Browse files

Previously speak() silently returned when espeak-ng was missing (only
a log warning) and left play_sound() exceptions uncaught, which would
crash the state machine and leave the robot mute.

Changes:
- Auto-detect espeak-ng or espeak via shutil.which (cached per process);
logs clearly at startup which engine is found or that none is available.
- Wrap play_sound() in its own try/except so an audio driver error no
longer propagates and crashes the GREETING/ENROLLING state.
- Switch sleep calculation from words_per_second to words_per_minute to
match the espeak -s flag unit; the wait is words / (words_per_minute / 60)
(about 2.33 WPS at 140 WPM) + 1 s buffer, with a 1.5 s minimum, so short phrases like "Hi Oliver!" are never cut off.
- Wrap the entire body in a broad except so any unexpected error is
logged rather than silently swallowed or propagated.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. recognizer/tts.py +59 -19
recognizer/tts.py CHANGED
@@ -1,39 +1,79 @@
1
- """Text-to-speech via espeak-ng → WAV file → Reachy Mini audio device."""
2
 
3
  import logging
4
  import os
 
5
  import subprocess
6
  import tempfile
7
  import time
 
8
 
9
  logger = logging.getLogger(__name__)
10
 
 
 
 
11
 
12
- def speak(text: str, reachy_mini, words_per_second: float = 2.5) -> None:
13
- """Synthesize *text* with espeak-ng and play it through the robot's speakers.
14
 
15
- Blocks until playback should be finished.
16
- Requires: sudo apt-get install espeak-ng
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  """
18
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
19
- wav_path = f.name
 
 
 
20
  try:
 
 
 
21
  subprocess.run(
22
- ["espeak-ng", "-s", "140", "-w", wav_path, "--", text],
23
  check=True,
24
  timeout=15,
25
  capture_output=True,
26
  )
27
- reachy_mini.media.play_sound(wav_path)
28
- # play_sound() returns immediately; wait for GStreamer playback to finish.
29
- estimated = len(text.split()) / words_per_second + 0.8
30
- time.sleep(max(estimated, 1.0))
31
- except FileNotFoundError:
32
- logger.warning("espeak-ng not found — install with: sudo apt-get install espeak-ng")
 
 
 
 
 
 
 
33
  except subprocess.CalledProcessError as exc:
34
- logger.warning("espeak-ng failed: %s", exc.stderr.decode(errors="replace"))
 
 
 
 
35
  finally:
36
- try:
37
- os.unlink(wav_path)
38
- except OSError:
39
- pass
 
 
1
+ """Text-to-speech via espeak-ng (or espeak) → WAV → Reachy Mini audio."""
2
 
3
  import logging
4
  import os
5
+ import shutil
6
  import subprocess
7
  import tempfile
8
  import time
9
+ from typing import Optional
10
 
11
  logger = logging.getLogger(__name__)
12
 
13
# Result of the one-time engine lookup: absolute path of the binary, or None
# while unresolved / when no engine is installed.
_ESPEAK_CMD: Optional[str] = None
# Flips to True after the first lookup so PATH is searched once per process,
# even when the search came back empty.
_ESPEAK_CHECKED = False


def _find_espeak() -> Optional[str]:
    """Locate the speech synthesiser binary, preferring espeak-ng over espeak.

    The PATH search runs on the first call only; the outcome (including
    "nothing found") is stored in module globals and returned on every
    later call. The first call also logs which engine was picked, or a
    warning with install instructions when none exists.

    Returns:
        The resolved executable path, or None if neither binary is on PATH.
    """
    global _ESPEAK_CMD, _ESPEAK_CHECKED

    # Fast path: lookup already performed earlier in this process.
    if _ESPEAK_CHECKED:
        return _ESPEAK_CMD

    located = shutil.which("espeak-ng")
    if not located:
        located = shutil.which("espeak")
    _ESPEAK_CMD = located

    if located:
        logger.info("TTS engine: %s", located)
    else:
        logger.warning(
            "No TTS engine found. Install one with: sudo apt-get install espeak-ng"
        )

    _ESPEAK_CHECKED = True
    return _ESPEAK_CMD
32
+
33
+
34
def speak(text: str, reachy_mini, words_per_minute: int = 140) -> None:
    """Synthesize *text* and play it through the robot's speakers.

    Renders the phrase to a temporary WAV file with the engine returned by
    ``_find_espeak()`` (espeak-ng preferred, espeak as fallback), hands the
    file to ``reachy_mini.media.play_sound()``, sleeps for the estimated
    playback time, and finally deletes the file.

    Args:
        text: Phrase to speak. Empty or whitespace-only text is skipped.
        reachy_mini: Robot handle; only ``media.play_sound(path)`` is called.
        words_per_minute: Speaking rate, passed to the engine's ``-s`` flag
            and used to estimate playback duration. Non-positive values are
            replaced by 140 (with a warning).

    Synthesis, playback and other unexpected errors are logged as warnings
    rather than raised, so a TTS failure does not take down the caller.
    """
    # Nothing to say: skip the subprocess and the playback wait entirely.
    if not text or not text.strip():
        return

    cmd = _find_espeak()
    if cmd is None:
        return  # warning already logged on the first lookup

    # A zero rate would divide by zero *after* playback has started, which
    # would jump to cleanup and delete the WAV while it may still be playing.
    if words_per_minute <= 0:
        logger.warning(
            "Invalid words_per_minute=%r; falling back to 140", words_per_minute
        )
        words_per_minute = 140

    wav_path: Optional[str] = None
    try:
        # delete=False: the engine and the player both need the path after
        # this handle is closed; the file is removed in ``finally``.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            wav_path = f.name

        subprocess.run(
            # "--" stops option parsing so text starting with "-" is spoken.
            [cmd, "-s", str(words_per_minute), "-w", wav_path, "--", text],
            check=True,
            timeout=15,
            capture_output=True,
        )

        # Isolate audio-driver errors so they are reported distinctly.
        try:
            reachy_mini.media.play_sound(wav_path)
        except Exception as exc:
            logger.warning("play_sound failed: %s", exc)
            return

        # play_sound() does not block until playback ends, so wait here.
        # Estimate: word count at the requested rate (140 WPM is about
        # 2.33 words/s) plus a 1 s buffer, never less than 1.5 s.
        words_per_second = words_per_minute / 60.0
        estimated = len(text.split()) / words_per_second + 1.0
        time.sleep(max(estimated, 1.5))

    except subprocess.CalledProcessError as exc:
        stderr = exc.stderr or b""
        logger.warning("espeak failed: %s", stderr.decode(errors="replace"))
    except subprocess.TimeoutExpired:
        logger.warning("espeak timed out synthesising: %r", text)
    except Exception as exc:
        # Last-resort boundary: keep the traceback so the cause is findable.
        logger.warning("TTS error: %s", exc, exc_info=True)
    finally:
        if wav_path:
            try:
                os.unlink(wav_path)
            except OSError:
                pass  # best-effort cleanup of the temp file