pnocera commited on
Commit
3f27f46
·
0 Parent(s):

Initial commit - Goshi voice AI for Reachy Mini

Browse files
.gitignore ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .eggs/
8
+ *.egg
9
+ .venv/
10
+ venv/
11
+ *.log
12
+ .DS_Store
README.md ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Goshi
3
+ emoji: 🍣
4
+ colorFrom: purple
5
+ colorTo: pink
6
+ sdk: static
7
+ pinned: false
8
+ short_description: Voice AI companion for Reachy Mini - Say "Sushi" to chat!
9
+ tags:
10
+ - reachy_mini
11
+ - reachy_mini_python_app
12
+ ---
13
+
14
+ # Goshi 🍣
15
+
16
+ A voice AI companion for Reachy Mini! Say "Sushi" to start a conversation.
17
+
18
+ ## Features
19
+
20
+ - 🎙️ **Wake Word Detection** - Say "Sushi" to activate
21
+ - 🗣️ **Speech-to-Text** - Whisper-powered transcription
22
+ - 🤖 **LLM Conversation** - Natural dialogue (local or cloud)
23
+ - 🔊 **Text-to-Speech** - Piper neural TTS response
24
+ - 🌍 **Bilingual** - French and English support
25
+ - 🤸 **Expressive Gestures** - Robot reacts while talking
26
+
27
+ ## Architecture
28
+
29
+ ```
30
+ Reachy Mini (Pi 4) Workstation (GPU)
31
+ ┌─────────────────┐ ┌──────────────────┐
32
+ │ Goshi Client │◄─SRT───►│ Goshi Server │
33
+ │ - Wake word │ │ - VAD │
34
+ │ - A/V capture │ │ - Whisper STT │
35
+ │ - TTS playback │ │ - LLM │
36
+ └─────────────────┘ │ - Piper TTS │
37
+ └──────────────────┘
38
+ ```
39
+
40
+ ## Requirements
41
+
42
+ - Reachy Mini robot
43
+ - Goshi server running on a workstation (with NVIDIA GPU)
44
+ - Network connectivity between robot and server
45
+
46
+ ## Installation
47
+
48
+ ### On Reachy Mini
49
+
50
+ Install via the app manager or:
51
+
52
+ ```bash
53
+ pip install git+https://huggingface.co/spaces/pnocera/goshi
54
+ ```
55
+
56
+ ### Server Setup
57
+
58
+ See the [Goshi Server Repository](https://github.com/pnocera/goshi) for server setup instructions.
59
+
60
+ ## Configuration
61
+
62
+ After installation, update `config.yaml` with your server IP:
63
+
64
+ ```yaml
65
+ server:
66
+ host: "YOUR_SERVER_IP" # e.g., 192.168.1.57
67
+ srt_receive_port: 8888
68
+ srt_send_port: 8889
69
+ ```
70
+
71
+ ## Usage
72
+
73
+ 1. Start the Goshi server on your workstation
74
+ 2. Install the app on Reachy Mini
75
+ 3. Configure server IP in settings
76
+ 4. Say "Sushi" to start talking!
77
+
78
+ ## License
79
+
80
+ MIT License - See LICENSE for details.
81
+
82
+ ## Credits
83
+
84
+ Built with ❤️ for Reachy Mini by the Pollen Robotics community.
config.yaml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Goshi Configuration
2
+ # Voice AI companion for Reachy Mini
3
+
4
+ server:
5
+ host: "192.168.1.57" # Update with your Goshi server IP
6
+ srt_receive_port: 8888
7
+ srt_send_port: 8889
8
+ srt_latency_ms: 125
9
+
10
+ kws:
11
+ wake_word: "Sushi"
12
+ keywords_file: "./keywords.txt"
13
+ model_path: "./models/kws"
14
+ sensitivity: 0.5
15
+ sample_rate: 16000
16
+
17
+ audio:
18
+ input_device: "hw:0"
19
+ output_device: "hw:0"
20
+ sample_rate: 16000
21
+ channels: 1
22
+
23
+ streaming:
24
+ enable_video: true
25
+ video_width: 1280
26
+ video_height: 720
27
+ video_framerate: 30
28
+ silence_timeout_s: 30
29
+ use_hardware_encoder: true
goshi/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
"""Goshi - Voice AI companion for Reachy Mini."""

# Package version string (PEP 440); bump on each release.
__version__ = "0.1.0"
goshi/config.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Configuration management for Pi client."""
2
+
3
+ import os
4
+ from dataclasses import dataclass, field
5
+ from pathlib import Path
6
+ from typing import Optional
7
+
8
+ import yaml
9
+
10
+
@dataclass
class ServerConfig:
    """Server connection settings.

    Describes how the Pi client reaches the Goshi workstation server
    over SRT. Port naming is from the server's point of view (main.py
    maps srt_receive_port to the client's send port and vice versa).
    """
    host: str = "192.168.1.100"   # Goshi server IP/hostname; overridden by config.yaml
    srt_receive_port: int = 8888  # port the server receives the robot's A/V stream on
    srt_send_port: int = 8889     # port the server sends TTS audio back from
    srt_latency_ms: int = 125     # SRT latency budget in milliseconds
18
+
19
+
@dataclass
class KWSConfig:
    """Keyword spotting settings (user-facing).

    NOTE(review): a different `KWSConfig` (model-file paths) is declared in
    kws_daemon.py under the same name — consider renaming one of them.
    """
    wake_word: str = "Sushi"                # phrase that activates the assistant
    keywords_file: str = "./keywords.txt"   # keyword list consumed by the spotter
    model_path: str = "./models/kws"        # directory holding the KWS model files
    sensitivity: float = 0.5                # detection sensitivity (0..1 presumably — TODO confirm usage)
    sample_rate: int = 16000                # microphone sample rate in Hz
28
+
29
+
@dataclass
class AudioConfig:
    """Audio capture/playback settings (ALSA device names)."""
    input_device: str = "hw:0"    # ALSA capture device
    output_device: str = "hw:0"   # ALSA playback device
    sample_rate: int = 16000      # sample rate in Hz
    channels: int = 1             # mono capture
37
+
38
+
@dataclass
class StreamingConfig:
    """A/V streaming settings for the SRT pipelines."""
    enable_video: bool = True          # send camera video alongside audio
    video_width: int = 1280            # capture width in pixels
    video_height: int = 720            # capture height in pixels
    video_framerate: int = 30          # capture framerate (fps)
    silence_timeout_s: int = 30        # stop streaming after this much inactivity
    use_hardware_encoder: bool = True  # prefer the Pi's v4l2 H.264 encoder over x264
48
+
49
+
@dataclass
class Config:
    """Root configuration.

    Aggregates the per-section dataclasses; sections mirror the top-level
    keys of config.yaml.
    """
    server: ServerConfig = field(default_factory=ServerConfig)
    kws: KWSConfig = field(default_factory=KWSConfig)
    audio: AudioConfig = field(default_factory=AudioConfig)
    streaming: StreamingConfig = field(default_factory=StreamingConfig)

    @classmethod
    def from_file(cls, path: str) -> "Config":
        """Load configuration from a YAML file.

        A missing file yields pure defaults. Unknown keys inside a section
        are ignored so an older build tolerates a newer config.yaml (the
        previous version passed sections straight to the dataclass
        constructors, so any unexpected key raised TypeError). Missing
        keys keep their dataclass defaults.

        Args:
            path: path to the YAML configuration file.

        Returns:
            A populated Config instance.
        """
        config_path = Path(path)
        if not config_path.exists():
            return cls()

        with open(config_path) as f:
            data = yaml.safe_load(f) or {}

        config = cls()

        def _section(section_cls, raw):
            # Build one section dataclass from raw YAML, dropping keys the
            # dataclass does not declare (forward compatibility) and
            # tolerating a non-mapping value (e.g. an empty YAML section).
            from dataclasses import fields
            if not isinstance(raw, dict):
                return section_cls()
            known = {f.name for f in fields(section_cls)}
            return section_cls(**{k: v for k, v in raw.items() if k in known})

        if "server" in data:
            config.server = _section(ServerConfig, data["server"])
        if "kws" in data:
            config.kws = _section(KWSConfig, data["kws"])
        if "audio" in data:
            config.audio = _section(AudioConfig, data["audio"])
        if "streaming" in data:
            config.streaming = _section(StreamingConfig, data["streaming"])

        return config

    def to_dict(self) -> dict:
        """Convert to a plain nested dict (one sub-dict per section).

        dataclasses.asdict produces exactly the section/field layout the
        previous hand-written mapping did, without the duplication.
        """
        from dataclasses import asdict
        return asdict(self)

    def save(self, path: str) -> None:
        """Save configuration to a YAML file at *path*."""
        with open(path, "w") as f:
            yaml.dump(self.to_dict(), f, default_flow_style=False)
goshi/gstreamer_pipelines.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """GStreamer pipeline definitions for Reachy Mini."""
2
+
3
+ import logging
4
+ from dataclasses import dataclass
5
+ from typing import Optional
6
+
7
+ import gi
8
+ gi.require_version("Gst", "1.0")
9
+ from gi.repository import Gst, GLib
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
@dataclass
class PipelineConfig:
    """Parameters consumed by the pipeline-string builders below."""
    server_host: str                   # Goshi server IP/hostname
    srt_port: int                      # SRT port for this pipeline (send or receive)
    srt_latency_ms: int                # SRT latency budget in milliseconds
    audio_device: str = "hw:0"         # ALSA device for alsasrc/alsasink
    sample_rate: int = 16000           # audio sample rate in Hz
    enable_video: bool = True          # include the camera branch in capture
    video_width: int = 1280            # camera capture width
    video_height: int = 720            # camera capture height
    video_framerate: int = 30          # camera capture framerate (fps)
    use_hardware_encoder: bool = True  # v4l2h264enc (Pi HW) vs x264enc (software)
27
+
28
+
def build_capture_pipeline(config: PipelineConfig) -> str:
    """Build the A/V capture and SRT send pipeline description.

    Returns a single-line gst-launch style string: an ALSA/Opus audio
    branch, optionally a libcamera/H.264 video branch, muxed into
    MPEG-TS and pushed to the server via srtsink (caller mode).
    """
    srt_uri = (
        f"srt://{config.server_host}:{config.srt_port}"
        f"?mode=caller&latency={config.srt_latency_ms}"
    )

    # Microphone -> Opus. Shared by both the A/V and audio-only layouts.
    audio_branch = (
        f"alsasrc device={config.audio_device} ! "
        "audioconvert ! audioresample ! "
        f"audio/x-raw,rate={config.sample_rate},channels=1,format=S16LE ! "
        "opusenc bitrate=32000 ! opusparse"
    )

    if not config.enable_video:
        # Audio-only: Opus straight over SRT, no muxer required.
        return f"{audio_branch} ! srtsink uri={srt_uri}"

    # Camera -> H.264, hardware (v4l2) or software (x264) encoder.
    if config.use_hardware_encoder:
        encoder = (
            "v4l2convert ! "
            'v4l2h264enc extra-controls="controls,repeat_sequence_header=1" ! '
            "video/x-h264,level=(string)4 ! h264parse"
        )
    else:
        encoder = (
            "videoconvert ! "
            "x264enc tune=zerolatency bitrate=2000 speed-preset=ultrafast ! "
            "h264parse"
        )

    video_branch = (
        "libcamerasrc ! "
        f"video/x-raw,width={config.video_width},height={config.video_height},"
        f"framerate={config.video_framerate}/1 ! "
        f"{encoder} ! mux."
    )

    # Both branches feed the named mpegtsmux, whose output goes to SRT.
    return (
        f"{video_branch} {audio_branch} ! mux. "
        f"mpegtsmux name=mux ! srtsink uri={srt_uri}"
    )
72
+
73
+
def build_playback_pipeline(config: PipelineConfig) -> str:
    """Build the TTS audio playback pipeline description.

    Listens for the server's Opus stream over SRT and plays it on the
    configured ALSA device.
    """
    source = (
        f"srtsrc uri=srt://{config.server_host}:{config.srt_port}"
        f"?mode=listener&latency={config.srt_latency_ms}"
    )
    return (
        f"{source} ! opusparse ! opusdec ! audioconvert ! audioresample ! "
        f"audio/x-raw,rate={config.sample_rate},channels=1 ! "
        f"alsasink device={config.audio_device}"
    )
84
+
85
+
class Pipeline:
    """Lifecycle wrapper around a GStreamer pipeline description string.

    Parses the string on start(), watches the bus for errors/EOS, and
    tears everything down on stop(). Usable as a context manager.
    """

    def __init__(self, pipeline_str: str, name: str = "pipeline"):
        """Store the description; Gst.init is idempotent so safe here."""
        Gst.init(None)
        self.name = name
        self.pipeline_str = pipeline_str
        self.pipeline: Optional[Gst.Pipeline] = None
        self.bus: Optional[Gst.Bus] = None
        self._running = False

    def start(self) -> bool:
        """Parse and start the pipeline.

        Returns:
            True on success, False if parsing or the state change failed.
        """
        try:
            self.pipeline = Gst.parse_launch(self.pipeline_str)
            self.bus = self.pipeline.get_bus()
            self.bus.add_signal_watch()
            self.bus.connect("message", self._on_message)

            ret = self.pipeline.set_state(Gst.State.PLAYING)
            if ret == Gst.StateChangeReturn.FAILURE:
                logger.error(f"Failed to start {self.name} pipeline")
                # BUGFIX: tear down the half-started pipeline instead of
                # leaving it (and its bus signal watch) dangling.
                self._teardown()
                return False

            self._running = True
            logger.info(f"{self.name} pipeline started")
            return True

        except Exception as e:
            logger.error(f"Failed to create {self.name} pipeline: {e}")
            self._teardown()
            return False

    def stop(self) -> None:
        """Stop the pipeline and release its resources."""
        if self.pipeline:
            self._teardown()
            logger.info(f"{self.name} pipeline stopped")

    def _teardown(self) -> None:
        """Set the pipeline to NULL and detach the bus watch."""
        if self.bus is not None:
            # BUGFIX: the original never removed the signal watch, leaking
            # the bus watch source on every stop/start cycle.
            self.bus.remove_signal_watch()
            self.bus = None
        if self.pipeline is not None:
            self.pipeline.set_state(Gst.State.NULL)
            self.pipeline = None
        self._running = False

    def is_running(self) -> bool:
        """Check if pipeline is running (cleared on error/EOS)."""
        return self._running

    def _on_message(self, bus, message):
        """Handle GStreamer bus messages (error, EOS, state changes)."""
        msg_type = message.type

        if msg_type == Gst.MessageType.ERROR:
            err, debug = message.parse_error()
            logger.error(f"{self.name} pipeline error: {err.message}")
            logger.debug(f"Debug: {debug}")
            self._running = False

        elif msg_type == Gst.MessageType.EOS:
            logger.info(f"{self.name} pipeline end of stream")
            self._running = False

        elif msg_type == Gst.MessageType.STATE_CHANGED:
            # Only log state changes of the top-level pipeline, not elements.
            if message.src == self.pipeline:
                old, new, pending = message.parse_state_changed()
                logger.debug(f"{self.name} state: {old.value_nick} -> {new.value_nick}")

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.stop()
        return False
goshi/kws_daemon.py ADDED
@@ -0,0 +1,262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Keyword Spotting Daemon for wake word detection."""
2
+
3
+ import logging
4
+ import queue
5
+ import threading
6
+ import time
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+ from typing import Callable, Optional
10
+
11
+ import numpy as np
12
+
13
+ try:
14
+ import sherpa_onnx
15
+ except ImportError:
16
+ sherpa_onnx = None
17
+
18
+ try:
19
+ import sounddevice as sd
20
+ except ImportError:
21
+ sd = None
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
@dataclass
class KWSConfig:
    """KWS configuration: model files and decoding parameters.

    All values are forwarded to sherpa_onnx.KeywordSpotter.
    NOTE(review): shares its name with goshi.config.KWSConfig, which holds
    the user-facing settings — consider renaming one to avoid confusion.
    """
    encoder_path: str                 # passed as `encoder` to sherpa_onnx.KeywordSpotter
    decoder_path: str                 # passed as `decoder`
    joiner_path: str                  # passed as `joiner`
    tokens_path: str                  # passed as `tokens`
    keywords_file: str                # passed as `keywords_file`
    sample_rate: int = 16000          # expected input sample rate in Hz
    num_threads: int = 2              # inference threads
    keywords_threshold: float = 0.25  # detection threshold
    keywords_score: float = 1.0       # keyword boosting score
    num_trailing_blanks: int = 1      # trailing blanks required after a match
39
+
40
+
class KeywordSpotter:
    """Sherpa-ONNX Keyword Spotter wrapper.

    Owns one streaming decoder state; feed chunks via process_audio().
    """

    def __init__(self, config: KWSConfig):
        """Build the spotter from the model paths in *config*.

        Raises:
            ImportError: if sherpa_onnx is not installed.
        """
        if sherpa_onnx is None:
            raise ImportError("sherpa_onnx not installed")

        self.config = config
        self.sample_rate = config.sample_rate

        # Create keyword spotter
        self.spotter = sherpa_onnx.KeywordSpotter(
            tokens=config.tokens_path,
            encoder=config.encoder_path,
            decoder=config.decoder_path,
            joiner=config.joiner_path,
            num_threads=config.num_threads,
            keywords_file=config.keywords_file,
            keywords_score=config.keywords_score,
            keywords_threshold=config.keywords_threshold,
            num_trailing_blanks=config.num_trailing_blanks,
            provider="cpu",  # CPU inference (runs on the Pi)
        )

        self.stream = self.spotter.create_stream()
        logger.info("Keyword spotter initialized")

    def process_audio(self, samples: np.ndarray) -> Optional[str]:
        """Process audio samples, return keyword if detected.

        Args:
            samples: mono chunk at self.sample_rate; cast to float32.
                Assumes values are normalized floats — TODO confirm caller.

        Returns:
            The detected keyword string, or None.
        """
        if len(samples) == 0:
            return None

        # Accept waveform
        self.stream.accept_waveform(self.sample_rate, samples.astype(np.float32))

        # Decode as long as the stream has enough buffered audio.
        while self.spotter.is_ready(self.stream):
            self.spotter.decode_stream(self.stream)
            result = self.spotter.get_result(self.stream)

            if result:
                # Reset stream after detection so one utterance fires once.
                self.spotter.reset_stream(self.stream)
                logger.info(f"Keyword detected: {result}")
                return result

        return None

    def reset(self) -> None:
        """Reset the stream (discard pending decoder state)."""
        self.spotter.reset_stream(self.stream)
92
+
93
+
class KWSDaemon:
    """Daemon that listens for the wake word and triggers a callback.

    A background thread drains microphone chunks from a queue (filled by
    the sounddevice callback) and feeds them to a KeywordSpotter.
    """

    def __init__(
        self,
        config: KWSConfig,
        on_wake_word: Callable[[], None],
    ):
        """Create the daemon.

        Args:
            config: model/decoding settings for the spotter.
            on_wake_word: invoked (on the daemon thread) per detection.

        Raises:
            ImportError: if sounddevice is not installed.
        """
        if sd is None:
            raise ImportError("sounddevice not installed")

        self.config = config
        self.on_wake_word = on_wake_word

        self.spotter: Optional[KeywordSpotter] = None
        self.running = False
        self.thread: Optional[threading.Thread] = None

        # Queue decouples the real-time audio callback from decoding.
        self.audio_queue: queue.Queue = queue.Queue()

    def start(self) -> None:
        """Start the KWS daemon (no-op if already running)."""
        if self.running:
            return

        # Initialize the spotter before the thread starts so _run sees it.
        self.spotter = KeywordSpotter(self.config)

        self.running = True
        self.thread = threading.Thread(target=self._run, daemon=True)
        self.thread.start()
        logger.info("KWS daemon started")

    def stop(self) -> None:
        """Stop the KWS daemon and release the spotter."""
        self.running = False
        if self.thread:
            self.thread.join(timeout=2.0)
        self.spotter = None
        logger.info("KWS daemon stopped")

    def _run(self) -> None:
        """Worker loop: capture mic audio and scan it for keywords."""
        # BUGFIX: hold a local reference to the spotter. stop() clears
        # self.spotter after a *bounded* join(timeout=2.0), so a thread
        # still draining the queue could otherwise race between the
        # `if self.spotter` check and the process_audio call.
        spotter = self.spotter

        sample_rate = self.config.sample_rate
        chunk_duration = 0.1  # 100ms chunks
        chunk_size = int(sample_rate * chunk_duration)

        def audio_callback(indata, frames, time_info, status):
            # Runs on the sounddevice thread; keep it minimal.
            if status:
                logger.warning(f"Audio status: {status}")
            self.audio_queue.put(indata.copy())

        try:
            with sd.InputStream(
                samplerate=sample_rate,
                channels=1,
                dtype="float32",
                blocksize=chunk_size,
                callback=audio_callback,
            ):
                logger.info("Audio input stream started")

                while self.running:
                    try:
                        # Bounded wait so stop() is noticed promptly.
                        samples = self.audio_queue.get(timeout=0.5)
                    except queue.Empty:
                        continue

                    if spotter and spotter.process_audio(samples.flatten()):
                        self.on_wake_word()

        except Exception as e:
            logger.error(f"Audio stream error: {e}")
            self.running = False

    def is_running(self) -> bool:
        """Check if daemon is running."""
        return self.running
179
+
180
+
# Alternative: Simple VAD-based wake word (for testing without full KWS model)
class SimpleWakeWordDetector:
    """Simple wake word detector using just VAD + pattern matching.

    NOTE(review): as written this is a stub — the audio callback only logs
    an energy level and `on_wake_word` is never invoked; the VAD created in
    __init__ is likewise unused by the loop. Confirm before relying on it.
    """

    def __init__(
        self,
        wake_word: str = "sushi",
        vad_threshold: float = 0.5,
        on_wake_word: Optional[Callable[[], None]] = None,  # annotation fixed: default is None
    ):
        self.wake_word = wake_word.lower()
        self.vad_threshold = vad_threshold
        self.on_wake_word = on_wake_word
        self.running = False
        self.thread: Optional[threading.Thread] = None

        # Use Silero VAD if available; fall back to raw energy otherwise.
        try:
            import sherpa_onnx
            self.vad = self._create_vad()
        except ImportError:
            self.vad = None
            logger.warning("sherpa_onnx not available, using energy-based detection")

    def _create_vad(self):
        """Create Silero VAD.

        Assumes the model file exists at ./models/silero_vad.onnx relative
        to the working directory — TODO confirm deployment layout.
        """
        vad_config = sherpa_onnx.VadModelConfig()
        vad_config.silero_vad.model = "./models/silero_vad.onnx"
        vad_config.silero_vad.threshold = self.vad_threshold
        vad_config.silero_vad.min_silence_duration = 0.25
        vad_config.silero_vad.min_speech_duration = 0.1
        vad_config.sample_rate = 16000

        return sherpa_onnx.VoiceActivityDetector(vad_config, buffer_size_in_seconds=5)

    def start(self) -> None:
        """Start detector (spawns the daemon capture thread)."""
        self.running = True
        self.thread = threading.Thread(target=self._run, daemon=True)
        self.thread.start()
        logger.info("Simple wake word detector started (press Ctrl+C or say wake word)")

    def stop(self) -> None:
        """Stop detector (bounded join; daemon thread dies with process)."""
        self.running = False
        if self.thread:
            self.thread.join(timeout=2.0)

    def _run(self) -> None:
        """Run detection loop: open the mic and idle while the callback logs."""
        sample_rate = 16000
        chunk_size = int(sample_rate * 0.1)

        def audio_callback(indata, frames, time_info, status):
            if status:
                logger.warning(f"Audio status: {status}")

            # Energy-based detection for testing
            energy = np.sqrt(np.mean(indata ** 2))
            if energy > 0.05:  # Threshold
                logger.debug(f"Audio energy: {energy:.4f}")
                # For now, just detect loud audio as "wake word"
                # In real use, this would trigger STT for verification

        try:
            with sd.InputStream(
                samplerate=sample_rate,
                channels=1,
                dtype="float32",
                blocksize=chunk_size,
                callback=audio_callback,
            ):
                while self.running:
                    time.sleep(0.1)

        except Exception as e:
            logger.error(f"Audio stream error: {e}")
            self.running = False

    def is_running(self) -> bool:
        """Check if detector is running."""
        return self.running
goshi/main.py ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Goshi main app - Voice AI companion for Reachy Mini."""
2
+
3
+ import logging
4
+ import threading
5
+ import time
6
+ from typing import Optional
7
+
8
+ from pydantic import BaseModel
9
+ from reachy_mini import ReachyMini, ReachyMiniApp
10
+
11
+ from .config import Config
12
+ from .kws_daemon import SimpleWakeWordDetector
13
+ from .streaming_controller import StreamingController, StreamingConfig, StreamState
14
+
15
+ logger = logging.getLogger("goshi")
16
+
17
+
class Goshi(ReachyMiniApp):
    """Voice AI companion app for Reachy Mini robot.

    Say "Sushi" to start a conversation!
    """

    # Settings page URL - accessible from Reachy Mini web interface
    custom_app_url: str | None = "http://0.0.0.0:8042"
    # Use default media backend
    request_media_backend: str | None = None

    def run(self, reachy_mini: ReachyMini, stop_event: threading.Event) -> None:
        """Main app loop.

        Wires together: config loading, the settings HTTP API, the SRT
        streaming controller, and the wake-word detector; then idles
        (with an antenna animation) until stop_event is set.

        Args:
            reachy_mini: The Reachy Mini robot instance
            stop_event: Event to signal app shutdown
        """

        # Load configuration; fall back to built-in defaults on any failure.
        try:
            config = Config.from_file("config.yaml")
            logger.info(f"Configuration loaded, server: {config.server.host}")
        except Exception as e:
            logger.warning(f"Failed to load config, using defaults: {e}")
            config = Config()

        # Current settings state (can be updated via web UI)
        server_host = config.server.host
        is_listening = False
        is_streaming = False

        # ==========================================
        # Settings API (FastAPI endpoints)
        # ==========================================
        # NOTE(review): the /start and /stop handlers below close over
        # `streaming`, which is only assigned further down in run(); a
        # request arriving before that assignment would raise NameError.

        class ServerSettings(BaseModel):
            host: str

        class StatusResponse(BaseModel):
            is_listening: bool
            is_streaming: bool
            server_host: str

        @self.settings_app.get("/status")
        def get_status() -> StatusResponse:
            """Get current app status."""
            return StatusResponse(
                is_listening=is_listening,
                is_streaming=is_streaming,
                server_host=server_host,
            )

        @self.settings_app.post("/server")
        def update_server(settings: ServerSettings) -> dict:
            """Update server host (takes effect on the next stream start)."""
            nonlocal server_host
            server_host = settings.host
            logger.info(f"Server host updated to: {server_host}")
            return {"host": server_host, "success": True}

        @self.settings_app.post("/start")
        def start_streaming() -> dict:
            """Manually start streaming."""
            if streaming and not streaming.is_streaming():
                streaming.start_streaming()
                return {"success": True, "message": "Streaming started"}
            return {"success": False, "message": "Already streaming or not ready"}

        @self.settings_app.post("/stop")
        def stop_streaming_endpoint() -> dict:
            """Manually stop streaming."""
            if streaming and streaming.is_streaming():
                streaming.stop_streaming()
                return {"success": True, "message": "Streaming stopped"}
            return {"success": False, "message": "Not streaming"}

        # ==========================================
        # Streaming Controller Setup
        # ==========================================
        # The client's send port is the server's receive port and vice
        # versa, hence the deliberate swap below.

        streaming_config = StreamingConfig(
            server_host=server_host,
            srt_send_port=config.server.srt_receive_port,
            srt_receive_port=config.server.srt_send_port,
            srt_latency_ms=config.server.srt_latency_ms,
            audio_device=config.audio.input_device,
            enable_video=config.streaming.enable_video,
            video_width=config.streaming.video_width,
            video_height=config.streaming.video_height,
            video_framerate=config.streaming.video_framerate,
            silence_timeout_s=config.streaming.silence_timeout_s,
            use_hardware_encoder=config.streaming.use_hardware_encoder,
        )

        def on_state_change(state: StreamState) -> None:
            """Handle streaming state changes (mirror into UI + antennas)."""
            nonlocal is_streaming
            is_streaming = state == StreamState.STREAMING
            logger.info(f"Streaming state: {state.value}")

            # Antenna feedback on state change
            if state == StreamState.STREAMING:
                # Antennas up - listening
                reachy_mini.set_antenna_targets([0.3, 0.3])
            elif state == StreamState.IDLE:
                # Antennas neutral
                reachy_mini.set_antenna_targets([0, 0])

        streaming = StreamingController(streaming_config, on_state_change)

        # ==========================================
        # Wake Word Detection
        # ==========================================

        def on_wake_word() -> None:
            """Handle wake word detection: animate, then (re)start streaming."""
            nonlocal is_listening
            logger.info("Wake word 'Sushi' detected!")
            is_listening = True

            if not streaming.is_streaming():
                # Excitement animation - antenna wiggle
                for i in range(3):
                    reachy_mini.set_antenna_targets([0.5, 0.5])
                    time.sleep(0.1)
                    reachy_mini.set_antenna_targets([0, 0])
                    time.sleep(0.1)

                # Update server host in case it changed via the web UI
                streaming.config.server_host = server_host
                streaming.start_streaming()
            else:
                # Reset silence timeout
                streaming.on_activity()

        kws = SimpleWakeWordDetector(
            wake_word=config.kws.wake_word,
            on_wake_word=on_wake_word,
        )

        # ==========================================
        # Main Loop
        # ==========================================

        try:
            kws.start()
            logger.info("Goshi started! Say 'Sushi' to begin...")

            idle_animation_time = 0

            while not stop_event.is_set():
                t = time.time()

                # Idle antenna animation when not streaming
                if not streaming.is_streaming():
                    # Gentle breathing animation (re-targeted every 3 s)
                    if t - idle_animation_time > 3.0:
                        a = 0.1 * (1 + 0.3 * ((t * 0.5) % 1))
                        reachy_mini.set_antenna_targets([a, -a])
                        idle_animation_time = t

                time.sleep(0.1)

        except Exception as e:
            logger.error(f"Error in main loop: {e}")
            raise
        finally:
            # Best-effort cleanup on shutdown or crash.
            logger.info("Shutting down Goshi...")
            kws.stop()
            streaming.stop_streaming()
            # Return antennas to neutral
            reachy_mini.set_antenna_targets([0, 0])
191
+
192
+
if __name__ == "__main__":
    # Standalone entry point: configure root logging, then hand control
    # to the ReachyMiniApp runner until interrupted.
    logging.basicConfig(
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        level=logging.INFO,
    )

    goshi_app = Goshi()
    try:
        goshi_app.wrapped_run()
    except KeyboardInterrupt:
        goshi_app.stop()
goshi/static/index.html ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>Goshi Settings</title>
8
+ <style>
9
+ :root {
10
+ --primary: #8B5CF6;
11
+ --secondary: #EC4899;
12
+ --bg: #1a1a2e;
13
+ --card: #16213e;
14
+ --text: #eaeaea;
15
+ --success: #10b981;
16
+ --error: #ef4444;
17
+ }
18
+
19
+ * {
20
+ margin: 0;
21
+ padding: 0;
22
+ box-sizing: border-box;
23
+ }
24
+
25
+ body {
26
+ font-family: -apple-system, BlinkMacSystemFont, sans-serif;
27
+ background: var(--bg);
28
+ color: var(--text);
29
+ min-height: 100vh;
30
+ padding: 2rem;
31
+ }
32
+
33
+ .container {
34
+ max-width: 500px;
35
+ margin: 0 auto;
36
+ }
37
+
38
+ h1 {
39
+ font-size: 1.75rem;
40
+ margin-bottom: 2rem;
41
+ text-align: center;
42
+ }
43
+
44
+ .card {
45
+ background: var(--card);
46
+ padding: 1.5rem;
47
+ border-radius: 1rem;
48
+ margin-bottom: 1rem;
49
+ }
50
+
51
+ .status {
52
+ display: flex;
53
+ align-items: center;
54
+ gap: 0.75rem;
55
+ margin-bottom: 1rem;
56
+ }
57
+
58
+ .status-dot {
59
+ width: 12px;
60
+ height: 12px;
61
+ border-radius: 50%;
62
+ background: var(--error);
63
+ }
64
+
65
+ .status-dot.active {
66
+ background: var(--success);
67
+ }
68
+
69
+ label {
70
+ display: block;
71
+ margin-bottom: 0.5rem;
72
+ font-size: 0.9rem;
73
+ opacity: 0.8;
74
+ }
75
+
76
+ input {
77
+ width: 100%;
78
+ padding: 0.75rem;
79
+ border: 1px solid rgba(255, 255, 255, 0.1);
80
+ border-radius: 0.5rem;
81
+ background: rgba(0, 0, 0, 0.3);
82
+ color: var(--text);
83
+ font-size: 1rem;
84
+ margin-bottom: 1rem;
85
+ }
86
+
87
+ button {
88
+ width: 100%;
89
+ padding: 0.75rem;
90
+ border: none;
91
+ border-radius: 0.5rem;
92
+ background: linear-gradient(135deg, var(--primary), var(--secondary));
93
+ color: white;
94
+ font-size: 1rem;
95
+ font-weight: 600;
96
+ cursor: pointer;
97
+ transition: opacity 0.2s;
98
+ }
99
+
100
+ button:hover {
101
+ opacity: 0.9;
102
+ }
103
+
104
+ button:active {
105
+ opacity: 0.8;
106
+ }
107
+
108
+ .btn-group {
109
+ display: flex;
110
+ gap: 0.5rem;
111
+ }
112
+
113
+ .btn-group button {
114
+ flex: 1;
115
+ }
116
+
117
+ .btn-secondary {
118
+ background: rgba(255, 255, 255, 0.1);
119
+ }
120
+
121
+ .message {
122
+ padding: 0.75rem;
123
+ border-radius: 0.5rem;
124
+ margin-top: 1rem;
125
+ text-align: center;
126
+ display: none;
127
+ }
128
+
129
+ .message.success {
130
+ background: rgba(16, 185, 129, 0.2);
131
+ display: block;
132
+ }
133
+
134
+ .message.error {
135
+ background: rgba(239, 68, 68, 0.2);
136
+ display: block;
137
+ }
138
+ </style>
139
+ </head>
140
+
141
+ <body>
142
+ <div class="container">
143
+ <h1>🍣 Goshi Settings</h1>
144
+
145
+ <div class="card">
146
+ <div class="status">
147
+ <span class="status-dot" id="statusDot"></span>
148
+ <span id="statusText">Checking...</span>
149
+ </div>
150
+
151
+ <label>Server Host</label>
152
+ <input type="text" id="serverHost" placeholder="192.168.1.57">
153
+
154
+ <button onclick="updateServer()">Save Server</button>
155
+
156
+ <div id="message" class="message"></div>
157
+ </div>
158
+
159
+ <div class="card">
160
+ <label>Streaming Control</label>
161
+ <div class="btn-group">
162
+ <button onclick="startStream()">Start</button>
163
+ <button class="btn-secondary" onclick="stopStream()">Stop</button>
164
+ </div>
165
+ </div>
166
+ </div>
167
+
168
+ <script>
async function fetchStatus() {
    // Poll the backend and mirror its state in the status indicator
    // and the server-host input.
    const dot = document.getElementById('statusDot');
    const text = document.getElementById('statusText');
    try {
        const response = await fetch('/status');
        const status = await response.json();

        dot.classList.toggle('active', status.is_streaming || status.is_listening);
        text.textContent = status.is_streaming ? 'Streaming'
            : status.is_listening ? 'Listening'
            : 'Idle';

        document.getElementById('serverHost').value = status.server_host;
    } catch (err) {
        text.textContent = 'Disconnected';
    }
}
194
+
async function updateServer() {
    // Persist the server host. fetch() resolves even on HTTP 4xx/5xx,
    // so res.ok must be checked explicitly — the original reported
    // success on any response. Also drops the unused parsed body.
    const host = document.getElementById('serverHost').value;
    try {
        const res = await fetch('/server', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ host })
        });
        if (!res.ok) throw new Error(`HTTP ${res.status}`);
        showMessage('Server updated!', 'success');
    } catch (e) {
        showMessage('Failed to update', 'error');
    }
}
209
+
async function startStream() {
    // Ask the app to start streaming; refresh the status shortly after.
    // BUGFIX: check res.ok — fetch() does not reject on HTTP errors,
    // so the original showed "started" even when the server refused.
    try {
        const res = await fetch('/start', { method: 'POST' });
        if (!res.ok) throw new Error(`HTTP ${res.status}`);
        showMessage('Streaming started', 'success');
        setTimeout(fetchStatus, 500);
    } catch (e) {
        showMessage('Failed to start', 'error');
    }
}
219
+
async function stopStream() {
    // Ask the app to stop streaming; refresh the status shortly after.
    // BUGFIX: check res.ok — fetch() does not reject on HTTP errors,
    // so the original showed "stopped" even when the server refused.
    try {
        const res = await fetch('/stop', { method: 'POST' });
        if (!res.ok) throw new Error(`HTTP ${res.status}`);
        showMessage('Streaming stopped', 'success');
        setTimeout(fetchStatus, 500);
    } catch (e) {
        showMessage('Failed to stop', 'error');
    }
}
229
+
function showMessage(text, type) {
    // Flash a transient banner ('success' or 'error' styling) for 3 s,
    // then hide it by resetting the class to bare 'message' (display:none).
    const msg = document.getElementById('message');
    msg.textContent = text;
    msg.className = 'message ' + type;
    setTimeout(() => msg.className = 'message', 3000);
}
236
+
237
+ fetchStatus();
238
+ setInterval(fetchStatus, 5000);
239
+ </script>
240
+ </body>
241
+
242
+ </html>
goshi/streaming_controller.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Streaming controller - manages capture and playback pipelines."""
2
+
3
+ import logging
4
+ import threading
5
+ import time
6
+ from dataclasses import dataclass
7
+ from enum import Enum
8
+ from typing import Callable, Optional
9
+
10
+ from gstreamer_pipelines import Pipeline, PipelineConfig, build_capture_pipeline, build_playback_pipeline
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class StreamState(Enum):
    """Lifecycle state of the streaming controller."""
    IDLE = "idle"            # no pipelines running
    STREAMING = "streaming"  # capture and playback pipelines active
    ERROR = "error"          # a pipeline failed to start
20
+
21
+
22
@dataclass
class StreamingConfig:
    """Streaming controller configuration.

    Bundles the network, audio and video parameters used to build the
    SRT capture and playback pipelines.
    """
    server_host: str            # hostname/IP of the Goshi server
    srt_send_port: int          # SRT port for the outgoing capture stream
    srt_receive_port: int       # SRT port for the incoming playback stream
    srt_latency_ms: int         # SRT latency budget in milliseconds
    audio_device: str           # audio device identifier passed to the pipelines
    enable_video: bool          # whether to capture and send video
    video_width: int            # capture width in pixels
    video_height: int           # capture height in pixels
    video_framerate: int        # capture framerate (fps)
    silence_timeout_s: int      # seconds of inactivity before auto-stop
    use_hardware_encoder: bool  # prefer the hardware video encoder
36
+
37
+
38
class StreamingController:
    """Controls audio/video streaming to/from the server.

    Owns two GStreamer pipelines (capture: robot -> server, playback:
    server -> robot) plus an inactivity timer that tears both down after
    ``config.silence_timeout_s`` seconds without activity.

    Thread-safety: start/stop transitions and timer rearming are
    serialized with an internal lock.  NOTE(review): the
    ``on_state_change`` callback is invoked while that lock is held, so
    it must not call back into ``start_streaming``/``stop_streaming``
    or it will deadlock.
    """

    def __init__(
        self,
        config: StreamingConfig,
        on_state_change: Optional[Callable[[StreamState], None]] = None,
    ):
        """Create a controller.

        Args:
            config: Pipeline/network parameters.
            on_state_change: Optional callback invoked with the new
                ``StreamState`` whenever the state actually changes.
        """
        self.config = config
        self.on_state_change = on_state_change

        self.state = StreamState.IDLE
        self.capture_pipeline: Optional[Pipeline] = None
        self.playback_pipeline: Optional[Pipeline] = None

        self._lock = threading.Lock()
        self._timeout_timer: Optional[threading.Timer] = None
        self._last_activity = time.time()

    def start_streaming(self) -> bool:
        """Start capture and playback pipelines.

        Returns:
            True if both pipelines are running (or streaming was already
            active); False on failure, in which case the state is ERROR
            and any partially-started pipeline has been stopped and its
            reference released.
        """
        with self._lock:
            if self.state == StreamState.STREAMING:
                logger.debug("Already streaming")
                return True

            logger.info("Starting streaming...")

            # Capture pipeline config: robot mic/camera -> server.
            capture_config = PipelineConfig(
                server_host=self.config.server_host,
                srt_port=self.config.srt_send_port,
                srt_latency_ms=self.config.srt_latency_ms,
                audio_device=self.config.audio_device,
                enable_video=self.config.enable_video,
                video_width=self.config.video_width,
                video_height=self.config.video_height,
                video_framerate=self.config.video_framerate,
                use_hardware_encoder=self.config.use_hardware_encoder,
            )

            # Playback pipeline config: server audio -> robot speaker.
            playback_config = PipelineConfig(
                server_host=self.config.server_host,
                srt_port=self.config.srt_receive_port,
                srt_latency_ms=self.config.srt_latency_ms,
                audio_device=self.config.audio_device,
            )

            # Start capture pipeline.
            capture_str = build_capture_pipeline(capture_config)
            self.capture_pipeline = Pipeline(capture_str, "capture")
            if not self.capture_pipeline.start():
                # Fix: drop the failed pipeline instead of keeping a
                # stale reference around in the ERROR state.
                self.capture_pipeline = None
                self._set_state(StreamState.ERROR)
                return False

            # Start playback pipeline.
            playback_str = build_playback_pipeline(playback_config)
            self.playback_pipeline = Pipeline(playback_str, "playback")
            if not self.playback_pipeline.start():
                # Fix: on partial failure, tear down the already-running
                # capture pipeline AND release both references.
                self.capture_pipeline.stop()
                self.capture_pipeline = None
                self.playback_pipeline = None
                self._set_state(StreamState.ERROR)
                return False

            self._set_state(StreamState.STREAMING)
            self._start_timeout_timer()

            return True

    def stop_streaming(self) -> None:
        """Stop all pipelines and return to IDLE.  Safe to call when idle."""
        with self._lock:
            self._cancel_timeout_timer()

            if self.capture_pipeline:
                self.capture_pipeline.stop()
                self.capture_pipeline = None

            if self.playback_pipeline:
                self.playback_pipeline.stop()
                self.playback_pipeline = None

            self._set_state(StreamState.IDLE)
            logger.info("Streaming stopped")

    def on_activity(self) -> None:
        """Record user activity (speech, etc.) and push back the timeout.

        Fix: the timer is only rearmed while actually streaming
        (previously a spurious timer could be started while idle), and
        the rearm is done under the lock so it cannot race with a
        concurrent stop_streaming().
        """
        self._last_activity = time.time()
        with self._lock:
            if self.state == StreamState.STREAMING:
                self._reset_timeout_timer()

    def _set_state(self, state: StreamState) -> None:
        """Set state and notify the callback on an actual change.

        Caller must hold ``self._lock``; the callback therefore runs
        with the lock held (see class docstring).
        """
        if self.state != state:
            self.state = state
            if self.on_state_change:
                self.on_state_change(state)

    def _start_timeout_timer(self) -> None:
        """(Re)arm the silence timeout timer as a daemon thread."""
        self._cancel_timeout_timer()
        self._timeout_timer = threading.Timer(
            self.config.silence_timeout_s,
            self._on_timeout,
        )
        # Daemon so a pending timer never blocks interpreter shutdown.
        self._timeout_timer.daemon = True
        self._timeout_timer.start()

    def _reset_timeout_timer(self) -> None:
        """Cancel and restart the timeout timer."""
        self._cancel_timeout_timer()
        self._start_timeout_timer()

    def _cancel_timeout_timer(self) -> None:
        """Cancel the timeout timer if one is pending."""
        if self._timeout_timer:
            self._timeout_timer.cancel()
            self._timeout_timer = None

    def _on_timeout(self) -> None:
        """Timer callback: no activity for silence_timeout_s, stop streaming.

        Runs on the timer thread; stop_streaming() takes the lock itself.
        """
        logger.info(f"Silence timeout ({self.config.silence_timeout_s}s), stopping stream")
        self.stop_streaming()

    def is_streaming(self) -> bool:
        """Return True while in the STREAMING state."""
        return self.state == StreamState.STREAMING

    def get_state(self) -> StreamState:
        """Return the current StreamState."""
        return self.state

    def __enter__(self):
        # Context-manager entry: best-effort start (result not raised).
        self.start_streaming()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Context-manager exit: always stop; never suppress exceptions.
        self.stop_streaming()
        return False
index.html ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Goshi - Voice AI for Reachy Mini</title>
7
+ <link rel="stylesheet" href="style.css">
8
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet">
9
+ </head>
10
+ <body>
11
+ <div class="container">
12
+ <div class="hero">
13
+ <span class="emoji">🍣</span>
14
+ <h1>Goshi</h1>
15
+ <p class="subtitle">Voice AI Companion for Reachy Mini</p>
16
+ <p class="tagline">Say <span class="highlight">"Sushi"</span> to start a conversation!</p>
17
+ </div>
18
+
19
+ <div class="features">
20
+ <div class="feature">
21
+ <span class="feature-icon">🎙️</span>
22
+ <h3>Wake Word</h3>
23
+ <p>Say "Sushi" to activate the robot</p>
24
+ </div>
25
+ <div class="feature">
26
+ <span class="feature-icon">🤖</span>
27
+ <h3>Smart AI</h3>
28
+ <p>Natural conversations powered by LLMs</p>
29
+ </div>
30
+ <div class="feature">
31
+ <span class="feature-icon">🌍</span>
32
+ <h3>Bilingual</h3>
33
+ <p>Speaks French & English</p>
34
+ </div>
35
+ <div class="feature">
36
+ <span class="feature-icon">⚡</span>
37
+ <h3>Low Latency</h3>
38
+ <p>&lt;800ms response time</p>
39
+ </div>
40
+ <div class="feature">
41
+ <span class="feature-icon">🔒</span>
42
+ <h3>Local AI</h3>
43
+ <p>Private, on-premise processing</p>
44
+ </div>
45
+ <div class="feature">
46
+ <span class="feature-icon">🤸</span>
47
+ <h3>Expressive</h3>
48
+ <p>Head & antenna animations</p>
49
+ </div>
50
+ </div>
51
+
52
+ <div class="install-section">
53
+ <h2>🚀 Installation</h2>
54
+ <div class="install-steps">
55
+ <div class="step">
56
+ <span class="step-number">1</span>
57
+ <div class="step-content">
58
+ <h4>Install on Reachy Mini</h4>
59
+ <p>Via the app manager or command line:</p>
60
+ <code>pip install git+https://huggingface.co/spaces/pnocera/goshi</code>
61
+ </div>
62
+ </div>
63
+ <div class="step">
64
+ <span class="step-number">2</span>
65
+ <div class="step-content">
66
+ <h4>Start the Goshi Server</h4>
67
+ <p>On your workstation (requires NVIDIA GPU):</p>
68
+ <code>./goshi-server --config config.toml</code>
69
+ </div>
70
+ </div>
71
+ <div class="step">
72
+ <span class="step-number">3</span>
73
+ <div class="step-content">
74
+ <h4>Configure & Talk!</h4>
75
+ <p>Update config.yaml with server IP, then say "Sushi"!</p>
76
+ </div>
77
+ </div>
78
+ </div>
79
+ </div>
80
+
81
+ <div class="architecture">
82
+ <h2>🏗️ Architecture</h2>
83
+ <pre>
84
+ Reachy Mini (Raspberry Pi 4) Workstation (RTX GPU)
85
+ ┌────────────────────────┐ ┌─────────────────────────┐
86
+ │ Goshi Client │◄── SRT ──►│ Goshi Server │
87
+ │ ┌──────────────────┐ │ │ ┌───────────────────┐ │
88
+ │ │ Wake Word (KWS) │ │ │ │ VAD (Silero) │ │
89
+ │ │ Audio Capture │──┼───────────┼─►│ STT (Whisper) │ │
90
+ │ │ TTS Playback │◄─┼───────────┼──│ LLM (Ollama) │ │
91
+ │ │ Video Stream │ │ │ │ TTS (Piper) │ │
92
+ │ └──────────────────┘ │ │ └───────────────────┘ │
93
+ └────────────────────────┘ └─────────────────────────┘
94
+ </pre>
95
+ </div>
96
+
97
+ <div class="footer">
98
+ <p>Built with ❤️ for Reachy Mini</p>
99
+ <p class="links">
100
+ <a href="https://github.com/pnocera/goshi">GitHub</a> ·
101
+ <a href="https://pollen-robotics.com">Pollen Robotics</a> ·
102
+ <a href="https://huggingface.co/spaces/pnocera/goshi">Hugging Face</a>
103
+ </p>
104
+ </div>
105
+ </div>
106
+ </body>
107
+ </html>
pyproject.toml ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "goshi"
7
+ version = "0.1.0"
8
+ description = "Voice AI companion for Reachy Mini - Say 'Sushi' to start a conversation!"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = {text = "MIT"}
12
+ authors = [
13
+ {name = "Pierre Nocera", email = "pierre@example.com"}
14
+ ]
15
+ dependencies = [
16
+ "reachy-mini",
17
+ "PyYAML>=6.0",
18
+ "sounddevice>=0.4.6",
19
+ "numpy>=1.24.0",
20
+ "pydantic>=2.0",
21
+ ]
22
+ keywords = ["reachy-mini-app", "voice-ai", "robot"]
23
+
24
+ [project.entry-points."reachy_mini_apps"]
25
+ goshi = "goshi.main:Goshi"
26
+
27
+ [tool.setuptools]
28
+ package-dir = { "" = "." }
29
+ include-package-data = true
30
+
31
+ [tool.setuptools.packages.find]
32
+ where = ["."]
33
+
34
+ [tool.setuptools.package-data]
35
+ goshi = ["**/*"]
style.css ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ :root {
2
+ --primary: #8B5CF6;
3
+ --secondary: #EC4899;
4
+ --accent: #F59E0B;
5
+ --bg: #0F0F0F;
6
+ --bg-card: rgba(255, 255, 255, 0.05);
7
+ --bg-hover: rgba(255, 255, 255, 0.08);
8
+ --text: #FFFFFF;
9
+ --text-muted: rgba(255, 255, 255, 0.6);
10
+ --border: rgba(255, 255, 255, 0.1);
11
+ --gradient: linear-gradient(135deg, var(--primary), var(--secondary));
12
+ }
13
+
14
+ * {
15
+ margin: 0;
16
+ padding: 0;
17
+ box-sizing: border-box;
18
+ }
19
+
20
+ body {
21
+ font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
22
+ background: var(--bg);
23
+ color: var(--text);
24
+ min-height: 100vh;
25
+ line-height: 1.6;
26
+ }
27
+
28
+ .container {
29
+ max-width: 1000px;
30
+ margin: 0 auto;
31
+ padding: 2rem;
32
+ }
33
+
34
+ /* Hero Section */
35
+ .hero {
36
+ text-align: center;
37
+ padding: 4rem 0 3rem;
38
+ }
39
+
40
+ .emoji {
41
+ font-size: 6rem;
42
+ display: block;
43
+ margin-bottom: 1rem;
44
+ animation: bounce 2s ease-in-out infinite;
45
+ }
46
+
47
+ @keyframes bounce {
48
+ 0%, 100% { transform: translateY(0); }
49
+ 50% { transform: translateY(-10px); }
50
+ }
51
+
52
+ h1 {
53
+ font-size: 4rem;
54
+ font-weight: 700;
55
+ background: var(--gradient);
56
+ -webkit-background-clip: text;
57
+ -webkit-text-fill-color: transparent;
58
+ background-clip: text;
59
+ margin-bottom: 0.5rem;
60
+ }
61
+
62
+ .subtitle {
63
+ font-size: 1.5rem;
64
+ color: var(--text-muted);
65
+ margin-bottom: 1rem;
66
+ }
67
+
68
+ .tagline {
69
+ font-size: 1.25rem;
70
+ color: var(--text-muted);
71
+ }
72
+
73
+ .highlight {
74
+ background: var(--gradient);
75
+ -webkit-background-clip: text;
76
+ -webkit-text-fill-color: transparent;
77
+ background-clip: text;
78
+ font-weight: 600;
79
+ }
80
+
81
+ /* Features Grid */
82
+ .features {
83
+ display: grid;
84
+ grid-template-columns: repeat(3, 1fr);
85
+ gap: 1.5rem;
86
+ margin: 3rem 0;
87
+ }
88
+
89
+ @media (max-width: 768px) {
90
+ .features {
91
+ grid-template-columns: repeat(2, 1fr);
92
+ }
93
+ }
94
+
95
+ @media (max-width: 480px) {
96
+ .features {
97
+ grid-template-columns: 1fr;
98
+ }
99
+ }
100
+
101
+ .feature {
102
+ background: var(--bg-card);
103
+ padding: 2rem;
104
+ border-radius: 1rem;
105
+ text-align: center;
106
+ border: 1px solid var(--border);
107
+ transition: all 0.3s ease;
108
+ }
109
+
110
+ .feature:hover {
111
+ background: var(--bg-hover);
112
+ transform: translateY(-5px);
113
+ border-color: var(--primary);
114
+ }
115
+
116
+ .feature-icon {
117
+ font-size: 2.5rem;
118
+ display: block;
119
+ margin-bottom: 1rem;
120
+ }
121
+
122
+ .feature h3 {
123
+ font-size: 1.1rem;
124
+ font-weight: 600;
125
+ margin-bottom: 0.5rem;
126
+ }
127
+
128
+ .feature p {
129
+ font-size: 0.9rem;
130
+ color: var(--text-muted);
131
+ }
132
+
133
+ /* Install Section */
134
+ .install-section {
135
+ background: var(--bg-card);
136
+ padding: 2.5rem;
137
+ border-radius: 1.5rem;
138
+ margin: 3rem 0;
139
+ border: 1px solid var(--border);
140
+ }
141
+
142
+ .install-section h2 {
143
+ text-align: center;
144
+ margin-bottom: 2rem;
145
+ font-size: 1.75rem;
146
+ }
147
+
148
+ .install-steps {
149
+ display: flex;
150
+ flex-direction: column;
151
+ gap: 1.5rem;
152
+ }
153
+
154
+ .step {
155
+ display: flex;
156
+ align-items: flex-start;
157
+ gap: 1rem;
158
+ }
159
+
160
+ .step-number {
161
+ background: var(--gradient);
162
+ color: white;
163
+ width: 2.5rem;
164
+ height: 2.5rem;
165
+ border-radius: 50%;
166
+ display: flex;
167
+ align-items: center;
168
+ justify-content: center;
169
+ font-weight: 700;
170
+ flex-shrink: 0;
171
+ }
172
+
173
+ .step-content h4 {
174
+ font-size: 1.1rem;
175
+ margin-bottom: 0.25rem;
176
+ }
177
+
178
+ .step-content p {
179
+ color: var(--text-muted);
180
+ font-size: 0.9rem;
181
+ margin-bottom: 0.5rem;
182
+ }
183
+
184
+ .step-content code {
185
+ display: inline-block;
186
+ background: rgba(0, 0, 0, 0.5);
187
+ padding: 0.5rem 1rem;
188
+ border-radius: 0.5rem;
189
+ font-family: 'JetBrains Mono', 'Fira Code', monospace;
190
+ font-size: 0.85rem;
191
+ color: var(--accent);
192
+ border: 1px solid var(--border);
193
+ }
194
+
195
+ /* Architecture */
196
+ .architecture {
197
+ background: var(--bg-card);
198
+ padding: 2rem;
199
+ border-radius: 1rem;
200
+ margin: 2rem 0;
201
+ border: 1px solid var(--border);
202
+ }
203
+
204
+ .architecture h2 {
205
+ text-align: center;
206
+ margin-bottom: 1.5rem;
207
+ }
208
+
209
+ .architecture pre {
210
+ font-family: 'JetBrains Mono', 'Fira Code', monospace;
211
+ font-size: 0.75rem;
212
+ overflow-x: auto;
213
+ color: var(--text-muted);
214
+ line-height: 1.4;
215
+ }
216
+
217
+ /* Footer */
218
+ .footer {
219
+ text-align: center;
220
+ padding: 3rem 0 2rem;
221
+ color: var(--text-muted);
222
+ }
223
+
224
+ .footer .links {
225
+ margin-top: 0.5rem;
226
+ }
227
+
228
+ .footer a {
229
+ color: var(--primary);
230
+ text-decoration: none;
231
+ transition: color 0.2s;
232
+ }
233
+
234
+ .footer a:hover {
235
+ color: var(--secondary);
236
+ }