pnocera commited on
Commit
3f27f46
·
0 Parent(s):

Initial commit - Goshi voice AI for Reachy Mini

Browse files
.gitignore ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .eggs/
8
+ *.egg
9
+ .venv/
10
+ venv/
11
+ *.log
12
+ .DS_Store
README.md ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Goshi
3
+ emoji: 🍣
4
+ colorFrom: purple
5
+ colorTo: pink
6
+ sdk: static
7
+ pinned: false
8
+ short_description: Voice AI companion for Reachy Mini - Say "Sushi" to chat!
9
+ tags:
10
+ - reachy_mini
11
+ - reachy_mini_python_app
12
+ ---
13
+
14
+ # Goshi 🍣
15
+
16
+ A voice AI companion for Reachy Mini! Say "Sushi" to start a conversation.
17
+
18
+ ## Features
19
+
20
+ - 🎙️ **Wake Word Detection** - Say "Sushi" to activate
21
+ - 🗣️ **Speech-to-Text** - Whisper-powered transcription
22
+ - 🤖 **LLM Conversation** - Natural dialogue (local or cloud)
23
+ - 🔊 **Text-to-Speech** - Piper neural TTS response
24
+ - 🌍 **Bilingual** - French and English support
25
+ - 🤸 **Expressive Gestures** - Robot reacts while talking
26
+
27
+ ## Architecture
28
+
29
+ ```
30
+ Reachy Mini (Pi 4) Workstation (GPU)
31
+ ┌─────────────────┐ ┌──────────────────┐
32
+ │ Goshi Client │◄─SRT───►│ Goshi Server │
33
+ │ - Wake word │ │ - VAD │
34
+ │ - A/V capture │ │ - Whisper STT │
35
+ │ - TTS playback │ │ - LLM │
36
+ └─────────────────┘ │ - Piper TTS │
37
+ └──────────────────┘
38
+ ```
39
+
40
+ ## Requirements
41
+
42
+ - Reachy Mini robot
43
+ - Goshi server running on a workstation (with NVIDIA GPU)
44
+ - Network connectivity between robot and server
45
+
46
+ ## Installation
47
+
48
+ ### On Reachy Mini
49
+
50
+ Install via the app manager or:
51
+
52
+ ```bash
53
+ pip install git+https://huggingface.co/spaces/pnocera/goshi
54
+ ```
55
+
56
+ ### Server Setup
57
+
58
+ See the [Goshi Server Repository](https://github.com/pnocera/goshi) for server setup instructions.
59
+
60
+ ## Configuration
61
+
62
+ After installation, update `config.yaml` with your server IP:
63
+
64
+ ```yaml
65
+ server:
66
+ host: "YOUR_SERVER_IP" # e.g., 192.168.1.57
67
+ srt_receive_port: 8888
68
+ srt_send_port: 8889
69
+ ```
70
+
71
+ ## Usage
72
+
73
+ 1. Start the Goshi server on your workstation
74
+ 2. Install the app on Reachy Mini
75
+ 3. Configure server IP in settings
76
+ 4. Say "Sushi" to start talking!
77
+
78
+ ## License
79
+
80
+ MIT License - See LICENSE for details.
81
+
82
+ ## Credits
83
+
84
+ Built with ❤️ for Reachy Mini by the Pollen Robotics community.
config.yaml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Goshi Configuration
2
+ # Voice AI companion for Reachy Mini
3
+
4
+ server:
5
+ host: "192.168.1.57" # Update with your Goshi server IP
6
+ srt_receive_port: 8888
7
+ srt_send_port: 8889
8
+ srt_latency_ms: 125
9
+
10
+ kws:
11
+ wake_word: "Sushi"
12
+ keywords_file: "./keywords.txt"
13
+ model_path: "./models/kws"
14
+ sensitivity: 0.5
15
+ sample_rate: 16000
16
+
17
+ audio:
18
+ input_device: "hw:0"
19
+ output_device: "hw:0"
20
+ sample_rate: 16000
21
+ channels: 1
22
+
23
+ streaming:
24
+ enable_video: true
25
+ video_width: 1280
26
+ video_height: 720
27
+ video_framerate: 30
28
+ silence_timeout_s: 30
29
+ use_hardware_encoder: true
goshi/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
"""Goshi - Voice AI companion for Reachy Mini."""

# Package version string (PEP 440); bump on each release.
__version__ = "0.1.0"
goshi/config.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Configuration management for Pi client."""
2
+
3
+ import os
4
+ from dataclasses import dataclass, field
5
+ from pathlib import Path
6
+ from typing import Optional
7
+
8
+ import yaml
9
+
10
+
@dataclass
class ServerConfig:
    """Server connection settings.

    Describes how the Pi client reaches the Goshi workstation server
    over SRT. Port naming is from the server's point of view (main.py
    maps srt_receive_port to the client's send port and vice versa).
    """
    host: str = "192.168.1.100"   # Goshi server IP/hostname; overridden by config.yaml
    srt_receive_port: int = 8888  # port the server receives the robot's A/V stream on
    srt_send_port: int = 8889     # port the server sends TTS audio back from
    srt_latency_ms: int = 125     # SRT latency budget in milliseconds
18
+
19
+
@dataclass
class KWSConfig:
    """Keyword spotting settings (user-facing).

    NOTE(review): a different `KWSConfig` (model-file paths) is declared in
    kws_daemon.py under the same name — consider renaming one of them.
    """
    wake_word: str = "Sushi"                # phrase that activates the assistant
    keywords_file: str = "./keywords.txt"   # keyword list consumed by the spotter
    model_path: str = "./models/kws"        # directory holding the KWS model files
    sensitivity: float = 0.5                # detection sensitivity (0..1 presumably — TODO confirm usage)
    sample_rate: int = 16000                # microphone sample rate in Hz
28
+
29
+
@dataclass
class AudioConfig:
    """Audio capture/playback settings (ALSA device names)."""
    input_device: str = "hw:0"    # ALSA capture device
    output_device: str = "hw:0"   # ALSA playback device
    sample_rate: int = 16000      # sample rate in Hz
    channels: int = 1             # mono capture
37
+
38
+
@dataclass
class StreamingConfig:
    """A/V streaming settings for the SRT pipelines."""
    enable_video: bool = True          # send camera video alongside audio
    video_width: int = 1280            # capture width in pixels
    video_height: int = 720            # capture height in pixels
    video_framerate: int = 30          # capture framerate (fps)
    silence_timeout_s: int = 30        # stop streaming after this much inactivity
    use_hardware_encoder: bool = True  # prefer the Pi's v4l2 H.264 encoder over x264
48
+
49
+
@dataclass
class Config:
    """Root configuration.

    Aggregates the per-section dataclasses; sections mirror the top-level
    keys of config.yaml.
    """
    server: ServerConfig = field(default_factory=ServerConfig)
    kws: KWSConfig = field(default_factory=KWSConfig)
    audio: AudioConfig = field(default_factory=AudioConfig)
    streaming: StreamingConfig = field(default_factory=StreamingConfig)

    @classmethod
    def from_file(cls, path: str) -> "Config":
        """Load configuration from a YAML file.

        A missing file yields pure defaults. Unknown keys inside a section
        are ignored so an older build tolerates a newer config.yaml (the
        previous version passed sections straight to the dataclass
        constructors, so any unexpected key raised TypeError). Missing
        keys keep their dataclass defaults.

        Args:
            path: path to the YAML configuration file.

        Returns:
            A populated Config instance.
        """
        config_path = Path(path)
        if not config_path.exists():
            return cls()

        with open(config_path) as f:
            data = yaml.safe_load(f) or {}

        config = cls()

        def _section(section_cls, raw):
            # Build one section dataclass from raw YAML, dropping keys the
            # dataclass does not declare (forward compatibility) and
            # tolerating a non-mapping value (e.g. an empty YAML section).
            from dataclasses import fields
            if not isinstance(raw, dict):
                return section_cls()
            known = {f.name for f in fields(section_cls)}
            return section_cls(**{k: v for k, v in raw.items() if k in known})

        if "server" in data:
            config.server = _section(ServerConfig, data["server"])
        if "kws" in data:
            config.kws = _section(KWSConfig, data["kws"])
        if "audio" in data:
            config.audio = _section(AudioConfig, data["audio"])
        if "streaming" in data:
            config.streaming = _section(StreamingConfig, data["streaming"])

        return config

    def to_dict(self) -> dict:
        """Convert to a plain nested dict (one sub-dict per section).

        dataclasses.asdict produces exactly the section/field layout the
        previous hand-written mapping did, without the duplication.
        """
        from dataclasses import asdict
        return asdict(self)

    def save(self, path: str) -> None:
        """Save configuration to a YAML file at *path*."""
        with open(path, "w") as f:
            yaml.dump(self.to_dict(), f, default_flow_style=False)
goshi/gstreamer_pipelines.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """GStreamer pipeline definitions for Reachy Mini."""
2
+
3
+ import logging
4
+ from dataclasses import dataclass
5
+ from typing import Optional
6
+
7
+ import gi
8
+ gi.require_version("Gst", "1.0")
9
+ from gi.repository import Gst, GLib
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
@dataclass
class PipelineConfig:
    """Parameters consumed by the pipeline-string builders below."""
    server_host: str                   # Goshi server IP/hostname
    srt_port: int                      # SRT port for this pipeline (send or receive)
    srt_latency_ms: int                # SRT latency budget in milliseconds
    audio_device: str = "hw:0"         # ALSA device for alsasrc/alsasink
    sample_rate: int = 16000           # audio sample rate in Hz
    enable_video: bool = True          # include the camera branch in capture
    video_width: int = 1280            # camera capture width
    video_height: int = 720            # camera capture height
    video_framerate: int = 30          # camera capture framerate (fps)
    use_hardware_encoder: bool = True  # v4l2h264enc (Pi HW) vs x264enc (software)
27
+
28
+
def build_capture_pipeline(config: PipelineConfig) -> str:
    """Build the A/V capture and SRT send pipeline description.

    Returns a single-line gst-launch style string: an ALSA/Opus audio
    branch, optionally a libcamera/H.264 video branch, muxed into
    MPEG-TS and pushed to the server via srtsink (caller mode).
    """
    srt_uri = (
        f"srt://{config.server_host}:{config.srt_port}"
        f"?mode=caller&latency={config.srt_latency_ms}"
    )

    # Microphone -> Opus. Shared by both the A/V and audio-only layouts.
    audio_branch = (
        f"alsasrc device={config.audio_device} ! "
        "audioconvert ! audioresample ! "
        f"audio/x-raw,rate={config.sample_rate},channels=1,format=S16LE ! "
        "opusenc bitrate=32000 ! opusparse"
    )

    if not config.enable_video:
        # Audio-only: Opus straight over SRT, no muxer required.
        return f"{audio_branch} ! srtsink uri={srt_uri}"

    # Camera -> H.264, hardware (v4l2) or software (x264) encoder.
    if config.use_hardware_encoder:
        encoder = (
            "v4l2convert ! "
            'v4l2h264enc extra-controls="controls,repeat_sequence_header=1" ! '
            "video/x-h264,level=(string)4 ! h264parse"
        )
    else:
        encoder = (
            "videoconvert ! "
            "x264enc tune=zerolatency bitrate=2000 speed-preset=ultrafast ! "
            "h264parse"
        )

    video_branch = (
        "libcamerasrc ! "
        f"video/x-raw,width={config.video_width},height={config.video_height},"
        f"framerate={config.video_framerate}/1 ! "
        f"{encoder} ! mux."
    )

    # Both branches feed the named mpegtsmux, whose output goes to SRT.
    return (
        f"{video_branch} {audio_branch} ! mux. "
        f"mpegtsmux name=mux ! srtsink uri={srt_uri}"
    )
72
+
73
+
def build_playback_pipeline(config: PipelineConfig) -> str:
    """Build the TTS audio playback pipeline description.

    Listens for the server's Opus stream over SRT and plays it on the
    configured ALSA device.
    """
    source = (
        f"srtsrc uri=srt://{config.server_host}:{config.srt_port}"
        f"?mode=listener&latency={config.srt_latency_ms}"
    )
    return (
        f"{source} ! opusparse ! opusdec ! audioconvert ! audioresample ! "
        f"audio/x-raw,rate={config.sample_rate},channels=1 ! "
        f"alsasink device={config.audio_device}"
    )
84
+
85
+
class Pipeline:
    """Lifecycle wrapper around a GStreamer pipeline description string.

    Parses the string on start(), watches the bus for errors/EOS, and
    tears everything down on stop(). Usable as a context manager.
    """

    def __init__(self, pipeline_str: str, name: str = "pipeline"):
        """Store the description; Gst.init is idempotent so safe here."""
        Gst.init(None)
        self.name = name
        self.pipeline_str = pipeline_str
        self.pipeline: Optional[Gst.Pipeline] = None
        self.bus: Optional[Gst.Bus] = None
        self._running = False

    def start(self) -> bool:
        """Parse and start the pipeline.

        Returns:
            True on success, False if parsing or the state change failed.
        """
        try:
            self.pipeline = Gst.parse_launch(self.pipeline_str)
            self.bus = self.pipeline.get_bus()
            self.bus.add_signal_watch()
            self.bus.connect("message", self._on_message)

            ret = self.pipeline.set_state(Gst.State.PLAYING)
            if ret == Gst.StateChangeReturn.FAILURE:
                logger.error(f"Failed to start {self.name} pipeline")
                # BUGFIX: tear down the half-started pipeline instead of
                # leaving it (and its bus signal watch) dangling.
                self._teardown()
                return False

            self._running = True
            logger.info(f"{self.name} pipeline started")
            return True

        except Exception as e:
            logger.error(f"Failed to create {self.name} pipeline: {e}")
            self._teardown()
            return False

    def stop(self) -> None:
        """Stop the pipeline and release its resources."""
        if self.pipeline:
            self._teardown()
            logger.info(f"{self.name} pipeline stopped")

    def _teardown(self) -> None:
        """Set the pipeline to NULL and detach the bus watch."""
        if self.bus is not None:
            # BUGFIX: the original never removed the signal watch, leaking
            # the bus watch source on every stop/start cycle.
            self.bus.remove_signal_watch()
            self.bus = None
        if self.pipeline is not None:
            self.pipeline.set_state(Gst.State.NULL)
            self.pipeline = None
        self._running = False

    def is_running(self) -> bool:
        """Check if pipeline is running (cleared on error/EOS)."""
        return self._running

    def _on_message(self, bus, message):
        """Handle GStreamer bus messages (error, EOS, state changes)."""
        msg_type = message.type

        if msg_type == Gst.MessageType.ERROR:
            err, debug = message.parse_error()
            logger.error(f"{self.name} pipeline error: {err.message}")
            logger.debug(f"Debug: {debug}")
            self._running = False

        elif msg_type == Gst.MessageType.EOS:
            logger.info(f"{self.name} pipeline end of stream")
            self._running = False

        elif msg_type == Gst.MessageType.STATE_CHANGED:
            # Only log state changes of the top-level pipeline, not elements.
            if message.src == self.pipeline:
                old, new, pending = message.parse_state_changed()
                logger.debug(f"{self.name} state: {old.value_nick} -> {new.value_nick}")

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.stop()
        return False
goshi/kws_daemon.py ADDED
@@ -0,0 +1,262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Keyword Spotting Daemon for wake word detection."""
2
+
3
+ import logging
4
+ import queue
5
+ import threading
6
+ import time
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+ from typing import Callable, Optional
10
+
11
+ import numpy as np
12
+
13
+ try:
14
+ import sherpa_onnx
15
+ except ImportError:
16
+ sherpa_onnx = None
17
+
18
+ try:
19
+ import sounddevice as sd
20
+ except ImportError:
21
+ sd = None
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
@dataclass
class KWSConfig:
    """KWS configuration: model files and decoding parameters.

    All values are forwarded to sherpa_onnx.KeywordSpotter.
    NOTE(review): shares its name with goshi.config.KWSConfig, which holds
    the user-facing settings — consider renaming one to avoid confusion.
    """
    encoder_path: str                 # passed as `encoder` to sherpa_onnx.KeywordSpotter
    decoder_path: str                 # passed as `decoder`
    joiner_path: str                  # passed as `joiner`
    tokens_path: str                  # passed as `tokens`
    keywords_file: str                # passed as `keywords_file`
    sample_rate: int = 16000          # expected input sample rate in Hz
    num_threads: int = 2              # inference threads
    keywords_threshold: float = 0.25  # detection threshold
    keywords_score: float = 1.0       # keyword boosting score
    num_trailing_blanks: int = 1      # trailing blanks required after a match
39
+
40
+
class KeywordSpotter:
    """Sherpa-ONNX Keyword Spotter wrapper.

    Owns one streaming decoder state; feed chunks via process_audio().
    """

    def __init__(self, config: KWSConfig):
        """Build the spotter from the model paths in *config*.

        Raises:
            ImportError: if sherpa_onnx is not installed.
        """
        if sherpa_onnx is None:
            raise ImportError("sherpa_onnx not installed")

        self.config = config
        self.sample_rate = config.sample_rate

        # Create keyword spotter
        self.spotter = sherpa_onnx.KeywordSpotter(
            tokens=config.tokens_path,
            encoder=config.encoder_path,
            decoder=config.decoder_path,
            joiner=config.joiner_path,
            num_threads=config.num_threads,
            keywords_file=config.keywords_file,
            keywords_score=config.keywords_score,
            keywords_threshold=config.keywords_threshold,
            num_trailing_blanks=config.num_trailing_blanks,
            provider="cpu",  # CPU inference (runs on the Pi)
        )

        self.stream = self.spotter.create_stream()
        logger.info("Keyword spotter initialized")

    def process_audio(self, samples: np.ndarray) -> Optional[str]:
        """Process audio samples, return keyword if detected.

        Args:
            samples: mono chunk at self.sample_rate; cast to float32.
                Assumes values are normalized floats — TODO confirm caller.

        Returns:
            The detected keyword string, or None.
        """
        if len(samples) == 0:
            return None

        # Accept waveform
        self.stream.accept_waveform(self.sample_rate, samples.astype(np.float32))

        # Decode as long as the stream has enough buffered audio.
        while self.spotter.is_ready(self.stream):
            self.spotter.decode_stream(self.stream)
            result = self.spotter.get_result(self.stream)

            if result:
                # Reset stream after detection so one utterance fires once.
                self.spotter.reset_stream(self.stream)
                logger.info(f"Keyword detected: {result}")
                return result

        return None

    def reset(self) -> None:
        """Reset the stream (discard pending decoder state)."""
        self.spotter.reset_stream(self.stream)
92
+
93
+
class KWSDaemon:
    """Daemon that listens for the wake word and triggers a callback.

    A background thread drains microphone chunks from a queue (filled by
    the sounddevice callback) and feeds them to a KeywordSpotter.
    """

    def __init__(
        self,
        config: KWSConfig,
        on_wake_word: Callable[[], None],
    ):
        """Create the daemon.

        Args:
            config: model/decoding settings for the spotter.
            on_wake_word: invoked (on the daemon thread) per detection.

        Raises:
            ImportError: if sounddevice is not installed.
        """
        if sd is None:
            raise ImportError("sounddevice not installed")

        self.config = config
        self.on_wake_word = on_wake_word

        self.spotter: Optional[KeywordSpotter] = None
        self.running = False
        self.thread: Optional[threading.Thread] = None

        # Queue decouples the real-time audio callback from decoding.
        self.audio_queue: queue.Queue = queue.Queue()

    def start(self) -> None:
        """Start the KWS daemon (no-op if already running)."""
        if self.running:
            return

        # Initialize the spotter before the thread starts so _run sees it.
        self.spotter = KeywordSpotter(self.config)

        self.running = True
        self.thread = threading.Thread(target=self._run, daemon=True)
        self.thread.start()
        logger.info("KWS daemon started")

    def stop(self) -> None:
        """Stop the KWS daemon and release the spotter."""
        self.running = False
        if self.thread:
            self.thread.join(timeout=2.0)
        self.spotter = None
        logger.info("KWS daemon stopped")

    def _run(self) -> None:
        """Worker loop: capture mic audio and scan it for keywords."""
        # BUGFIX: hold a local reference to the spotter. stop() clears
        # self.spotter after a *bounded* join(timeout=2.0), so a thread
        # still draining the queue could otherwise race between the
        # `if self.spotter` check and the process_audio call.
        spotter = self.spotter

        sample_rate = self.config.sample_rate
        chunk_duration = 0.1  # 100ms chunks
        chunk_size = int(sample_rate * chunk_duration)

        def audio_callback(indata, frames, time_info, status):
            # Runs on the sounddevice thread; keep it minimal.
            if status:
                logger.warning(f"Audio status: {status}")
            self.audio_queue.put(indata.copy())

        try:
            with sd.InputStream(
                samplerate=sample_rate,
                channels=1,
                dtype="float32",
                blocksize=chunk_size,
                callback=audio_callback,
            ):
                logger.info("Audio input stream started")

                while self.running:
                    try:
                        # Bounded wait so stop() is noticed promptly.
                        samples = self.audio_queue.get(timeout=0.5)
                    except queue.Empty:
                        continue

                    if spotter and spotter.process_audio(samples.flatten()):
                        self.on_wake_word()

        except Exception as e:
            logger.error(f"Audio stream error: {e}")
            self.running = False

    def is_running(self) -> bool:
        """Check if daemon is running."""
        return self.running
179
+
180
+
# Alternative: Simple VAD-based wake word (for testing without full KWS model)
class SimpleWakeWordDetector:
    """Simple wake word detector using just VAD + pattern matching.

    NOTE(review): as written this is a stub — the audio callback only logs
    an energy level and `on_wake_word` is never invoked; the VAD created in
    __init__ is likewise unused by the loop. Confirm before relying on it.
    """

    def __init__(
        self,
        wake_word: str = "sushi",
        vad_threshold: float = 0.5,
        on_wake_word: Optional[Callable[[], None]] = None,  # annotation fixed: default is None
    ):
        self.wake_word = wake_word.lower()
        self.vad_threshold = vad_threshold
        self.on_wake_word = on_wake_word
        self.running = False
        self.thread: Optional[threading.Thread] = None

        # Use Silero VAD if available; fall back to raw energy otherwise.
        try:
            import sherpa_onnx
            self.vad = self._create_vad()
        except ImportError:
            self.vad = None
            logger.warning("sherpa_onnx not available, using energy-based detection")

    def _create_vad(self):
        """Create Silero VAD.

        Assumes the model file exists at ./models/silero_vad.onnx relative
        to the working directory — TODO confirm deployment layout.
        """
        vad_config = sherpa_onnx.VadModelConfig()
        vad_config.silero_vad.model = "./models/silero_vad.onnx"
        vad_config.silero_vad.threshold = self.vad_threshold
        vad_config.silero_vad.min_silence_duration = 0.25
        vad_config.silero_vad.min_speech_duration = 0.1
        vad_config.sample_rate = 16000

        return sherpa_onnx.VoiceActivityDetector(vad_config, buffer_size_in_seconds=5)

    def start(self) -> None:
        """Start detector (spawns the daemon capture thread)."""
        self.running = True
        self.thread = threading.Thread(target=self._run, daemon=True)
        self.thread.start()
        logger.info("Simple wake word detector started (press Ctrl+C or say wake word)")

    def stop(self) -> None:
        """Stop detector (bounded join; daemon thread dies with process)."""
        self.running = False
        if self.thread:
            self.thread.join(timeout=2.0)

    def _run(self) -> None:
        """Run detection loop: open the mic and idle while the callback logs."""
        sample_rate = 16000
        chunk_size = int(sample_rate * 0.1)

        def audio_callback(indata, frames, time_info, status):
            if status:
                logger.warning(f"Audio status: {status}")

            # Energy-based detection for testing
            energy = np.sqrt(np.mean(indata ** 2))
            if energy > 0.05:  # Threshold
                logger.debug(f"Audio energy: {energy:.4f}")
                # For now, just detect loud audio as "wake word"
                # In real use, this would trigger STT for verification

        try:
            with sd.InputStream(
                samplerate=sample_rate,
                channels=1,
                dtype="float32",
                blocksize=chunk_size,
                callback=audio_callback,
            ):
                while self.running:
                    time.sleep(0.1)

        except Exception as e:
            logger.error(f"Audio stream error: {e}")
            self.running = False

    def is_running(self) -> bool:
        """Check if detector is running."""
        return self.running
goshi/main.py ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Goshi main app - Voice AI companion for Reachy Mini."""
2
+
3
+ import logging
4
+ import threading
5
+ import time
6
+ from typing import Optional
7
+
8
+ from pydantic import BaseModel
9
+ from reachy_mini import ReachyMini, ReachyMiniApp
10
+
11
+ from .config import Config
12
+ from .kws_daemon import SimpleWakeWordDetector
13
+ from .streaming_controller import StreamingController, StreamingConfig, StreamState
14
+
15
+ logger = logging.getLogger("goshi")
16
+
17
+
class Goshi(ReachyMiniApp):
    """Voice AI companion app for Reachy Mini robot.

    Say "Sushi" to start a conversation!
    """

    # Settings page URL - accessible from Reachy Mini web interface
    custom_app_url: str | None = "http://0.0.0.0:8042"
    # Use default media backend
    request_media_backend: str | None = None

    def run(self, reachy_mini: ReachyMini, stop_event: threading.Event) -> None:
        """Main app loop.

        Wires together: config loading, the settings HTTP API, the SRT
        streaming controller, and the wake-word detector; then idles
        (with an antenna animation) until stop_event is set.

        Args:
            reachy_mini: The Reachy Mini robot instance
            stop_event: Event to signal app shutdown
        """

        # Load configuration; fall back to built-in defaults on any failure.
        try:
            config = Config.from_file("config.yaml")
            logger.info(f"Configuration loaded, server: {config.server.host}")
        except Exception as e:
            logger.warning(f"Failed to load config, using defaults: {e}")
            config = Config()

        # Current settings state (can be updated via web UI)
        server_host = config.server.host
        is_listening = False
        is_streaming = False

        # ==========================================
        # Settings API (FastAPI endpoints)
        # ==========================================
        # NOTE(review): the /start and /stop handlers below close over
        # `streaming`, which is only assigned further down in run(); a
        # request arriving before that assignment would raise NameError.

        class ServerSettings(BaseModel):
            host: str

        class StatusResponse(BaseModel):
            is_listening: bool
            is_streaming: bool
            server_host: str

        @self.settings_app.get("/status")
        def get_status() -> StatusResponse:
            """Get current app status."""
            return StatusResponse(
                is_listening=is_listening,
                is_streaming=is_streaming,
                server_host=server_host,
            )

        @self.settings_app.post("/server")
        def update_server(settings: ServerSettings) -> dict:
            """Update server host (takes effect on the next stream start)."""
            nonlocal server_host
            server_host = settings.host
            logger.info(f"Server host updated to: {server_host}")
            return {"host": server_host, "success": True}

        @self.settings_app.post("/start")
        def start_streaming() -> dict:
            """Manually start streaming."""
            if streaming and not streaming.is_streaming():
                streaming.start_streaming()
                return {"success": True, "message": "Streaming started"}
            return {"success": False, "message": "Already streaming or not ready"}

        @self.settings_app.post("/stop")
        def stop_streaming_endpoint() -> dict:
            """Manually stop streaming."""
            if streaming and streaming.is_streaming():
                streaming.stop_streaming()
                return {"success": True, "message": "Streaming stopped"}
            return {"success": False, "message": "Not streaming"}

        # ==========================================
        # Streaming Controller Setup
        # ==========================================
        # The client's send port is the server's receive port and vice
        # versa, hence the deliberate swap below.

        streaming_config = StreamingConfig(
            server_host=server_host,
            srt_send_port=config.server.srt_receive_port,
            srt_receive_port=config.server.srt_send_port,
            srt_latency_ms=config.server.srt_latency_ms,
            audio_device=config.audio.input_device,
            enable_video=config.streaming.enable_video,
            video_width=config.streaming.video_width,
            video_height=config.streaming.video_height,
            video_framerate=config.streaming.video_framerate,
            silence_timeout_s=config.streaming.silence_timeout_s,
            use_hardware_encoder=config.streaming.use_hardware_encoder,
        )

        def on_state_change(state: StreamState) -> None:
            """Handle streaming state changes (mirror into UI + antennas)."""
            nonlocal is_streaming
            is_streaming = state == StreamState.STREAMING
            logger.info(f"Streaming state: {state.value}")

            # Antenna feedback on state change
            if state == StreamState.STREAMING:
                # Antennas up - listening
                reachy_mini.set_antenna_targets([0.3, 0.3])
            elif state == StreamState.IDLE:
                # Antennas neutral
                reachy_mini.set_antenna_targets([0, 0])

        streaming = StreamingController(streaming_config, on_state_change)

        # ==========================================
        # Wake Word Detection
        # ==========================================

        def on_wake_word() -> None:
            """Handle wake word detection: animate, then (re)start streaming."""
            nonlocal is_listening
            logger.info("Wake word 'Sushi' detected!")
            is_listening = True

            if not streaming.is_streaming():
                # Excitement animation - antenna wiggle
                for i in range(3):
                    reachy_mini.set_antenna_targets([0.5, 0.5])
                    time.sleep(0.1)
                    reachy_mini.set_antenna_targets([0, 0])
                    time.sleep(0.1)

                # Update server host in case it changed via the web UI
                streaming.config.server_host = server_host
                streaming.start_streaming()
            else:
                # Reset silence timeout
                streaming.on_activity()

        kws = SimpleWakeWordDetector(
            wake_word=config.kws.wake_word,
            on_wake_word=on_wake_word,
        )

        # ==========================================
        # Main Loop
        # ==========================================

        try:
            kws.start()
            logger.info("Goshi started! Say 'Sushi' to begin...")

            idle_animation_time = 0

            while not stop_event.is_set():
                t = time.time()

                # Idle antenna animation when not streaming
                if not streaming.is_streaming():
                    # Gentle breathing animation (re-targeted every 3 s)
                    if t - idle_animation_time > 3.0:
                        a = 0.1 * (1 + 0.3 * ((t * 0.5) % 1))
                        reachy_mini.set_antenna_targets([a, -a])
                        idle_animation_time = t

                time.sleep(0.1)

        except Exception as e:
            logger.error(f"Error in main loop: {e}")
            raise
        finally:
            # Best-effort cleanup on shutdown or crash.
            logger.info("Shutting down Goshi...")
            kws.stop()
            streaming.stop_streaming()
            # Return antennas to neutral
            reachy_mini.set_antenna_targets([0, 0])
191
+
192
+
if __name__ == "__main__":
    # Standalone entry point: configure root logging, then hand control
    # to the ReachyMiniApp runner until interrupted.
    logging.basicConfig(
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        level=logging.INFO,
    )

    goshi_app = Goshi()
    try:
        goshi_app.wrapped_run()
    except KeyboardInterrupt:
        goshi_app.stop()
goshi/static/index.html ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>Goshi Settings</title>
8
+ <style>
9
+ :root {
10
+ --primary: #8B5CF6;
11
+ --secondary: #EC4899;
12
+ --bg: #1a1a2e;
13
+ --card: #16213e;
14
+ --text: #eaeaea;
15
+ --success: #10b981;
16
+ --error: #ef4444;
17
+ }
18
+
19
+ * {
20
+ margin: 0;
21
+ padding: 0;
22
+ box-sizing: border-box;
23
+ }
24
+
25
+ body {
26
+ font-family: -apple-system, BlinkMacSystemFont, sans-serif;
27
+ background: var(--bg);
28
+ color: var(--text);
29
+ min-height: 100vh;
30
+ padding: 2rem;
31
+ }
32
+
33
+ .container {
34
+ max-width: 500px;
35
+ margin: 0 auto;
36
+ }
37
+
38
+ h1 {
39
+ font-size: 1.75rem;
40
+ margin-bottom: 2rem;
41
+ text-align: center;
42
+ }
43
+
44
+ .card {
45
+ background: var(--card);
46
+ padding: 1.5rem;
47
+ border-radius: 1rem;
48
+ margin-bottom: 1rem;
49
+ }
50
+
51
+ .status {
52
+ display: flex;
53
+ align-items: center;
54
+ gap: 0.75rem;
55
+ margin-bottom: 1rem;
56
+ }
57
+
58
+ .status-dot {
59
+ width: 12px;
60
+ height: 12px;
61
+ border-radius: 50%;
62
+ background: var(--error);
63
+ }
64
+
65
+ .status-dot.active {
66
+ background: var(--success);
67
+ }
68
+
69
+ label {
70
+ display: block;
71
+ margin-bottom: 0.5rem;
72
+ font-size: 0.9rem;
73
+ opacity: 0.8;
74
+ }
75
+
76
+ input {
77
+ width: 100%;
78
+ padding: 0.75rem;
79
+ border: 1px solid rgba(255, 255, 255, 0.1);
80
+ border-radius: 0.5rem;
81
+ background: rgba(0, 0, 0, 0.3);
82
+ color: var(--text);
83
+ font-size: 1rem;
84
+ margin-bottom: 1rem;
85
+ }
86
+
87
+ button {
88
+ width: 100%;
89
+ padding: 0.75rem;
90
+ border: none;
91
+ border-radius: 0.5rem;
92
+ background: linear-gradient(135deg, var(--primary), var(--secondary));
93
+ color: white;
94
+ font-size: 1rem;
95
+ font-weight: 600;
96
+ cursor: pointer;
97
+ transition: opacity 0.2s;
98
+ }
99
+
100
+ button:hover {
101
+ opacity: 0.9;
102
+ }
103
+
104
+ button:active {
105
+ opacity: 0.8;
106
+ }
107
+
108
+ .btn-group {
109
+ display: flex;
110
+ gap: 0.5rem;
111
+ }
112
+
113
+ .btn-group button {
114
+ flex: 1;
115
+ }
116
+
117
+ .btn-secondary {
118
+ background: rgba(255, 255, 255, 0.1);
119
+ }
120
+
121
+ .message {
122
+ padding: 0.75rem;
123
+ border-radius: 0.5rem;
124
+ margin-top: 1rem;
125
+ text-align: center;
126
+ display: none;
127
+ }
128
+
129
+ .message.success {
130
+ background: rgba(16, 185, 129, 0.2);
131
+ display: block;
132
+ }
133
+
134
+ .message.error {
135
+ background: rgba(239, 68, 68, 0.2);
136
+ display: block;
137
+ }
138
+ </style>
139
+ </head>
140
+
141
+ <body>
142
+ <div class="container">
143
+ <h1>🍣 Goshi Settings</h1>
144
+
145
+ <div class="card">
146
+ <div class="status">
147
+ <span class="status-dot" id="statusDot"></span>
148
+ <span id="statusText">Checking...</span>
149
+ </div>
150
+
151
+ <label>Server Host</label>
152
+ <input type="text" id="serverHost" placeholder="192.168.1.57">
153
+
154
+ <button onclick="updateServer()">Save Server</button>
155
+
156
+ <div id="message" class="message"></div>
157
+ </div>
158
+
159
+ <div class="card">
160
+ <label>Streaming Control</label>
161
+ <div class="btn-group">
162
+ <button onclick="startStream()">Start</button>
163
+ <button class="btn-secondary" onclick="stopStream()">Stop</button>
164
+ </div>
165
+ </div>
166
+ </div>
167
+
168
+ <script>
async function fetchStatus() {
    // Poll the backend and mirror its state in the status indicator
    // and the server-host input.
    const dot = document.getElementById('statusDot');
    const text = document.getElementById('statusText');
    try {
        const response = await fetch('/status');
        const status = await response.json();

        dot.classList.toggle('active', status.is_streaming || status.is_listening);
        text.textContent = status.is_streaming ? 'Streaming'
            : status.is_listening ? 'Listening'
            : 'Idle';

        document.getElementById('serverHost').value = status.server_host;
    } catch (err) {
        text.textContent = 'Disconnected';
    }
}
194
+
async function updateServer() {
    // Persist the server host. fetch() resolves even on HTTP 4xx/5xx,
    // so res.ok must be checked explicitly — the original reported
    // success on any response. Also drops the unused parsed body.
    const host = document.getElementById('serverHost').value;
    try {
        const res = await fetch('/server', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ host })
        });
        if (!res.ok) throw new Error(`HTTP ${res.status}`);
        showMessage('Server updated!', 'success');
    } catch (e) {
        showMessage('Failed to update', 'error');
    }
}
209
+
async function startStream() {
    // Ask the app to start streaming; refresh the status shortly after.
    // BUGFIX: check res.ok — fetch() does not reject on HTTP errors,
    // so the original showed "started" even when the server refused.
    try {
        const res = await fetch('/start', { method: 'POST' });
        if (!res.ok) throw new Error(`HTTP ${res.status}`);
        showMessage('Streaming started', 'success');
        setTimeout(fetchStatus, 500);
    } catch (e) {
        showMessage('Failed to start', 'error');
    }
}
219
+
async function stopStream() {
    // Ask the app to stop streaming; refresh the status shortly after.
    // BUGFIX: check res.ok — fetch() does not reject on HTTP errors,
    // so the original showed "stopped" even when the server refused.
    try {
        const res = await fetch('/stop', { method: 'POST' });
        if (!res.ok) throw new Error(`HTTP ${res.status}`);
        showMessage('Streaming stopped', 'success');
        setTimeout(fetchStatus, 500);
    } catch (e) {
        showMessage('Failed to stop', 'error');
    }
}
229
+
function showMessage(text, type) {
    // Flash a transient banner ('success' or 'error' styling) for 3 s,
    // then hide it by resetting the class to bare 'message' (display:none).
    const msg = document.getElementById('message');
    msg.textContent = text;
    msg.className = 'message ' + type;
    setTimeout(() => msg.className = 'message', 3000);
}
236
+
237
+ fetchStatus();
238
+ setInterval(fetchStatus, 5000);
239
+ </script>
240
+ </body>
241
+
242
+ </html>
goshi/streaming_controller.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Streaming controller - manages capture and playback pipelines."""
2
+
3
+ import logging
4
+ import threading
5
+ import time
6
+ from dataclasses import dataclass
7
+ from enum import Enum
8
+ from typing import Callable, Optional
9
+
10
+ from gstreamer_pipelines import Pipeline, PipelineConfig, build_capture_pipeline, build_playback_pipeline
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class StreamState(Enum):
    """Lifecycle state of the streaming controller."""
    IDLE = "idle"            # no pipelines running
    STREAMING = "streaming"  # capture and playback pipelines active
    ERROR = "error"          # a pipeline failed to start
20
+
21
+
22
@dataclass
class StreamingConfig:
    """Streaming controller configuration.

    Bundles the network, audio and video parameters used to build the
    SRT capture and playback pipelines.
    """
    server_host: str            # hostname/IP of the Goshi server
    srt_send_port: int          # SRT port for the outgoing capture stream
    srt_receive_port: int       # SRT port for the incoming playback stream
    srt_latency_ms: int         # SRT latency budget in milliseconds
    audio_device: str           # audio device identifier passed to the pipelines
    enable_video: bool          # whether to capture and send video
    video_width: int            # capture width in pixels
    video_height: int           # capture height in pixels
    video_framerate: int        # capture framerate (fps)
    silence_timeout_s: int      # seconds of inactivity before auto-stop
    use_hardware_encoder: bool  # prefer the hardware video encoder
36
+
37
+
38
class StreamingController:
    """Controls audio/video streaming to/from the server.

    Owns two GStreamer pipelines (capture: robot -> server, playback:
    server -> robot) plus an inactivity timer that tears both down after
    ``config.silence_timeout_s`` seconds without activity.

    Thread-safety: start/stop transitions and timer rearming are
    serialized with an internal lock.  NOTE(review): the
    ``on_state_change`` callback is invoked while that lock is held, so
    it must not call back into ``start_streaming``/``stop_streaming``
    or it will deadlock.
    """

    def __init__(
        self,
        config: StreamingConfig,
        on_state_change: Optional[Callable[[StreamState], None]] = None,
    ):
        """Create a controller.

        Args:
            config: Pipeline/network parameters.
            on_state_change: Optional callback invoked with the new
                ``StreamState`` whenever the state actually changes.
        """
        self.config = config
        self.on_state_change = on_state_change

        self.state = StreamState.IDLE
        self.capture_pipeline: Optional[Pipeline] = None
        self.playback_pipeline: Optional[Pipeline] = None

        self._lock = threading.Lock()
        self._timeout_timer: Optional[threading.Timer] = None
        self._last_activity = time.time()

    def start_streaming(self) -> bool:
        """Start capture and playback pipelines.

        Returns:
            True if both pipelines are running (or streaming was already
            active); False on failure, in which case the state is ERROR
            and any partially-started pipeline has been stopped and its
            reference released.
        """
        with self._lock:
            if self.state == StreamState.STREAMING:
                logger.debug("Already streaming")
                return True

            logger.info("Starting streaming...")

            # Capture pipeline config: robot mic/camera -> server.
            capture_config = PipelineConfig(
                server_host=self.config.server_host,
                srt_port=self.config.srt_send_port,
                srt_latency_ms=self.config.srt_latency_ms,
                audio_device=self.config.audio_device,
                enable_video=self.config.enable_video,
                video_width=self.config.video_width,
                video_height=self.config.video_height,
                video_framerate=self.config.video_framerate,
                use_hardware_encoder=self.config.use_hardware_encoder,
            )

            # Playback pipeline config: server audio -> robot speaker.
            playback_config = PipelineConfig(
                server_host=self.config.server_host,
                srt_port=self.config.srt_receive_port,
                srt_latency_ms=self.config.srt_latency_ms,
                audio_device=self.config.audio_device,
            )

            # Start capture pipeline.
            capture_str = build_capture_pipeline(capture_config)
            self.capture_pipeline = Pipeline(capture_str, "capture")
            if not self.capture_pipeline.start():
                # Fix: drop the failed pipeline instead of keeping a
                # stale reference around in the ERROR state.
                self.capture_pipeline = None
                self._set_state(StreamState.ERROR)
                return False

            # Start playback pipeline.
            playback_str = build_playback_pipeline(playback_config)
            self.playback_pipeline = Pipeline(playback_str, "playback")
            if not self.playback_pipeline.start():
                # Fix: on partial failure, tear down the already-running
                # capture pipeline AND release both references.
                self.capture_pipeline.stop()
                self.capture_pipeline = None
                self.playback_pipeline = None
                self._set_state(StreamState.ERROR)
                return False

            self._set_state(StreamState.STREAMING)
            self._start_timeout_timer()

            return True

    def stop_streaming(self) -> None:
        """Stop all pipelines and return to IDLE.  Safe to call when idle."""
        with self._lock:
            self._cancel_timeout_timer()

            if self.capture_pipeline:
                self.capture_pipeline.stop()
                self.capture_pipeline = None

            if self.playback_pipeline:
                self.playback_pipeline.stop()
                self.playback_pipeline = None

            self._set_state(StreamState.IDLE)
            logger.info("Streaming stopped")

    def on_activity(self) -> None:
        """Record user activity (speech, etc.) and push back the timeout.

        Fix: the timer is only rearmed while actually streaming
        (previously a spurious timer could be started while idle), and
        the rearm is done under the lock so it cannot race with a
        concurrent stop_streaming().
        """
        self._last_activity = time.time()
        with self._lock:
            if self.state == StreamState.STREAMING:
                self._reset_timeout_timer()

    def _set_state(self, state: StreamState) -> None:
        """Set state and notify the callback on an actual change.

        Caller must hold ``self._lock``; the callback therefore runs
        with the lock held (see class docstring).
        """
        if self.state != state:
            self.state = state
            if self.on_state_change:
                self.on_state_change(state)

    def _start_timeout_timer(self) -> None:
        """(Re)arm the silence timeout timer as a daemon thread."""
        self._cancel_timeout_timer()
        self._timeout_timer = threading.Timer(
            self.config.silence_timeout_s,
            self._on_timeout,
        )
        # Daemon so a pending timer never blocks interpreter shutdown.
        self._timeout_timer.daemon = True
        self._timeout_timer.start()

    def _reset_timeout_timer(self) -> None:
        """Cancel and restart the timeout timer."""
        self._cancel_timeout_timer()
        self._start_timeout_timer()

    def _cancel_timeout_timer(self) -> None:
        """Cancel the timeout timer if one is pending."""
        if self._timeout_timer:
            self._timeout_timer.cancel()
            self._timeout_timer = None

    def _on_timeout(self) -> None:
        """Timer callback: no activity for silence_timeout_s, stop streaming.

        Runs on the timer thread; stop_streaming() takes the lock itself.
        """
        logger.info(f"Silence timeout ({self.config.silence_timeout_s}s), stopping stream")
        self.stop_streaming()

    def is_streaming(self) -> bool:
        """Return True while in the STREAMING state."""
        return self.state == StreamState.STREAMING

    def get_state(self) -> StreamState:
        """Return the current StreamState."""
        return self.state

    def __enter__(self):
        # Context-manager entry: best-effort start (result not raised).
        self.start_streaming()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Context-manager exit: always stop; never suppress exceptions.
        self.stop_streaming()
        return False
index.html ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Goshi - Voice AI for Reachy Mini</title>
7
+ <link rel="stylesheet" href="style.css">
8
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet">
9
+ </head>
10
+ <body>
11
+ <div class="container">
12
+ <div class="hero">
13
+ <span class="emoji">🍣</span>
14
+ <h1>Goshi</h1>
15
+ <p class="subtitle">Voice AI Companion for Reachy Mini</p>
16
+ <p class="tagline">Say <span class="highlight">"Sushi"</span> to start a conversation!</p>
17
+ </div>
18
+
19
+ <div class="features">
20
+ <div class="feature">
21
+ <span class="feature-icon">🎙️</span>
22
+ <h3>Wake Word</h3>
23
+ <p>Say "Sushi" to activate the robot</p>
24
+ </div>
25
+ <div class="feature">
26
+ <span class="feature-icon">🤖</span>
27
+ <h3>Smart AI</h3>
28
+ <p>Natural conversations powered by LLMs</p>
29
+ </div>
30
+ <div class="feature">
31
+ <span class="feature-icon">🌍</span>
32
+ <h3>Bilingual</h3>
33
+ <p>Speaks French & English</p>
34
+ </div>
35
+ <div class="feature">
36
+ <span class="feature-icon">⚡</span>
37
+ <h3>Low Latency</h3>
38
+ <p>&lt;800ms response time</p>
39
+ </div>
40
+ <div class="feature">
41
+ <span class="feature-icon">🔒</span>
42
+ <h3>Local AI</h3>
43
+ <p>Private, on-premise processing</p>
44
+ </div>
45
+ <div class="feature">
46
+ <span class="feature-icon">🤸</span>
47
+ <h3>Expressive</h3>
48
+ <p>Head & antenna animations</p>
49
+ </div>
50
+ </div>
51
+
52
+ <div class="install-section">
53
+ <h2>🚀 Installation</h2>
54
+ <div class="install-steps">
55
+ <div class="step">
56
+ <span class="step-number">1</span>
57
+ <div class="step-content">
58
+ <h4>Install on Reachy Mini</h4>
59
+ <p>Via the app manager or command line:</p>
60
+ <code>pip install git+https://huggingface.co/spaces/pnocera/goshi</code>
61
+ </div>
62
+ </div>
63
+ <div class="step">
64
+ <span class="step-number">2</span>
65
+ <div class="step-content">
66
+ <h4>Start the Goshi Server</h4>
67
+ <p>On your workstation (requires NVIDIA GPU):</p>
68
+ <code>./goshi-server --config config.toml</code>
69
+ </div>
70
+ </div>
71
+ <div class="step">
72
+ <span class="step-number">3</span>
73
+ <div class="step-content">
74
+ <h4>Configure & Talk!</h4>
75
+ <p>Update config.yaml with server IP, then say "Sushi"!</p>
76
+ </div>
77
+ </div>
78
+ </div>
79
+ </div>
80
+
81
+ <div class="architecture">
82
+ <h2>🏗️ Architecture</h2>
83
+ <pre>
84
+ Reachy Mini (Raspberry Pi 4) Workstation (RTX GPU)
85
+ ┌────────────────────────┐ ┌─────────────────────────┐
86
+ │ Goshi Client │◄── SRT ──►│ Goshi Server │
87
+ │ ┌──────────────────┐ │ │ ┌───────────────────┐ │
88
+ │ │ Wake Word (KWS) │ │ │ │ VAD (Silero) │ │
89
+ │ │ Audio Capture │──┼───────────┼─►│ STT (Whisper) │ │
90
+ │ │ TTS Playback │◄─┼───────────┼──│ LLM (Ollama) │ │
91
+ │ │ Video Stream │ │ │ │ TTS (Piper) │ │
92
+ │ └──────────────────┘ │ │ └───────────────────┘ │
93
+ └────────────────────────┘ └─────────────────────────┘
94
+ </pre>
95
+ </div>
96
+
97
+ <div class="footer">
98
+ <p>Built with ❤️ for Reachy Mini</p>
99
+ <p class="links">
100
+ <a href="https://github.com/pnocera/goshi">GitHub</a> ·
101
+ <a href="https://pollen-robotics.com">Pollen Robotics</a> ·
102
+ <a href="https://huggingface.co/spaces/pnocera/goshi">Hugging Face</a>
103
+ </p>
104
+ </div>
105
+ </div>
106
+ </body>
107
+ </html>
pyproject.toml ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "goshi"
7
+ version = "0.1.0"
8
+ description = "Voice AI companion for Reachy Mini - Say 'Sushi' to start a conversation!"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = {text = "MIT"}
12
+ authors = [
13
+ {name = "Pierre Nocera", email = "pierre@example.com"}
14
+ ]
15
+ dependencies = [
16
+ "reachy-mini",
17
+ "PyYAML>=6.0",
18
+ "sounddevice>=0.4.6",
19
+ "numpy>=1.24.0",
20
+ "pydantic>=2.0",
21
+ ]
22
+ keywords = ["reachy-mini-app", "voice-ai", "robot"]
23
+
24
+ [project.entry-points."reachy_mini_apps"]
25
+ goshi = "goshi.main:Goshi"
26
+
27
+ [tool.setuptools]
28
+ package-dir = { "" = "." }
29
+ include-package-data = true
30
+
31
+ [tool.setuptools.packages.find]
32
+ where = ["."]
33
+
34
+ [tool.setuptools.package-data]
35
+ goshi = ["**/*"]
style.css ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ :root {
2
+ --primary: #8B5CF6;
3
+ --secondary: #EC4899;
4
+ --accent: #F59E0B;
5
+ --bg: #0F0F0F;
6
+ --bg-card: rgba(255, 255, 255, 0.05);
7
+ --bg-hover: rgba(255, 255, 255, 0.08);
8
+ --text: #FFFFFF;
9
+ --text-muted: rgba(255, 255, 255, 0.6);
10
+ --border: rgba(255, 255, 255, 0.1);
11
+ --gradient: linear-gradient(135deg, var(--primary), var(--secondary));
12
+ }
13
+
14
+ * {
15
+ margin: 0;
16
+ padding: 0;
17
+ box-sizing: border-box;
18
+ }
19
+
20
+ body {
21
+ font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
22
+ background: var(--bg);
23
+ color: var(--text);
24
+ min-height: 100vh;
25
+ line-height: 1.6;
26
+ }
27
+
28
+ .container {
29
+ max-width: 1000px;
30
+ margin: 0 auto;
31
+ padding: 2rem;
32
+ }
33
+
34
+ /* Hero Section */
35
+ .hero {
36
+ text-align: center;
37
+ padding: 4rem 0 3rem;
38
+ }
39
+
40
+ .emoji {
41
+ font-size: 6rem;
42
+ display: block;
43
+ margin-bottom: 1rem;
44
+ animation: bounce 2s ease-in-out infinite;
45
+ }
46
+
47
+ @keyframes bounce {
48
+ 0%, 100% { transform: translateY(0); }
49
+ 50% { transform: translateY(-10px); }
50
+ }
51
+
52
+ h1 {
53
+ font-size: 4rem;
54
+ font-weight: 700;
55
+ background: var(--gradient);
56
+ -webkit-background-clip: text;
57
+ -webkit-text-fill-color: transparent;
58
+ background-clip: text;
59
+ margin-bottom: 0.5rem;
60
+ }
61
+
62
+ .subtitle {
63
+ font-size: 1.5rem;
64
+ color: var(--text-muted);
65
+ margin-bottom: 1rem;
66
+ }
67
+
68
+ .tagline {
69
+ font-size: 1.25rem;
70
+ color: var(--text-muted);
71
+ }
72
+
73
+ .highlight {
74
+ background: var(--gradient);
75
+ -webkit-background-clip: text;
76
+ -webkit-text-fill-color: transparent;
77
+ background-clip: text;
78
+ font-weight: 600;
79
+ }
80
+
81
+ /* Features Grid */
82
+ .features {
83
+ display: grid;
84
+ grid-template-columns: repeat(3, 1fr);
85
+ gap: 1.5rem;
86
+ margin: 3rem 0;
87
+ }
88
+
89
+ @media (max-width: 768px) {
90
+ .features {
91
+ grid-template-columns: repeat(2, 1fr);
92
+ }
93
+ }
94
+
95
+ @media (max-width: 480px) {
96
+ .features {
97
+ grid-template-columns: 1fr;
98
+ }
99
+ }
100
+
101
+ .feature {
102
+ background: var(--bg-card);
103
+ padding: 2rem;
104
+ border-radius: 1rem;
105
+ text-align: center;
106
+ border: 1px solid var(--border);
107
+ transition: all 0.3s ease;
108
+ }
109
+
110
+ .feature:hover {
111
+ background: var(--bg-hover);
112
+ transform: translateY(-5px);
113
+ border-color: var(--primary);
114
+ }
115
+
116
+ .feature-icon {
117
+ font-size: 2.5rem;
118
+ display: block;
119
+ margin-bottom: 1rem;
120
+ }
121
+
122
+ .feature h3 {
123
+ font-size: 1.1rem;
124
+ font-weight: 600;
125
+ margin-bottom: 0.5rem;
126
+ }
127
+
128
+ .feature p {
129
+ font-size: 0.9rem;
130
+ color: var(--text-muted);
131
+ }
132
+
133
+ /* Install Section */
134
+ .install-section {
135
+ background: var(--bg-card);
136
+ padding: 2.5rem;
137
+ border-radius: 1.5rem;
138
+ margin: 3rem 0;
139
+ border: 1px solid var(--border);
140
+ }
141
+
142
+ .install-section h2 {
143
+ text-align: center;
144
+ margin-bottom: 2rem;
145
+ font-size: 1.75rem;
146
+ }
147
+
148
+ .install-steps {
149
+ display: flex;
150
+ flex-direction: column;
151
+ gap: 1.5rem;
152
+ }
153
+
154
+ .step {
155
+ display: flex;
156
+ align-items: flex-start;
157
+ gap: 1rem;
158
+ }
159
+
160
+ .step-number {
161
+ background: var(--gradient);
162
+ color: white;
163
+ width: 2.5rem;
164
+ height: 2.5rem;
165
+ border-radius: 50%;
166
+ display: flex;
167
+ align-items: center;
168
+ justify-content: center;
169
+ font-weight: 700;
170
+ flex-shrink: 0;
171
+ }
172
+
173
+ .step-content h4 {
174
+ font-size: 1.1rem;
175
+ margin-bottom: 0.25rem;
176
+ }
177
+
178
+ .step-content p {
179
+ color: var(--text-muted);
180
+ font-size: 0.9rem;
181
+ margin-bottom: 0.5rem;
182
+ }
183
+
184
+ .step-content code {
185
+ display: inline-block;
186
+ background: rgba(0, 0, 0, 0.5);
187
+ padding: 0.5rem 1rem;
188
+ border-radius: 0.5rem;
189
+ font-family: 'JetBrains Mono', 'Fira Code', monospace;
190
+ font-size: 0.85rem;
191
+ color: var(--accent);
192
+ border: 1px solid var(--border);
193
+ }
194
+
195
+ /* Architecture */
196
+ .architecture {
197
+ background: var(--bg-card);
198
+ padding: 2rem;
199
+ border-radius: 1rem;
200
+ margin: 2rem 0;
201
+ border: 1px solid var(--border);
202
+ }
203
+
204
+ .architecture h2 {
205
+ text-align: center;
206
+ margin-bottom: 1.5rem;
207
+ }
208
+
209
+ .architecture pre {
210
+ font-family: 'JetBrains Mono', 'Fira Code', monospace;
211
+ font-size: 0.75rem;
212
+ overflow-x: auto;
213
+ color: var(--text-muted);
214
+ line-height: 1.4;
215
+ }
216
+
217
+ /* Footer */
218
+ .footer {
219
+ text-align: center;
220
+ padding: 3rem 0 2rem;
221
+ color: var(--text-muted);
222
+ }
223
+
224
+ .footer .links {
225
+ margin-top: 0.5rem;
226
+ }
227
+
228
+ .footer a {
229
+ color: var(--primary);
230
+ text-decoration: none;
231
+ transition: color 0.2s;
232
+ }
233
+
234
+ .footer a:hover {
235
+ color: var(--secondary);
236
+ }