FabienDanieau committed on
Commit
14caa4d
·
1 Parent(s): 3628b7f

enable the headless mode

Browse files
pyproject.toml CHANGED
@@ -16,7 +16,6 @@ dependencies = [
16
  "gradio>=5.49.0",
17
  "huggingface_hub>=0.34.4",
18
  "opencv-python>=4.12.0.88",
19
- "pygobject>=3.42.2,<=3.46.0",
20
 
21
  #Environment variables
22
  "python-dotenv",
 
16
  "gradio>=5.49.0",
17
  "huggingface_hub>=0.34.4",
18
  "opencv-python>=4.12.0.88",
 
19
 
20
  #Environment variables
21
  "python-dotenv",
src/reachy_mini_conversation_demo/audio/gstreamer.py DELETED
@@ -1,213 +0,0 @@
1
- import logging
2
- from typing import Optional
3
- from threading import Thread
4
-
5
- import gi
6
-
7
-
8
- gi.require_version("Gst", "1.0")
9
- gi.require_version("GstApp", "1.0")
10
- from gi.repository import Gst, GLib, GstApp # noqa: E402
11
-
12
-
13
- class GstPlayer:
14
- """Audio player using GStreamer."""
15
-
16
- def __init__(self, sample_rate: int = 24000, device_name: Optional[str] = None):
17
- """Initialize player."""
18
- self._logger = logging.getLogger(__name__)
19
- Gst.init(None)
20
- self._loop = GLib.MainLoop()
21
- self._thread_bus_calls: Optional[Thread] = None
22
-
23
- self.pipeline = Gst.Pipeline.new("audio_player")
24
-
25
- # Create elements
26
- self.appsrc = Gst.ElementFactory.make("appsrc", None)
27
- self.appsrc.set_property("format", Gst.Format.TIME)
28
- self.appsrc.set_property("is-live", True)
29
- caps = Gst.Caps.from_string(f"audio/x-raw,format=S16LE,channels=1,rate={sample_rate},layout=interleaved")
30
- self.appsrc.set_property("caps", caps)
31
- queue = Gst.ElementFactory.make("queue")
32
- audioconvert = Gst.ElementFactory.make("audioconvert")
33
- audioresample = Gst.ElementFactory.make("audioresample")
34
-
35
- # Try to pin specific output device; fallback to autoaudiosink
36
- audiosink = _create_device_element(direction="sink", name_substr=device_name) or Gst.ElementFactory.make(
37
- "autoaudiosink"
38
- )
39
-
40
- self.pipeline.add(self.appsrc)
41
- self.pipeline.add(queue)
42
- self.pipeline.add(audioconvert)
43
- self.pipeline.add(audioresample)
44
- self.pipeline.add(audiosink)
45
-
46
- self.appsrc.link(queue)
47
- queue.link(audioconvert)
48
- audioconvert.link(audioresample)
49
- audioresample.link(audiosink)
50
-
51
- def _on_bus_message(self, bus: Gst.Bus, msg: Gst.Message, loop) -> bool: # type: ignore[no-untyped-def]
52
- t = msg.type
53
- if t == Gst.MessageType.EOS:
54
- self._logger.warning("End-of-stream")
55
- return False
56
-
57
- elif t == Gst.MessageType.ERROR:
58
- err, debug = msg.parse_error()
59
- self._logger.error(f"Error: {err} {debug}")
60
- return False
61
-
62
- return True
63
-
64
- def _handle_bus_calls(self) -> None:
65
- self._logger.debug("starting bus message loop")
66
- bus = self.pipeline.get_bus()
67
- bus.add_watch(GLib.PRIORITY_DEFAULT, self._on_bus_message, self._loop)
68
- self._loop.run() # type: ignore[no-untyped-call]
69
- bus.remove_watch()
70
- self._logger.debug("bus message loop stopped")
71
-
72
- def play(self):
73
- """Start playback."""
74
- self.pipeline.set_state(Gst.State.PLAYING)
75
- self._thread_bus_calls = Thread(target=self._handle_bus_calls, daemon=True)
76
- self._thread_bus_calls.start()
77
-
78
- def push_sample(self, data: bytes):
79
- """Push audio sample (bytes) to playback pipeline."""
80
- buf = Gst.Buffer.new_wrapped(data)
81
- self.appsrc.push_buffer(buf)
82
-
83
- def stop(self):
84
- """Stop playback and clean up."""
85
- logger = logging.getLogger(__name__)
86
- self._loop.quit()
87
- self.pipeline.set_state(Gst.State.NULL)
88
- if self._thread_bus_calls is not None:
89
- self._thread_bus_calls.join()
90
- logger.info("Stopped Player")
91
-
92
-
93
- class GstRecorder:
94
- """Audio recorder using GStreamer."""
95
-
96
- def __init__(self, sample_rate: int = 24000, device_name: Optional[str] = None):
97
- """Initialize recorder."""
98
- self._logger = logging.getLogger(__name__)
99
- Gst.init(None)
100
- self._loop = GLib.MainLoop()
101
- self._thread_bus_calls: Optional[Thread] = None
102
-
103
- self.pipeline = Gst.Pipeline.new("audio_recorder")
104
-
105
- # Create elements: try specific mic; fallback to default
106
- autoaudiosrc = _create_device_element(direction="source", name_substr=device_name) or Gst.ElementFactory.make(
107
- "autoaudiosrc", None
108
- )
109
-
110
- queue = Gst.ElementFactory.make("queue", None)
111
- audioconvert = Gst.ElementFactory.make("audioconvert", None)
112
- audioresample = Gst.ElementFactory.make("audioresample", None)
113
- self.appsink = Gst.ElementFactory.make("appsink", None)
114
-
115
- if not all([autoaudiosrc, queue, audioconvert, audioresample, self.appsink]):
116
- raise RuntimeError("Failed to create GStreamer elements")
117
-
118
- # Force mono/S16LE at 24000; resample handles device SR (e.g., 16000 → 24000)
119
- caps = Gst.Caps.from_string(f"audio/x-raw,channels=1,rate={sample_rate},format=S16LE")
120
- self.appsink.set_property("caps", caps)
121
-
122
- # Build pipeline
123
- self.pipeline.add(autoaudiosrc)
124
- self.pipeline.add(queue)
125
- self.pipeline.add(audioconvert)
126
- self.pipeline.add(audioresample)
127
- self.pipeline.add(self.appsink)
128
-
129
- autoaudiosrc.link(queue)
130
- queue.link(audioconvert)
131
- audioconvert.link(audioresample)
132
- audioresample.link(self.appsink)
133
-
134
- def _on_bus_message(self, bus: Gst.Bus, msg: Gst.Message, loop) -> bool: # type: ignore[no-untyped-def]
135
- t = msg.type
136
- if t == Gst.MessageType.EOS:
137
- self._logger.warning("End-of-stream")
138
- return False
139
-
140
- elif t == Gst.MessageType.ERROR:
141
- err, debug = msg.parse_error()
142
- self._logger.error(f"Error: {err} {debug}")
143
- return False
144
-
145
- return True
146
-
147
- def _handle_bus_calls(self) -> None:
148
- self._logger.debug("starting bus message loop")
149
- bus = self.pipeline.get_bus()
150
- bus.add_watch(GLib.PRIORITY_DEFAULT, self._on_bus_message, self._loop)
151
- self._loop.run() # type: ignore[no-untyped-call]
152
- bus.remove_watch()
153
- self._logger.debug("bus message loop stopped")
154
-
155
- def record(self):
156
- """Start recording."""
157
- self.pipeline.set_state(Gst.State.PLAYING)
158
- self._thread_bus_calls = Thread(target=self._handle_bus_calls, daemon=True)
159
- self._thread_bus_calls.start()
160
-
161
- def get_sample(self):
162
- sample = self.appsink.pull_sample()
163
- data = None
164
- if isinstance(sample, Gst.Sample):
165
- buf = sample.get_buffer()
166
- if buf is None:
167
- self._logger.warning("Buffer is None")
168
-
169
- data = buf.extract_dup(0, buf.get_size())
170
- return data
171
-
172
- def stop(self):
173
- """Stop recording and clean up."""
174
- logger = logging.getLogger(__name__)
175
- self._loop.quit()
176
- self.pipeline.set_state(Gst.State.NULL)
177
- if self._thread_bus_calls is not None:
178
- self._thread_bus_calls.join()
179
- logger.info("Stopped Recorder")
180
-
181
-
182
- def _create_device_element(direction: str, name_substr: Optional[str]) -> Optional[Gst.Element]:
183
- """direction: 'source' or 'sink'.
184
-
185
- name_substr: case-insensitive substring matching device display name/description.
186
- """
187
- logger = logging.getLogger(__name__)
188
-
189
- if not name_substr:
190
- logger.error(f"Device select: no name_substr for {direction}; returning None")
191
- return None
192
-
193
- monitor = Gst.DeviceMonitor.new()
194
- klass = "Audio/Source" if direction == "source" else "Audio/Sink"
195
- monitor.add_filter(klass, None)
196
- monitor.start()
197
-
198
- try:
199
- for dev in monitor.get_devices() or []:
200
- disp = dev.get_display_name() or ""
201
- props = dev.get_properties()
202
- desc = props.get_string("device.description") if props and props.has_field("device.description") else ""
203
- logger.info(f"Device candidate: disp='{disp}', desc='{desc}'")
204
-
205
- if name_substr.lower() in disp.lower() or name_substr.lower() in desc.lower():
206
- elem = dev.create_element(None)
207
- factory = elem.get_factory().get_name() if elem and elem.get_factory() else "<?>"
208
- logger.info(f"Using {direction} device: '{disp or desc}' (factory='{factory}')")
209
- return elem
210
- finally:
211
- monitor.stop()
212
- logging.getLogger(__name__).warning("Requested %s '%s' not found; using auto*", direction, name_substr)
213
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/reachy_mini_conversation_demo/console.py CHANGED
@@ -1,67 +1,37 @@
 
 
 
 
 
1
  import asyncio
2
  import logging
3
 
4
- import numpy as np
5
- from gi.repository import Gst
6
 
7
- from fastrtc import AdditionalOutputs
8
- from reachy_mini_conversation_demo.audio.gstreamer import GstPlayer, GstRecorder
9
- from reachy_mini_conversation_demo.audio.head_wobbler import SAMPLE_RATE
10
  from reachy_mini_conversation_demo.openai_realtime import OpenaiRealtimeHandler
11
 
 
12
  logger = logging.getLogger(__name__)
13
 
14
 
15
  class LocalStream:
16
- """Bidirectional local audio stream: records mic frames to the handler and
17
- plays handler audio frames to the speaker."""
18
 
19
- def __init__(self, handler: OpenaiRealtimeHandler):
20
  """Initialize the stream with an OpenAI realtime handler and pipelines."""
21
  self.handler = handler
 
22
  self._stop_event = asyncio.Event()
23
-
24
- self.recorder = GstRecorder(sample_rate=SAMPLE_RATE)
25
- self.player = GstPlayer(sample_rate=SAMPLE_RATE)
26
-
27
  # Allow the handler to flush the player queue when appropriate.
28
  self.handler._clear_queue = self.clear_queue # type: ignore[assignment]
29
 
30
- # # Optional GStreamer bus monitoring (currently disabled)
31
- # player_bus = self.player.pipeline.get_bus()
32
- # player_bus.add_signal_watch()
33
- # player_bus.connect("message", self.on_player_message)
34
-
35
- # def on_player_message(self, bus, message):
36
- # """Handle GStreamer player state messages."""
37
- # # logger.info(f"Player message: {message.type}")
38
- # if message.type == Gst.MessageType.STATE_CHANGED:
39
- # old_state, new_state, pending_state = message.parse_state_changed()
40
- # if new_state != old_state and new_state == Gst.State.PLAYING:
41
- # print("Player is now playing")
42
- # self.recorder.pipeline.set_state(Gst.State.PAUSED)
43
-
44
- # if new_state != old_state and new_state == Gst.State.PAUSED:
45
- # print("Player is now paused")
46
- # self.recorder.pipeline.set_state(Gst.State.PLAYING)
47
-
48
- # if message.type == Gst.MessageType.EOS:
49
- # self.recorder.pipeline.set_state(Gst.State.PLAYING)
50
- # print("Player reached end of stream, restarting recorder")
51
-
52
- def clear_queue(self) -> None:
53
- """Flush the player's appsrc to drop any queued audio immediately"""
54
- self.player.pipeline.set_state(Gst.State.PAUSED)
55
- self.player.appsrc.send_event(Gst.Event.new_flush_start())
56
- self.player.appsrc.send_event(Gst.Event.new_flush_stop(reset_time=True))
57
- self.player.pipeline.set_state(Gst.State.PLAYING)
58
- logger.info("Cleared player queue")
59
-
60
- def start(self) -> None:
61
- """Start the recorder/player and run the async processing loops"""
62
  self._stop_event.clear()
63
- self.recorder.record()
64
- self.player.play()
65
 
66
  async def runner() -> None:
67
  tasks = [
@@ -76,18 +46,26 @@ class LocalStream:
76
  def stop(self) -> None:
77
  """Stop the stream and underlying GStreamer pipelines."""
78
  self._stop_event.set()
79
- self.recorder.stop()
80
- self.player.stop()
 
 
 
 
 
81
 
82
  async def record_loop(self) -> None:
83
  """Read mic frames from the recorder and forward them to the handler."""
84
  logger.info("Starting receive loop")
85
  while not self._stop_event.is_set():
86
- data = self.recorder.get_sample()
87
  if data is not None:
88
- frame = np.frombuffer(data, dtype=np.int16).squeeze()
89
- await self.handler.receive((0, frame))
90
- await asyncio.sleep(0) # yield to event loop
 
 
 
91
 
92
  async def play_loop(self) -> None:
93
  """Fetch outputs from the handler: log text and play audio frames."""
@@ -97,15 +75,20 @@ class LocalStream:
97
  if isinstance(data, AdditionalOutputs):
98
  for msg in data.args:
99
  content = msg.get("content", "")
100
- logger.info(
101
- "role=%s content=%s",
102
- msg.get("role"),
103
- content if len(content) < 500 else content[:500] + "…",
104
- )
 
105
 
106
  elif isinstance(data, tuple):
107
- _, frame = data
108
- self.player.push_sample(frame.tobytes())
 
 
 
 
109
 
110
  # else: ignore None/unknown outputs
111
 
 
1
+ """Bidirectional local audio stream.
2
+
3
+ records mic frames to the handler and plays handler audio frames to the speaker.
4
+ """
5
+
6
  import asyncio
7
  import logging
8
 
9
+ import librosa
10
+ from fastrtc import AdditionalOutputs, audio_to_int16, audio_to_float32
11
 
12
+ from reachy_mini import ReachyMini
 
 
13
  from reachy_mini_conversation_demo.openai_realtime import OpenaiRealtimeHandler
14
 
15
+
16
  logger = logging.getLogger(__name__)
17
 
18
 
19
  class LocalStream:
20
+ """LocalStream using Reachy Mini's recorder/player."""
 
21
 
22
+ def __init__(self, handler: OpenaiRealtimeHandler, robot: ReachyMini):
23
  """Initialize the stream with an OpenAI realtime handler and pipelines."""
24
  self.handler = handler
25
+ self._robot = robot
26
  self._stop_event = asyncio.Event()
 
 
 
 
27
  # Allow the handler to flush the player queue when appropriate.
28
  self.handler._clear_queue = self.clear_queue # type: ignore[assignment]
29
 
30
+ def launch(self) -> None:
31
+ """Start the recorder/player and run the async processing loops."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  self._stop_event.clear()
33
+ self._robot.media.start_recording()
34
+ self._robot.media.start_playing()
35
 
36
  async def runner() -> None:
37
  tasks = [
 
46
  def stop(self) -> None:
47
  """Stop the stream and underlying GStreamer pipelines."""
48
  self._stop_event.set()
49
+ self._robot.media.stop_recording()
50
+ self._robot.media.stop_playing()
51
+
52
+ def clear_queue(self) -> None:
53
+ """Flush the player's appsrc to drop any queued audio immediately."""
54
+ logger.info("User intervention: flushing player queue")
55
+ self.handler.output_queue = asyncio.Queue()
56
 
57
  async def record_loop(self) -> None:
58
  """Read mic frames from the recorder and forward them to the handler."""
59
  logger.info("Starting receive loop")
60
  while not self._stop_event.is_set():
61
+ data = self._robot.media.get_audio_sample()
62
  if data is not None:
63
+ frame_mono = data.T[0] # both channels are identical
64
+ frame = audio_to_int16(frame_mono)
65
+ await self.handler.receive((16000, frame))
66
+ # await asyncio.sleep(0) # yield to event loop
67
+ else:
68
+ await asyncio.sleep(0.01) # avoid busy loop
69
 
70
  async def play_loop(self) -> None:
71
  """Fetch outputs from the handler: log text and play audio frames."""
 
75
  if isinstance(data, AdditionalOutputs):
76
  for msg in data.args:
77
  content = msg.get("content", "")
78
+ if isinstance(content, str):
79
+ logger.info(
80
+ "role=%s content=%s",
81
+ msg.get("role"),
82
+ content if len(content) < 500 else content[:500] + "…",
83
+ )
84
 
85
  elif isinstance(data, tuple):
86
+ sample_rate, frame = data
87
+ device_sample_rate = self._robot.media.get_audio_samplerate()
88
+ frame = audio_to_float32(frame.squeeze())
89
+ if sample_rate != device_sample_rate:
90
+ frame = librosa.resample(frame, orig_sr=sample_rate, target_sr=device_sample_rate)
91
+ self._robot.media.push_audio_sample(frame)
92
 
93
  # else: ignore None/unknown outputs
94
 
src/reachy_mini_conversation_demo/main.py CHANGED
@@ -3,8 +3,8 @@
3
  import os
4
 
5
  import gradio as gr
6
- import fastrtc
7
-
8
 
9
  from reachy_mini import ReachyMini
10
  from reachy_mini_conversation_demo.moves import MovementManager
@@ -63,20 +63,23 @@ def main():
63
  logger.debug(f"Chatbot avatar images: {chatbot.avatar_images}")
64
 
65
  handler = OpenaiRealtimeHandler(deps)
66
- local_stream = LocalStream(handler)
67
-
68
- # stream = fastrtc.Stream(
69
- # handler=handler,
70
- # mode="send-receive",
71
- # modality="audio",
72
- # additional_inputs=[chatbot],
73
- # additional_outputs=[chatbot],
74
- # additional_outputs_handler=update_chatbot,
75
- # ui_args={"title": "Talk with Reachy Mini"},
76
- # )
77
 
78
- # app = fastrtc.FastAPI()
79
- # app = gr.mount_gradio_app(app, stream.ui, path="/")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
  # Each async service → its own thread/loop
82
  movement_manager.start()
@@ -85,11 +88,10 @@ def main():
85
  camera_worker.start()
86
 
87
  try:
88
- local_stream.start()
89
- # stream.ui.launch()
90
  except KeyboardInterrupt:
91
  logger.info("Exiting...")
92
- local_stream.stop()
93
  finally:
94
  movement_manager.stop()
95
  head_wobbler.stop()
 
3
  import os
4
 
5
  import gradio as gr
6
+ from fastapi import FastAPI
7
+ from fastrtc import Stream
8
 
9
  from reachy_mini import ReachyMini
10
  from reachy_mini_conversation_demo.moves import MovementManager
 
63
  logger.debug(f"Chatbot avatar images: {chatbot.avatar_images}")
64
 
65
  handler = OpenaiRealtimeHandler(deps)
 
 
 
 
 
 
 
 
 
 
 
66
 
67
+ stream_manager = None
68
+ if args.gradio:
69
+ stream = Stream(
70
+ handler=handler,
71
+ mode="send-receive",
72
+ modality="audio",
73
+ additional_inputs=[chatbot],
74
+ additional_outputs=[chatbot],
75
+ additional_outputs_handler=update_chatbot,
76
+ ui_args={"title": "Talk with Reachy Mini"},
77
+ )
78
+ stream_manager = stream.ui
79
+ app = FastAPI()
80
+ app = gr.mount_gradio_app(app, stream.ui, path="/")
81
+ else:
82
+ stream_manager = LocalStream(handler, robot)
83
 
84
  # Each async service → its own thread/loop
85
  movement_manager.start()
 
88
  camera_worker.start()
89
 
90
  try:
91
+ stream_manager.launch()
 
92
  except KeyboardInterrupt:
93
  logger.info("Exiting...")
94
+ stream_manager.stop()
95
  finally:
96
  movement_manager.stop()
97
  head_wobbler.stop()
src/reachy_mini_conversation_demo/openai_realtime.py CHANGED
@@ -27,8 +27,8 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
27
  """Initialize the handler."""
28
  super().__init__(
29
  expected_layout="mono",
30
- output_sample_rate=24000,
31
- input_sample_rate=24000,
32
  )
33
  self.deps = deps
34
 
 
27
  """Initialize the handler."""
28
  super().__init__(
29
  expected_layout="mono",
30
+ output_sample_rate=24000, # openai outputs
31
+ input_sample_rate=16000, # respeaker output
32
  )
33
  self.deps = deps
34