Spaces: Running
RemiFabre committed · Commit 41c2517 · 1 Parent(s): 46fd62d
Test: copy ALL files from conv app to match exactly
This view is limited to 50 files because it contains too many changes. See raw diff.
- .gitignore +1 -1
- index.html +122 -39
- pyproject.toml +107 -44
- src/reachy_mini_conversation_app/__init__.py +1 -0
- src/reachy_mini_conversation_app/audio/__init__.py +1 -0
- src/reachy_mini_conversation_app/audio/head_wobbler.py +181 -0
- src/reachy_mini_conversation_app/audio/speech_tapper.py +268 -0
- src/reachy_mini_conversation_app/camera_worker.py +241 -0
- src/reachy_mini_conversation_app/config.py +60 -0
- src/reachy_mini_conversation_app/console.py +499 -0
- src/reachy_mini_conversation_app/dance_emotion_moves.py +154 -0
- src/reachy_mini_conversation_app/gradio_personality.py +301 -0
- src/reachy_mini_conversation_app/headless_personality.py +102 -0
- src/reachy_mini_conversation_app/headless_personality_ui.py +276 -0
- src/reachy_mini_conversation_app/images/reachymini_avatar.png +3 -0
- src/reachy_mini_conversation_app/images/user_avatar.png +3 -0
- src/reachy_mini_conversation_app/main.py +253 -0
- src/reachy_mini_conversation_app/moves.py +849 -0
- src/reachy_mini_conversation_app/openai_realtime.py +719 -0
- src/reachy_mini_conversation_app/profiles/__init__.py +1 -0
- src/reachy_mini_conversation_app/profiles/cosmic_kitchen/instructions.txt +49 -0
- src/reachy_mini_conversation_app/profiles/cosmic_kitchen/tools.txt +8 -0
- src/reachy_mini_conversation_app/profiles/default/instructions.txt +1 -0
- src/reachy_mini_conversation_app/profiles/default/tools.txt +8 -0
- src/reachy_mini_conversation_app/profiles/example/instructions.txt +3 -0
- src/reachy_mini_conversation_app/profiles/example/sweep_look.py +127 -0
- src/reachy_mini_conversation_app/profiles/example/tools.txt +13 -0
- src/reachy_mini_conversation_app/profiles/mars_rover/instructions.txt +25 -0
- src/reachy_mini_conversation_app/profiles/mars_rover/tools.txt +8 -0
- src/reachy_mini_conversation_app/profiles/short_bored_teenager/instructions.txt +1 -0
- src/reachy_mini_conversation_app/profiles/short_bored_teenager/tools.txt +8 -0
- src/reachy_mini_conversation_app/profiles/short_captain_circuit/instructions.txt +1 -0
- src/reachy_mini_conversation_app/profiles/short_captain_circuit/tools.txt +8 -0
- src/reachy_mini_conversation_app/profiles/short_chess_coach/instructions.txt +1 -0
- src/reachy_mini_conversation_app/profiles/short_chess_coach/tools.txt +8 -0
- src/reachy_mini_conversation_app/profiles/short_hype_bot/instructions.txt +1 -0
- src/reachy_mini_conversation_app/profiles/short_hype_bot/tools.txt +8 -0
- src/reachy_mini_conversation_app/profiles/short_mad_scientist_assistant/instructions.txt +1 -0
- src/reachy_mini_conversation_app/profiles/short_mad_scientist_assistant/tools.txt +8 -0
- src/reachy_mini_conversation_app/profiles/short_nature_documentarian/instructions.txt +1 -0
- src/reachy_mini_conversation_app/profiles/short_nature_documentarian/tools.txt +8 -0
- src/reachy_mini_conversation_app/profiles/short_noir_detective/instructions.txt +1 -0
- src/reachy_mini_conversation_app/profiles/short_noir_detective/tools.txt +8 -0
- src/reachy_mini_conversation_app/profiles/short_time_traveler/instructions.txt +1 -0
- src/reachy_mini_conversation_app/profiles/short_time_traveler/tools.txt +8 -0
- src/reachy_mini_conversation_app/profiles/short_victorian_butler/instructions.txt +1 -0
- src/reachy_mini_conversation_app/profiles/short_victorian_butler/tools.txt +8 -0
- src/reachy_mini_conversation_app/profiles/sorry_bro/instructions.txt +6 -0
- src/reachy_mini_conversation_app/profiles/sorry_bro/tools.txt +8 -0
- src/reachy_mini_conversation_app/prompts.py +104 -0
.gitignore
CHANGED
@@ -58,4 +58,4 @@ cache/
 .nfs*
 
 # User-created personalities (managed by UI)
-src/
+src/reachy_mini_conversation_app/profiles/user_personalities/
index.html
CHANGED
@@ -1,42 +1,125 @@
 <!doctype html>
-<html
-<head>
-<meta charset="utf-8" />
-<meta name="viewport" content="width=device-width, initial-scale=1" />
-<title>Test Conv Pipe</title>
-<link rel="stylesheet" href="style.css" />
-</head>
-<body>
-<div class="ambient"></div>
-<div class="container">
-<header class="hero">
-<div class="pill">Reachy Mini App</div>
-<h1>Test Conv Pipe</h1>
-<p class="subtitle">A conversation app for Reachy Mini robot.</p>
-</header>
-reachy-mini-daemon --sim # in another terminal
-python -m test_conv_pipe</code></pre>
-</div>
-</
+<html>
+
+<head>
+  <meta charset="utf-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1" />
+  <title>Reachy Mini Conversation App</title>
+  <link rel="preconnect" href="https://fonts.googleapis.com">
+  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
+  <link href="https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;500;600;700&family=Manrope:wght@400;500;600&display=swap" rel="stylesheet">
+  <link rel="stylesheet" href="style.css" />
+</head>
+
+<body>
+  <header class="hero">
+    <div class="topline">
+      <div class="brand">
+        <span class="logo">🤖</span>
+        <span class="brand-name">Reachy Mini</span>
+      </div>
+      <div class="pill">Realtime voice · Vision aware · Expressive motion</div>
+    </div>
+    <div class="hero-grid">
+      <div class="hero-copy">
+        <p class="eyebrow">Conversation App</p>
+        <h1>Talk, see, and move together.</h1>
+        <p class="lede">
+          A friendly, camera-aware companion for Reachy Mini. Chat out loud, watch it follow faces, dance, or react with recorded emotions—all while streaming transcripts in a clean web UI.
+        </p>
+        <div class="hero-actions">
+          <a class="btn primary" href="#highlights">Explore features</a>
+          <a class="btn ghost" href="#story">See how it feels</a>
+        </div>
+        <div class="hero-badges">
+          <span>Low-latency voice loop</span>
+          <span>Camera insights on demand</span>
+          <span>Choreographed dances & emotions</span>
+          <span>Personality profiles via web UI</span>
+        </div>
+      </div>
+      <div class="hero-visual">
+        <div class="glass-card">
+          <img src="docs/assets/reachy_mini_dance.gif" alt="Reachy Mini dancing" class="hero-gif">
+          <p class="caption">Reachy Mini can move, dance, and emote while holding a natural conversation.</p>
+        </div>
+      </div>
+    </div>
+  </header>
+
+  <section id="highlights" class="section features">
+    <div class="section-header">
+      <p class="eyebrow">What’s inside</p>
+      <h2>All-in-one conversational layer for your robot</h2>
+      <p class="intro">
+        The app blends realtime speech, vision, and motion so Reachy Mini feels present..
+      </p>
+    </div>
+    <div class="feature-grid">
+      <div class="feature-card">
+        <span class="icon">🎤</span>
+        <h3>Natural voice chat</h3>
+        <p>Talk freely and get fast, high-quality replies powered by realtime models.</p>
+      </div>
+      <div class="feature-card">
+        <span class="icon">🎥</span>
+        <h3>Vision-aware replies</h3>
+        <p>Ask the camera tool to see what’s in front, track a face, or keep attention on whoever is speaking.</p>
+      </div>
+      <div class="feature-card">
+        <span class="icon">💃</span>
+        <h3>Expressive motion</h3>
+        <p>Queue dances, play recorded emotions while Reachy listens and talks.</p>
+      </div>
+      <div class="feature-card">
+        <span class="icon">🧠</span>
+        <h3>Personalities on demand</h3>
+        <p>Switch conversation styles through profiles and decide which tools (dance, camera, tracking) each persona can use.</p>
+      </div>
+      <div class="feature-card">
+        <span class="icon">🌐</span>
+        <h3>Ready for your setup</h3>
+        <p>Works with wired or wireless Reachy Mini, and can run vision locally or through the default cloud model.</p>
+      </div>
+    </div>
+  </section>
+
+  <section id="story" class="section story">
+    <div class="story-grid">
+      <div class="story-card">
+        <p class="eyebrow">How it feels</p>
+        <h3>From hello to helpful in seconds</h3>
+        <ul class="story-list">
+          <li><span>👋</span> Say “Hey Reachy” and start chatting—no extra setup in the moment.</li>
+          <li><span>👀</span> Ask what it sees; it can peek through the camera or keep focus on your face.</li>
+          <li><span>🎭</span> Trigger emotions or dance breaks to keep the conversation lively.</li>
+          <li><span>📝</span> Follow along with live transcripts in the web UI or run audio-only from the console.</li>
+        </ul>
+      </div>
+      <div class="story-card secondary">
+        <p class="eyebrow">Where it shines</p>
+        <h3>Great for demos, teaching, and playful exploration</h3>
+        <p class="story-text">
+          Show off how Reachy Mini listens, responds, and moves in sync. Whether you’re guiding a class, hosting a booth, or experimenting at home, the app keeps the robot expressive without juggling scripts or joystick controls.
+        </p>
+        <div class="chips">
+          <span class="chip">Live conversation</span>
+          <span class="chip">Face tracking</span>
+          <span class="chip">Camera tool</span>
+          <span class="chip">Dance library</span>
+          <span class="chip">Profiles & tools</span>
+        </div>
+      </div>
+    </div>
+  </section>
+
+  <footer class="footer">
+    <p>
+      Reachy Mini Conversation App by <a href="https://github.com/pollen-robotics" target="_blank" rel="noopener">Pollen Robotics</a>.
+      Explore more apps on <a href="https://huggingface.co/spaces/pollen-robotics/Reachy_Mini_Apps" target="_blank" rel="noopener">Hugging Face Spaces</a>.
+    </p>
+  </footer>
+
+</body>
+
+</html>
pyproject.toml
CHANGED
@@ -1,58 +1,124 @@
 [build-system]
-requires = [
+requires = ["setuptools"]
 build-backend = "setuptools.build_meta"
 
 [project]
-name = "
+name = "reachy_mini_conversation_app"
 version = "0.2.2"
+authors = [{ name = "Pollen Robotics", email = "contact@pollen-robotics.com" }]
 description = ""
 readme = "README.md"
 requires-python = ">=3.10"
-dependencies = [
+dependencies = [
+    #Media
+    "aiortc>=1.13.0",
+    "fastrtc>=0.0.34",
+    "gradio==5.50.1.dev1",
+    "huggingface-hub==1.3.0",
+    "opencv-python>=4.12.0.88",
 
+    #Environment variables
+    "python-dotenv",
+
+    #OpenAI
+    "openai>=2.1",
+
+    #Reachy mini
+    "reachy_mini_dances_library",
+    "reachy_mini_toolbox",
+    "reachy-mini >= 1.2.11",
+    "eclipse-zenoh~=1.7.0",
+    "gradio_client>=1.13.3",
+]
 
 [project.optional-dependencies]
-reachy_mini_wireless = [
+reachy_mini_wireless = [
+    "PyGObject>=3.42.2,<=3.46.0",
+    "gst-signalling>=1.1.2",
+]
+local_vision = [
+    "torch>=2.1",
+    "transformers==5.0.0rc2",
+    "num2words",
+]
+yolo_vision = [
+    "ultralytics",
+    "supervision",
+]
+mediapipe_vision = [
+    "mediapipe==0.10.14",
+]
+all_vision = [
+    "torch>=2.1",
+    "transformers==5.0.0rc2",
+    "num2words",
+    "ultralytics",
+    "supervision",
+    "mediapipe==0.10.14",
+]
+
+[dependency-groups]
+dev = [
+    "pytest",
+    "pytest-asyncio",
+    "ruff==0.12.0",
+    "mypy==1.18.2",
+    "pre-commit",
+    "types-requests",
+    "python-semantic-release>=10.5.3",
+]
 
 [project.scripts]
+reachy-mini-conversation-app = "reachy_mini_conversation_app.main:main"
+
+[project.entry-points."reachy_mini_apps"]
+reachy_mini_conversation_app = "reachy_mini_conversation_app.main:ReachyMiniConversationApp"
 
 [tool.setuptools]
+package-dir = { "" = "src" }
 include-package-data = true
 
-[tool.
-exclude = [ ".venv", "dist", "build", "**/__pycache__", "*.egg-info", ".mypy_cache", ".pytest_cache",]
-
-[tool.mypy]
-python_version = "3.12"
-files = [ "src/",]
-ignore_missing_imports = true
-strict = true
-show_error_codes = true
-warn_unused_ignores = true
-
-[project.entry-points.reachy_mini_apps]
-test_conv_pipe = "test_conv_pipe.main:TestConvPipe"
-
-[tool.setuptools.package-dir]
-"" = "src"
+[tool.setuptools.packages.find]
+where = ["src"]
 
 [tool.setuptools.package-data]
+reachy_mini_conversation_app = [
+    "images/*",
+    "static/*",
+    ".env.example",
+    "demos/**/*.txt",
+    "prompts_library/*.txt",
+    "profiles/**/*.txt",
+    "prompts/**/*.txt",
+]
+
+[tool.ruff]
+line-length = 119
+exclude = [".venv", "dist", "build", "**/__pycache__", "*.egg-info", ".mypy_cache", ".pytest_cache"]
 
 [tool.ruff.lint]
-select = [
+select = [
+    "E", # pycodestyle errors
+    "F", # pyflakes
+    "W", # pycodestyle warnings
+    "I", # isort
+    "C4", # flake8-comprehensions
+    "D", # pydocstyle
+]
+ignore = [
+    "E501", # handled by formatter
+    "D100", # ignore missing module docstrings
+    "D203", # blank line before class docstring (conflicts with D211)
+    "D213", # summary on second line (conflicts with D212)
+]
+
+[tool.ruff.lint.isort]
+length-sort = true
+lines-after-imports = 2
+no-lines-before = ["standard-library", "local-folder"]
+known-local-folder = ["reachy_mini_conversation_app"]
+known-first-party = ["reachy_mini", "reachy_mini_dances_library", "reachy_mini_toolbox"]
+split-on-trailing-comma = true
 
 [tool.ruff.format]
 quote-style = "double"
@@ -60,13 +126,10 @@ indent-style = "space"
 skip-magic-trailing-comma = false
 line-ending = "auto"
 
-[tool.
-known-local-folder = [ "test_conv_pipe",]
-known-first-party = [ "reachy_mini", "reachy_mini_dances_library", "reachy_mini_toolbox",]
-split-on-trailing-comma = true
+[tool.mypy]
+python_version = "3.12"
+files = ["src/"]
+ignore_missing_imports = true
+strict = true
+show_error_codes = true
+warn_unused_ignores = true
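A minimal sketch (not part of the commit) of how the `reachy_mini_apps` entry point declared above could be discovered at runtime; the group name and object reference come from the diff, while the surrounding loop is illustrative only.

from importlib.metadata import entry_points

# List apps registered under the "reachy_mini_apps" entry-point group (Python 3.10+).
for ep in entry_points(group="reachy_mini_apps"):
    # Expected to include: reachy_mini_conversation_app ->
    # reachy_mini_conversation_app.main:ReachyMiniConversationApp
    app_class = ep.load()
    print(ep.name, app_class)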
src/reachy_mini_conversation_app/__init__.py
ADDED
@@ -0,0 +1 @@
"""Nothing (for ruff)."""
src/reachy_mini_conversation_app/audio/__init__.py
ADDED
@@ -0,0 +1 @@
"""Nothing (for ruff)."""
src/reachy_mini_conversation_app/audio/head_wobbler.py
ADDED
@@ -0,0 +1,181 @@
"""Moves head given audio samples."""

import time
import queue
import base64
import logging
import threading
from typing import Tuple
from collections.abc import Callable

import numpy as np
from numpy.typing import NDArray

from reachy_mini_conversation_app.audio.speech_tapper import HOP_MS, SwayRollRT


SAMPLE_RATE = 24000
MOVEMENT_LATENCY_S = 0.2  # seconds between audio and robot movement
logger = logging.getLogger(__name__)


class HeadWobbler:
    """Converts audio deltas (base64) into head movement offsets."""

    def __init__(self, set_speech_offsets: Callable[[Tuple[float, float, float, float, float, float]], None]) -> None:
        """Initialize the head wobbler."""
        self._apply_offsets = set_speech_offsets
        self._base_ts: float | None = None
        self._hops_done: int = 0

        self.audio_queue: "queue.Queue[Tuple[int, int, NDArray[np.int16]]]" = queue.Queue()
        self.sway = SwayRollRT()

        # Synchronization primitives
        self._state_lock = threading.Lock()
        self._sway_lock = threading.Lock()
        self._generation = 0

        self._stop_event = threading.Event()
        self._thread: threading.Thread | None = None

    def feed(self, delta_b64: str) -> None:
        """Thread-safe: push audio into the consumer queue."""
        buf = np.frombuffer(base64.b64decode(delta_b64), dtype=np.int16).reshape(1, -1)
        with self._state_lock:
            generation = self._generation
        self.audio_queue.put((generation, SAMPLE_RATE, buf))

    def start(self) -> None:
        """Start the head wobbler loop in a thread."""
        self._stop_event.clear()
        self._thread = threading.Thread(target=self.working_loop, daemon=True)
        self._thread.start()
        logger.debug("Head wobbler started")

    def stop(self) -> None:
        """Stop the head wobbler loop."""
        self._stop_event.set()
        if self._thread is not None:
            self._thread.join()
        logger.debug("Head wobbler stopped")

    def working_loop(self) -> None:
        """Convert audio deltas into head movement offsets."""
        hop_dt = HOP_MS / 1000.0

        logger.debug("Head wobbler thread started")
        while not self._stop_event.is_set():
            queue_ref = self.audio_queue
            try:
                chunk_generation, sr, chunk = queue_ref.get_nowait()  # (gen, sr, data)
            except queue.Empty:
                # avoid while to never exit
                time.sleep(MOVEMENT_LATENCY_S)
                continue

            try:
                with self._state_lock:
                    current_generation = self._generation
                if chunk_generation != current_generation:
                    continue

                if self._base_ts is None:
                    with self._state_lock:
                        if self._base_ts is None:
                            self._base_ts = time.monotonic()

                pcm = np.asarray(chunk).squeeze(0)
                with self._sway_lock:
                    results = self.sway.feed(pcm, sr)

                i = 0
                while i < len(results):
                    with self._state_lock:
                        if self._generation != current_generation:
                            break
                        base_ts = self._base_ts
                        hops_done = self._hops_done

                    if base_ts is None:
                        base_ts = time.monotonic()
                        with self._state_lock:
                            if self._base_ts is None:
                                self._base_ts = base_ts
                            hops_done = self._hops_done

                    target = base_ts + MOVEMENT_LATENCY_S + hops_done * hop_dt
                    now = time.monotonic()

                    if now - target >= hop_dt:
                        lag_hops = int((now - target) / hop_dt)
                        drop = min(lag_hops, len(results) - i - 1)
                        if drop > 0:
                            with self._state_lock:
                                self._hops_done += drop
                                hops_done = self._hops_done
                            i += drop
                            continue

                    if target > now:
                        time.sleep(target - now)
                        with self._state_lock:
                            if self._generation != current_generation:
                                break

                    r = results[i]
                    offsets = (
                        r["x_mm"] / 1000.0,
                        r["y_mm"] / 1000.0,
                        r["z_mm"] / 1000.0,
                        r["roll_rad"],
                        r["pitch_rad"],
                        r["yaw_rad"],
                    )

                    with self._state_lock:
                        if self._generation != current_generation:
                            break

                    self._apply_offsets(offsets)

                    with self._state_lock:
                        self._hops_done += 1
                    i += 1
            finally:
                queue_ref.task_done()
        logger.debug("Head wobbler thread exited")

    '''
    def drain_audio_queue(self) -> None:
        """Empty the audio queue."""
        try:
            while True:
                self.audio_queue.get_nowait()
        except QueueEmpty:
            pass
    '''

    def reset(self) -> None:
        """Reset the internal state."""
        with self._state_lock:
            self._generation += 1
            self._base_ts = None
            self._hops_done = 0

        # Drain any queued audio chunks from previous generations
        drained_any = False
        while True:
            try:
                _, _, _ = self.audio_queue.get_nowait()
            except queue.Empty:
                break
            else:
                drained_any = True
                self.audio_queue.task_done()

        with self._sway_lock:
            self.sway.reset()

        if drained_any:
            logger.debug("Head wobbler queue drained during reset")
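A usage sketch for the HeadWobbler above (assumes the package is installed and importable); it feeds one second of silence as base64-encoded 24 kHz int16 PCM and prints the offsets the worker thread produces.

import base64
import time

import numpy as np

from reachy_mini_conversation_app.audio.head_wobbler import SAMPLE_RATE, HeadWobbler


def apply_offsets(offsets):
    # offsets = (x_m, y_m, z_m, roll_rad, pitch_rad, yaw_rad), one tuple per 50 ms hop
    print(offsets)


wobbler = HeadWobbler(apply_offsets)
wobbler.start()
pcm = np.zeros(SAMPLE_RATE, dtype=np.int16)  # 1 s of silence at 24 kHz
wobbler.feed(base64.b64encode(pcm.tobytes()).decode("ascii"))
time.sleep(1.5)  # let the worker thread drain the queue
wobbler.reset()  # drop queued audio and restart timing for the next utterance
wobbler.stop()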
src/reachy_mini_conversation_app/audio/speech_tapper.py
ADDED
@@ -0,0 +1,268 @@
from __future__ import annotations
import math
from typing import Any, Dict, List
from itertools import islice
from collections import deque

import numpy as np
from numpy.typing import NDArray


# Tunables
SR = 16_000
FRAME_MS = 20
HOP_MS = 50

SWAY_MASTER = 1.5
SENS_DB_OFFSET = +4.0
VAD_DB_ON = -35.0
VAD_DB_OFF = -45.0
VAD_ATTACK_MS = 40
VAD_RELEASE_MS = 250
ENV_FOLLOW_GAIN = 0.65

SWAY_F_PITCH = 2.2
SWAY_A_PITCH_DEG = 4.5
SWAY_F_YAW = 0.6
SWAY_A_YAW_DEG = 7.5
SWAY_F_ROLL = 1.3
SWAY_A_ROLL_DEG = 2.25
SWAY_F_X = 0.35
SWAY_A_X_MM = 4.5
SWAY_F_Y = 0.45
SWAY_A_Y_MM = 3.75
SWAY_F_Z = 0.25
SWAY_A_Z_MM = 2.25

SWAY_DB_LOW = -46.0
SWAY_DB_HIGH = -18.0
LOUDNESS_GAMMA = 0.9
SWAY_ATTACK_MS = 50
SWAY_RELEASE_MS = 250

# Derived
FRAME = int(SR * FRAME_MS / 1000)
HOP = int(SR * HOP_MS / 1000)
ATTACK_FR = max(1, int(VAD_ATTACK_MS / HOP_MS))
RELEASE_FR = max(1, int(VAD_RELEASE_MS / HOP_MS))
SWAY_ATTACK_FR = max(1, int(SWAY_ATTACK_MS / HOP_MS))
SWAY_RELEASE_FR = max(1, int(SWAY_RELEASE_MS / HOP_MS))


def _rms_dbfs(x: NDArray[np.float32]) -> float:
    """Root-mean-square in dBFS for float32 mono array in [-1,1]."""
    # numerically stable rms (avoid overflow)
    x = x.astype(np.float32, copy=False)
    rms = np.sqrt(np.mean(x * x, dtype=np.float32) + 1e-12, dtype=np.float32)
    return float(20.0 * math.log10(float(rms) + 1e-12))


def _loudness_gain(db: float, offset: float = SENS_DB_OFFSET) -> float:
    """Normalize dB into [0,1] with gamma; clipped to [0,1]."""
    t = (db + offset - SWAY_DB_LOW) / (SWAY_DB_HIGH - SWAY_DB_LOW)
    if t < 0.0:
        t = 0.0
    elif t > 1.0:
        t = 1.0
    return t**LOUDNESS_GAMMA if LOUDNESS_GAMMA != 1.0 else t


def _to_float32_mono(x: NDArray[Any]) -> NDArray[np.float32]:
    """Convert arbitrary PCM array to float32 mono in [-1,1].

    Accepts shapes: (N,), (1,N), (N,1), (C,N), (N,C).
    """
    a = np.asarray(x)
    if a.ndim == 0:
        return np.zeros(0, dtype=np.float32)

    # If 2D, decide which axis is channels (prefer small first dim)
    if a.ndim == 2:
        # e.g., (channels, samples) if channels is small (<=8)
        if a.shape[0] <= 8 and a.shape[0] <= a.shape[1]:
            a = np.mean(a, axis=0)
        else:
            a = np.mean(a, axis=1)
    elif a.ndim > 2:
        a = np.mean(a.reshape(a.shape[0], -1), axis=0)

    # Now 1D, cast/scale
    if np.issubdtype(a.dtype, np.floating):
        return a.astype(np.float32, copy=False)
    # integer PCM
    info = np.iinfo(a.dtype)
    scale = float(max(-info.min, info.max))
    return a.astype(np.float32) / (scale if scale != 0.0 else 1.0)


def _resample_linear(x: NDArray[np.float32], sr_in: int, sr_out: int) -> NDArray[np.float32]:
    """Lightweight linear resampler for short buffers."""
    if sr_in == sr_out or x.size == 0:
        return x
    # guard tiny sizes
    n_out = int(round(x.size * sr_out / sr_in))
    if n_out <= 1:
        return np.zeros(0, dtype=np.float32)
    t_in = np.linspace(0.0, 1.0, num=x.size, dtype=np.float32, endpoint=True)
    t_out = np.linspace(0.0, 1.0, num=n_out, dtype=np.float32, endpoint=True)
    return np.interp(t_out, t_in, x).astype(np.float32, copy=False)


class SwayRollRT:
    """Feed audio chunks → per-hop sway outputs.

    Usage:
        rt = SwayRollRT()
        rt.feed(pcm_int16_or_float, sr) -> List[dict]
    """

    def __init__(self, rng_seed: int = 7):
        """Initialize state."""
        self._seed = int(rng_seed)
        self.samples: deque[float] = deque(maxlen=10 * SR)  # sliding window for VAD/env
        self.carry: NDArray[np.float32] = np.zeros(0, dtype=np.float32)

        self.vad_on = False
        self.vad_above = 0
        self.vad_below = 0

        self.sway_env = 0.0
        self.sway_up = 0
        self.sway_down = 0

        rng = np.random.default_rng(self._seed)
        self.phase_pitch = float(rng.random() * 2 * math.pi)
        self.phase_yaw = float(rng.random() * 2 * math.pi)
        self.phase_roll = float(rng.random() * 2 * math.pi)
        self.phase_x = float(rng.random() * 2 * math.pi)
        self.phase_y = float(rng.random() * 2 * math.pi)
        self.phase_z = float(rng.random() * 2 * math.pi)
        self.t = 0.0

    def reset(self) -> None:
        """Reset state (VAD/env/buffers/time) but keep initial phases/seed."""
        self.samples.clear()
        self.carry = np.zeros(0, dtype=np.float32)
        self.vad_on = False
        self.vad_above = 0
        self.vad_below = 0
        self.sway_env = 0.0
        self.sway_up = 0
        self.sway_down = 0
        self.t = 0.0

    def feed(self, pcm: NDArray[Any], sr: int | None) -> List[Dict[str, float]]:
        """Stream in PCM chunk. Returns a list of sway dicts, one per hop (HOP_MS).

        Args:
            pcm: np.ndarray, shape (N,) or (C,N)/(N,C); int or float.
            sr: sample rate of `pcm` (None -> assume SR).

        """
        sr_in = SR if sr is None else int(sr)
        x = _to_float32_mono(pcm)
        if x.size == 0:
            return []
        if sr_in != SR:
            x = _resample_linear(x, sr_in, SR)
            if x.size == 0:
                return []

        # append to carry and consume fixed HOP chunks
        if self.carry.size:
            self.carry = np.concatenate([self.carry, x])
        else:
            self.carry = x

        out: List[Dict[str, float]] = []

        while self.carry.size >= HOP:
            hop = self.carry[:HOP]
            remaining: NDArray[np.float32] = self.carry[HOP:]
            self.carry = remaining

            # keep sliding window for VAD/env computation
            # (deque accepts any iterable; list() for small HOP is fine)
            self.samples.extend(hop.tolist())
            if len(self.samples) < FRAME:
                self.t += HOP_MS / 1000.0
                continue

            frame = np.fromiter(
                islice(self.samples, len(self.samples) - FRAME, len(self.samples)),
                dtype=np.float32,
                count=FRAME,
            )
            db = _rms_dbfs(frame)

            # VAD with hysteresis + attack/release
            if db >= VAD_DB_ON:
                self.vad_above += 1
                self.vad_below = 0
                if not self.vad_on and self.vad_above >= ATTACK_FR:
                    self.vad_on = True
            elif db <= VAD_DB_OFF:
                self.vad_below += 1
                self.vad_above = 0
                if self.vad_on and self.vad_below >= RELEASE_FR:
                    self.vad_on = False

            if self.vad_on:
                self.sway_up = min(SWAY_ATTACK_FR, self.sway_up + 1)
                self.sway_down = 0
            else:
                self.sway_down = min(SWAY_RELEASE_FR, self.sway_down + 1)
                self.sway_up = 0

            up = self.sway_up / SWAY_ATTACK_FR
            down = 1.0 - (self.sway_down / SWAY_RELEASE_FR)
            target = up if self.vad_on else down
            self.sway_env += ENV_FOLLOW_GAIN * (target - self.sway_env)
            # clamp
            if self.sway_env < 0.0:
                self.sway_env = 0.0
            elif self.sway_env > 1.0:
                self.sway_env = 1.0

            loud = _loudness_gain(db) * SWAY_MASTER
            env = self.sway_env
            self.t += HOP_MS / 1000.0

            # oscillators
            pitch = (
                math.radians(SWAY_A_PITCH_DEG)
                * loud
                * env
                * math.sin(2 * math.pi * SWAY_F_PITCH * self.t + self.phase_pitch)
            )
            yaw = (
                math.radians(SWAY_A_YAW_DEG)
                * loud
                * env
                * math.sin(2 * math.pi * SWAY_F_YAW * self.t + self.phase_yaw)
            )
            roll = (
                math.radians(SWAY_A_ROLL_DEG)
                * loud
                * env
                * math.sin(2 * math.pi * SWAY_F_ROLL * self.t + self.phase_roll)
            )
            x_mm = SWAY_A_X_MM * loud * env * math.sin(2 * math.pi * SWAY_F_X * self.t + self.phase_x)
            y_mm = SWAY_A_Y_MM * loud * env * math.sin(2 * math.pi * SWAY_F_Y * self.t + self.phase_y)
            z_mm = SWAY_A_Z_MM * loud * env * math.sin(2 * math.pi * SWAY_F_Z * self.t + self.phase_z)

            out.append(
                {
                    "pitch_rad": pitch,
                    "yaw_rad": yaw,
                    "roll_rad": roll,
                    "pitch_deg": math.degrees(pitch),
                    "yaw_deg": math.degrees(yaw),
                    "roll_deg": math.degrees(roll),
                    "x_mm": x_mm,
                    "y_mm": y_mm,
                    "z_mm": z_mm,
                },
            )

        return out
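A usage sketch for SwayRollRT (assumes the module above is importable): stream 100 ms chunks of a 440 Hz test tone and read one sway dictionary per 50 ms hop.

import numpy as np

from reachy_mini_conversation_app.audio.speech_tapper import SR, SwayRollRT


rt = SwayRollRT()
t = np.arange(SR // 10) / SR  # 100 ms of samples at 16 kHz
tone = (0.3 * np.sin(2 * np.pi * 440 * t) * 32767).astype(np.int16)
for _ in range(10):  # ~1 s of audio in total
    for hop in rt.feed(tone, SR):
        print(hop["yaw_deg"], hop["pitch_deg"], hop["x_mm"])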
src/reachy_mini_conversation_app/camera_worker.py
ADDED
@@ -0,0 +1,241 @@
"""Camera worker thread with frame buffering and face tracking.

Ported from main_works.py camera_worker() function to provide:
- 30Hz+ camera polling with thread-safe frame buffering
- Face tracking integration with smooth interpolation
- Latest frame always available for tools
"""

import time
import logging
import threading
from typing import Any, List, Tuple

import numpy as np
from numpy.typing import NDArray
from scipy.spatial.transform import Rotation as R

from reachy_mini import ReachyMini
from reachy_mini.utils.interpolation import linear_pose_interpolation


logger = logging.getLogger(__name__)


class CameraWorker:
    """Thread-safe camera worker with frame buffering and face tracking."""

    def __init__(self, reachy_mini: ReachyMini, head_tracker: Any = None) -> None:
        """Initialize."""
        self.reachy_mini = reachy_mini
        self.head_tracker = head_tracker

        # Thread-safe frame storage
        self.latest_frame: NDArray[np.uint8] | None = None
        self.frame_lock = threading.Lock()
        self._stop_event = threading.Event()
        self._thread: threading.Thread | None = None

        # Face tracking state
        self.is_head_tracking_enabled = True
        self.face_tracking_offsets: List[float] = [
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
        ]  # x, y, z, roll, pitch, yaw
        self.face_tracking_lock = threading.Lock()

        # Face tracking timing variables (same as main_works.py)
        self.last_face_detected_time: float | None = None
        self.interpolation_start_time: float | None = None
        self.interpolation_start_pose: NDArray[np.float32] | None = None
        self.face_lost_delay = 2.0  # seconds to wait before starting interpolation
        self.interpolation_duration = 1.0  # seconds to interpolate back to neutral

        # Track state changes
        self.previous_head_tracking_state = self.is_head_tracking_enabled

    def get_latest_frame(self) -> NDArray[np.uint8] | None:
        """Get the latest frame (thread-safe)."""
        with self.frame_lock:
            if self.latest_frame is None:
                return None
            # Return a copy in original BGR format (OpenCV native)
            return self.latest_frame.copy()

    def get_face_tracking_offsets(
        self,
    ) -> Tuple[float, float, float, float, float, float]:
        """Get current face tracking offsets (thread-safe)."""
        with self.face_tracking_lock:
            offsets = self.face_tracking_offsets
            return (offsets[0], offsets[1], offsets[2], offsets[3], offsets[4], offsets[5])

    def set_head_tracking_enabled(self, enabled: bool) -> None:
        """Enable/disable head tracking."""
        self.is_head_tracking_enabled = enabled
        logger.info(f"Head tracking {'enabled' if enabled else 'disabled'}")

    def start(self) -> None:
        """Start the camera worker loop in a thread."""
        self._stop_event.clear()
        self._thread = threading.Thread(target=self.working_loop, daemon=True)
        self._thread.start()
        logger.debug("Camera worker started")

    def stop(self) -> None:
        """Stop the camera worker loop."""
        self._stop_event.set()
        if self._thread is not None:
            self._thread.join()

        logger.debug("Camera worker stopped")

    def working_loop(self) -> None:
        """Enable the camera worker loop.

        Ported from main_works.py camera_worker() with same logic.
        """
        logger.debug("Starting camera working loop")

        # Initialize head tracker if available
        neutral_pose = np.eye(4)  # Neutral pose (identity matrix)
        self.previous_head_tracking_state = self.is_head_tracking_enabled

        while not self._stop_event.is_set():
            try:
                current_time = time.time()

                # Get frame from robot
                frame = self.reachy_mini.media.get_frame()

                if frame is not None:
                    # Thread-safe frame storage
                    with self.frame_lock:
                        self.latest_frame = frame  # .copy()

                    # Check if face tracking was just disabled
                    if self.previous_head_tracking_state and not self.is_head_tracking_enabled:
                        # Face tracking was just disabled - start interpolation to neutral
                        self.last_face_detected_time = current_time  # Trigger the face-lost logic
                        self.interpolation_start_time = None  # Will be set by the face-lost interpolation
                        self.interpolation_start_pose = None

                    # Update tracking state
                    self.previous_head_tracking_state = self.is_head_tracking_enabled

                    # Handle face tracking if enabled and head tracker available
                    if self.is_head_tracking_enabled and self.head_tracker is not None:
                        eye_center, _ = self.head_tracker.get_head_position(frame)

                        if eye_center is not None:
                            # Face detected - immediately switch to tracking
                            self.last_face_detected_time = current_time
                            self.interpolation_start_time = None  # Stop any interpolation

                            # Convert normalized coordinates to pixel coordinates
                            h, w, _ = frame.shape
                            eye_center_norm = (eye_center + 1) / 2
                            eye_center_pixels = [
                                eye_center_norm[0] * w,
                                eye_center_norm[1] * h,
                            ]

                            # Get the head pose needed to look at the target, but don't perform movement
                            target_pose = self.reachy_mini.look_at_image(
                                eye_center_pixels[0],
                                eye_center_pixels[1],
                                duration=0.0,
                                perform_movement=False,
                            )

                            # Extract translation and rotation from the target pose directly
                            translation = target_pose[:3, 3]
                            rotation = R.from_matrix(target_pose[:3, :3]).as_euler("xyz", degrees=False)

                            # Scale down translation and rotation because smaller FOV
                            translation *= 0.6
                            rotation *= 0.6

                            # Thread-safe update of face tracking offsets (use pose as-is)
                            with self.face_tracking_lock:
                                self.face_tracking_offsets = [
                                    translation[0],
                                    translation[1],
                                    translation[2],  # x, y, z
                                    rotation[0],
                                    rotation[1],
                                    rotation[2],  # roll, pitch, yaw
                                ]

                        # No face detected while tracking enabled - set face lost timestamp
                        elif self.last_face_detected_time is None or self.last_face_detected_time == current_time:
                            # Only update if we haven't already set a face lost time
                            # (current_time check prevents overriding the disable-triggered timestamp)
                            pass

                    # Handle smooth interpolation (works for both face-lost and tracking-disabled cases)
                    if self.last_face_detected_time is not None:
                        time_since_face_lost = current_time - self.last_face_detected_time

                        if time_since_face_lost >= self.face_lost_delay:
                            # Start interpolation if not already started
                            if self.interpolation_start_time is None:
                                self.interpolation_start_time = current_time
                                # Capture current pose as start of interpolation
                                with self.face_tracking_lock:
                                    current_translation = self.face_tracking_offsets[:3]
                                    current_rotation_euler = self.face_tracking_offsets[3:]
                                # Convert to 4x4 pose matrix
                                pose_matrix = np.eye(4, dtype=np.float32)
                                pose_matrix[:3, 3] = current_translation
                                pose_matrix[:3, :3] = R.from_euler(
                                    "xyz",
                                    current_rotation_euler,
                                ).as_matrix()
                                self.interpolation_start_pose = pose_matrix

                            # Calculate interpolation progress (t from 0 to 1)
                            elapsed_interpolation = current_time - self.interpolation_start_time
                            t = min(1.0, elapsed_interpolation / self.interpolation_duration)

                            # Interpolate between current pose and neutral pose
                            interpolated_pose = linear_pose_interpolation(
                                self.interpolation_start_pose,
                                neutral_pose,
                                t,
                            )

                            # Extract translation and rotation from interpolated pose
                            translation = interpolated_pose[:3, 3]
                            rotation = R.from_matrix(interpolated_pose[:3, :3]).as_euler("xyz", degrees=False)

                            # Thread-safe update of face tracking offsets
                            with self.face_tracking_lock:
                                self.face_tracking_offsets = [
                                    translation[0],
                                    translation[1],
                                    translation[2],  # x, y, z
                                    rotation[0],
                                    rotation[1],
                                    rotation[2],  # roll, pitch, yaw
                                ]

                            # If interpolation is complete, reset timing
                            if t >= 1.0:
                                self.last_face_detected_time = None
                                self.interpolation_start_time = None
                                self.interpolation_start_pose = None
                        # else: Keep current offsets (within 2s delay period)

                # Small sleep to prevent excessive CPU usage (same as main_works.py)
                time.sleep(0.04)

            except Exception as e:
                logger.error(f"Camera worker error: {e}")
                time.sleep(0.1)  # Longer sleep on error

        logger.debug("Camera worker thread exited")
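A usage sketch for CameraWorker (assumes a reachable Reachy Mini daemon or simulator, and that `ReachyMini()` can be constructed with no arguments, which this diff does not show): poll the latest frame and face-tracking offsets, then shut the worker down.

import time

from reachy_mini import ReachyMini

from reachy_mini_conversation_app.camera_worker import CameraWorker


mini = ReachyMini()  # assumed zero-argument constructor
worker = CameraWorker(mini, head_tracker=None)  # no face tracker: offsets stay neutral
worker.start()
time.sleep(1.0)
frame = worker.get_latest_frame()  # BGR ndarray or None
offsets = worker.get_face_tracking_offsets()  # (x, y, z, roll, pitch, yaw)
print(None if frame is None else frame.shape, offsets)
worker.stop()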
src/reachy_mini_conversation_app/config.py
ADDED
@@ -0,0 +1,60 @@
import os
import logging

from dotenv import find_dotenv, load_dotenv


logger = logging.getLogger(__name__)

# Locate .env file (search upward from current working directory)
dotenv_path = find_dotenv(usecwd=True)

if dotenv_path:
    # Load .env and override environment variables
    load_dotenv(dotenv_path=dotenv_path, override=True)
    logger.info(f"Configuration loaded from {dotenv_path}")
else:
    logger.warning("No .env file found, using environment variables")


class Config:
    """Configuration class for the conversation app."""

    # Required
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")  # The key is downloaded in console.py if needed

    # Optional
    MODEL_NAME = os.getenv("MODEL_NAME", "gpt-realtime")
    HF_HOME = os.getenv("HF_HOME", "./cache")
    LOCAL_VISION_MODEL = os.getenv("LOCAL_VISION_MODEL", "HuggingFaceTB/SmolVLM2-2.2B-Instruct")
    HF_TOKEN = os.getenv("HF_TOKEN")  # Optional, falls back to hf auth login if not set

    logger.debug(f"Model: {MODEL_NAME}, HF_HOME: {HF_HOME}, Vision Model: {LOCAL_VISION_MODEL}")

    REACHY_MINI_CUSTOM_PROFILE = os.getenv("REACHY_MINI_CUSTOM_PROFILE")
    logger.debug(f"Custom Profile: {REACHY_MINI_CUSTOM_PROFILE}")


config = Config()


def set_custom_profile(profile: str | None) -> None:
    """Update the selected custom profile at runtime and expose it via env.

    This ensures modules that read `config` and code that inspects the
    environment see a consistent value.
    """
    try:
        config.REACHY_MINI_CUSTOM_PROFILE = profile
    except Exception:
        pass
    try:
        import os as _os

        if profile:
            _os.environ["REACHY_MINI_CUSTOM_PROFILE"] = profile
        else:
            # Remove to reflect default
            _os.environ.pop("REACHY_MINI_CUSTOM_PROFILE", None)
    except Exception:
        pass
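A usage sketch for the config module above: read the resolved settings and switch the active personality profile at runtime, the same way other modules in this commit consume `config`.

from reachy_mini_conversation_app.config import config, set_custom_profile

print(config.MODEL_NAME, config.HF_HOME, config.LOCAL_VISION_MODEL)
set_custom_profile("cosmic_kitchen")  # profile name taken from the profiles/ folder added above
set_custom_profile(None)  # back to the default profile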
src/reachy_mini_conversation_app/console.py
ADDED
@@ -0,0 +1,499 @@
"""Bidirectional local audio stream with optional settings UI.

In headless mode, there is no Gradio UI. If the OpenAI API key is not
available via environment/.env, we expose a minimal settings page via the
Reachy Mini Apps settings server to let non-technical users enter it.

The settings UI is served from this package's ``static/`` folder and offers a
single password field to set ``OPENAI_API_KEY``. Once set, we persist it to the
app instance's ``.env`` file (if available) and proceed to start streaming.
"""

import os
import sys
import time
import asyncio
import logging
from typing import List, Optional
from pathlib import Path

from fastrtc import AdditionalOutputs, audio_to_float32
from scipy.signal import resample

from reachy_mini import ReachyMini
from reachy_mini.media.media_manager import MediaBackend
from reachy_mini_conversation_app.config import config
from reachy_mini_conversation_app.openai_realtime import OpenaiRealtimeHandler
from reachy_mini_conversation_app.headless_personality_ui import mount_personality_routes


try:
    # FastAPI is provided by the Reachy Mini Apps runtime
    from fastapi import FastAPI, Response
    from pydantic import BaseModel
    from fastapi.responses import FileResponse, JSONResponse
    from starlette.staticfiles import StaticFiles
except Exception:  # pragma: no cover - only loaded when settings_app is used
    FastAPI = object  # type: ignore
    FileResponse = object  # type: ignore
    JSONResponse = object  # type: ignore
    StaticFiles = object  # type: ignore
    BaseModel = object  # type: ignore


logger = logging.getLogger(__name__)


class LocalStream:
    """LocalStream using Reachy Mini's recorder/player."""

    def __init__(
        self,
        handler: OpenaiRealtimeHandler,
        robot: ReachyMini,
        *,
        settings_app: Optional[FastAPI] = None,
        instance_path: Optional[str] = None,
    ):
        """Initialize the stream with an OpenAI realtime handler and pipelines.

        - ``settings_app``: the Reachy Mini Apps FastAPI to attach settings endpoints.
        - ``instance_path``: directory where per-instance ``.env`` should be stored.
        """
        self.handler = handler
        self._robot = robot
        self._stop_event = asyncio.Event()
        self._tasks: List[asyncio.Task[None]] = []
        # Allow the handler to flush the player queue when appropriate.
        self.handler._clear_queue = self.clear_audio_queue
        self._settings_app: Optional[FastAPI] = settings_app
        self._instance_path: Optional[str] = instance_path
        self._settings_initialized = False
        self._asyncio_loop = None

    # ---- Settings UI (only when API key is missing) ----
    def _read_env_lines(self, env_path: Path) -> list[str]:
        """Load env file contents or a template as a list of lines."""
        inst = env_path.parent
        try:
            if env_path.exists():
                try:
                    return env_path.read_text(encoding="utf-8").splitlines()
                except Exception:
                    return []
            template_text = None
            ex = inst / ".env.example"
            if ex.exists():
                try:
                    template_text = ex.read_text(encoding="utf-8")
                except Exception:
                    template_text = None
            if template_text is None:
                try:
                    cwd_example = Path.cwd() / ".env.example"
                    if cwd_example.exists():
                        template_text = cwd_example.read_text(encoding="utf-8")
                except Exception:
                    template_text = None
            if template_text is None:
                packaged = Path(__file__).parent / ".env.example"
                if packaged.exists():
                    try:
                        template_text = packaged.read_text(encoding="utf-8")
                    except Exception:
                        template_text = None
            return template_text.splitlines() if template_text else []
        except Exception:
            return []

    def _persist_api_key(self, key: str) -> None:
        """Persist API key to environment and instance ``.env`` if possible.

        Behavior:
        - Always sets ``OPENAI_API_KEY`` in process env and in-memory config.
        - Writes/updates ``<instance_path>/.env``:
            * If ``.env`` exists, replaces/append OPENAI_API_KEY line.
            * Else, copies template from ``<instance_path>/.env.example`` when present,
              otherwise falls back to the packaged template
              ``reachy_mini_conversation_app/.env.example``.
            * Ensures the resulting file contains the full template plus the key.
        - Loads the written ``.env`` into the current process environment.
|
| 121 |
+
"""
|
| 122 |
+
k = (key or "").strip()
|
| 123 |
+
if not k:
|
| 124 |
+
return
|
| 125 |
+
# Update live process env and config so consumers see it immediately
|
| 126 |
+
try:
|
| 127 |
+
os.environ["OPENAI_API_KEY"] = k
|
| 128 |
+
except Exception: # best-effort
|
| 129 |
+
pass
|
| 130 |
+
try:
|
| 131 |
+
config.OPENAI_API_KEY = k
|
| 132 |
+
except Exception:
|
| 133 |
+
pass
|
| 134 |
+
|
| 135 |
+
if not self._instance_path:
|
| 136 |
+
return
|
| 137 |
+
try:
|
| 138 |
+
inst = Path(self._instance_path)
|
| 139 |
+
env_path = inst / ".env"
|
| 140 |
+
lines = self._read_env_lines(env_path)
|
| 141 |
+
replaced = False
|
| 142 |
+
for i, ln in enumerate(lines):
|
| 143 |
+
if ln.strip().startswith("OPENAI_API_KEY="):
|
| 144 |
+
lines[i] = f"OPENAI_API_KEY={k}"
|
| 145 |
+
replaced = True
|
| 146 |
+
break
|
| 147 |
+
if not replaced:
|
| 148 |
+
lines.append(f"OPENAI_API_KEY={k}")
|
| 149 |
+
final_text = "\n".join(lines) + "\n"
|
| 150 |
+
env_path.write_text(final_text, encoding="utf-8")
|
| 151 |
+
logger.info("Persisted OPENAI_API_KEY to %s", env_path)
|
| 152 |
+
|
| 153 |
+
# Load the newly written .env into this process to ensure downstream imports see it
|
| 154 |
+
try:
|
| 155 |
+
from dotenv import load_dotenv
|
| 156 |
+
|
| 157 |
+
load_dotenv(dotenv_path=str(env_path), override=True)
|
| 158 |
+
except Exception:
|
| 159 |
+
pass
|
| 160 |
+
except Exception as e:
|
| 161 |
+
logger.warning("Failed to persist OPENAI_API_KEY: %s", e)
|
| 162 |
+
|
| 163 |
+
def _persist_personality(self, profile: Optional[str]) -> None:
|
| 164 |
+
"""Persist the startup personality to the instance .env and config."""
|
| 165 |
+
selection = (profile or "").strip() or None
|
| 166 |
+
try:
|
| 167 |
+
from reachy_mini_conversation_app.config import set_custom_profile
|
| 168 |
+
|
| 169 |
+
set_custom_profile(selection)
|
| 170 |
+
except Exception:
|
| 171 |
+
pass
|
| 172 |
+
|
| 173 |
+
if not self._instance_path:
|
| 174 |
+
return
|
| 175 |
+
try:
|
| 176 |
+
env_path = Path(self._instance_path) / ".env"
|
| 177 |
+
lines = self._read_env_lines(env_path)
|
| 178 |
+
replaced = False
|
| 179 |
+
for i, ln in enumerate(list(lines)):
|
| 180 |
+
if ln.strip().startswith("REACHY_MINI_CUSTOM_PROFILE="):
|
| 181 |
+
if selection:
|
| 182 |
+
lines[i] = f"REACHY_MINI_CUSTOM_PROFILE={selection}"
|
| 183 |
+
else:
|
| 184 |
+
lines.pop(i)
|
| 185 |
+
replaced = True
|
| 186 |
+
break
|
| 187 |
+
if selection and not replaced:
|
| 188 |
+
lines.append(f"REACHY_MINI_CUSTOM_PROFILE={selection}")
|
| 189 |
+
if selection is None and not env_path.exists():
|
| 190 |
+
return
|
| 191 |
+
final_text = "\n".join(lines) + "\n"
|
| 192 |
+
env_path.write_text(final_text, encoding="utf-8")
|
| 193 |
+
logger.info("Persisted startup personality to %s", env_path)
|
| 194 |
+
try:
|
| 195 |
+
from dotenv import load_dotenv
|
| 196 |
+
|
| 197 |
+
load_dotenv(dotenv_path=str(env_path), override=True)
|
| 198 |
+
except Exception:
|
| 199 |
+
pass
|
| 200 |
+
except Exception as e:
|
| 201 |
+
logger.warning("Failed to persist REACHY_MINI_CUSTOM_PROFILE: %s", e)
|
| 202 |
+
|
| 203 |
+
def _read_persisted_personality(self) -> Optional[str]:
|
| 204 |
+
"""Read persisted startup personality from instance .env (if any)."""
|
| 205 |
+
if not self._instance_path:
|
| 206 |
+
return None
|
| 207 |
+
env_path = Path(self._instance_path) / ".env"
|
| 208 |
+
try:
|
| 209 |
+
if env_path.exists():
|
| 210 |
+
for ln in env_path.read_text(encoding="utf-8").splitlines():
|
| 211 |
+
if ln.strip().startswith("REACHY_MINI_CUSTOM_PROFILE="):
|
| 212 |
+
_, _, val = ln.partition("=")
|
| 213 |
+
v = val.strip()
|
| 214 |
+
return v or None
|
| 215 |
+
except Exception:
|
| 216 |
+
pass
|
| 217 |
+
return None
|
| 218 |
+
|
| 219 |
+
def _init_settings_ui_if_needed(self) -> None:
|
| 220 |
+
"""Attach minimal settings UI to the settings app.
|
| 221 |
+
|
| 222 |
+
Always mounts the UI when a settings_app is provided so that users
|
| 223 |
+
see a confirmation message even if the API key is already configured.
|
| 224 |
+
"""
|
| 225 |
+
if self._settings_initialized:
|
| 226 |
+
return
|
| 227 |
+
if self._settings_app is None:
|
| 228 |
+
return
|
| 229 |
+
|
| 230 |
+
static_dir = Path(__file__).parent / "static"
|
| 231 |
+
index_file = static_dir / "index.html"
|
| 232 |
+
|
| 233 |
+
if hasattr(self._settings_app, "mount"):
|
| 234 |
+
try:
|
| 235 |
+
# Serve /static/* assets
|
| 236 |
+
self._settings_app.mount("/static", StaticFiles(directory=str(static_dir)), name="static")
|
| 237 |
+
except Exception:
|
| 238 |
+
pass
|
| 239 |
+
|
| 240 |
+
class ApiKeyPayload(BaseModel):
|
| 241 |
+
openai_api_key: str
|
| 242 |
+
|
| 243 |
+
# GET / -> index.html
|
| 244 |
+
@self._settings_app.get("/")
|
| 245 |
+
def _root() -> FileResponse:
|
| 246 |
+
return FileResponse(str(index_file))
|
| 247 |
+
|
| 248 |
+
# GET /favicon.ico -> optional, avoid noisy 404s on some browsers
|
| 249 |
+
@self._settings_app.get("/favicon.ico")
|
| 250 |
+
def _favicon() -> Response:
|
| 251 |
+
return Response(status_code=204)
|
| 252 |
+
|
| 253 |
+
# GET /status -> whether key is set
|
| 254 |
+
@self._settings_app.get("/status")
|
| 255 |
+
def _status() -> JSONResponse:
|
| 256 |
+
has_key = bool(config.OPENAI_API_KEY and str(config.OPENAI_API_KEY).strip())
|
| 257 |
+
return JSONResponse({"has_key": has_key})
|
| 258 |
+
|
| 259 |
+
# GET /ready -> whether backend finished loading tools
|
| 260 |
+
@self._settings_app.get("/ready")
|
| 261 |
+
def _ready() -> JSONResponse:
|
| 262 |
+
try:
|
| 263 |
+
mod = sys.modules.get("reachy_mini_conversation_app.tools.core_tools")
|
| 264 |
+
ready = bool(getattr(mod, "_TOOLS_INITIALIZED", False)) if mod else False
|
| 265 |
+
except Exception:
|
| 266 |
+
ready = False
|
| 267 |
+
return JSONResponse({"ready": ready})
|
| 268 |
+
|
| 269 |
+
# POST /openai_api_key -> set/persist key
|
| 270 |
+
@self._settings_app.post("/openai_api_key")
|
| 271 |
+
def _set_key(payload: ApiKeyPayload) -> JSONResponse:
|
| 272 |
+
key = (payload.openai_api_key or "").strip()
|
| 273 |
+
if not key:
|
| 274 |
+
return JSONResponse({"ok": False, "error": "empty_key"}, status_code=400)
|
| 275 |
+
self._persist_api_key(key)
|
| 276 |
+
return JSONResponse({"ok": True})
|
| 277 |
+
|
| 278 |
+
# POST /validate_api_key -> validate key without persisting it
|
| 279 |
+
@self._settings_app.post("/validate_api_key")
|
| 280 |
+
async def _validate_key(payload: ApiKeyPayload) -> JSONResponse:
|
| 281 |
+
key = (payload.openai_api_key or "").strip()
|
| 282 |
+
if not key:
|
| 283 |
+
return JSONResponse({"valid": False, "error": "empty_key"}, status_code=400)
|
| 284 |
+
|
| 285 |
+
# Try to validate by checking if we can fetch the models
|
| 286 |
+
try:
|
| 287 |
+
import httpx
|
| 288 |
+
|
| 289 |
+
headers = {"Authorization": f"Bearer {key}", "Content-Type": "application/json"}
|
| 290 |
+
async with httpx.AsyncClient(timeout=10.0) as client:
|
| 291 |
+
response = await client.get("https://api.openai.com/v1/models", headers=headers)
|
| 292 |
+
if response.status_code == 200:
|
| 293 |
+
return JSONResponse({"valid": True})
|
| 294 |
+
elif response.status_code == 401:
|
| 295 |
+
return JSONResponse({"valid": False, "error": "invalid_api_key"}, status_code=401)
|
| 296 |
+
else:
|
| 297 |
+
return JSONResponse(
|
| 298 |
+
{"valid": False, "error": "validation_failed"}, status_code=response.status_code
|
| 299 |
+
)
|
| 300 |
+
except Exception as e:
|
| 301 |
+
logger.warning(f"API key validation failed: {e}")
|
| 302 |
+
return JSONResponse({"valid": False, "error": "validation_error"}, status_code=500)
|
| 303 |
+
|
| 304 |
+
self._settings_initialized = True
|
| 305 |
+
|
| 306 |
+
def launch(self) -> None:
|
| 307 |
+
"""Start the recorder/player and run the async processing loops.
|
| 308 |
+
|
| 309 |
+
If the OpenAI key is missing, expose a tiny settings UI via the
|
| 310 |
+
Reachy Mini settings server to collect it before starting streams.
|
| 311 |
+
"""
|
| 312 |
+
self._stop_event.clear()
|
| 313 |
+
|
| 314 |
+
# Try to load an existing instance .env first (covers subsequent runs)
|
| 315 |
+
if self._instance_path:
|
| 316 |
+
try:
|
| 317 |
+
from dotenv import load_dotenv
|
| 318 |
+
|
| 319 |
+
from reachy_mini_conversation_app.config import set_custom_profile
|
| 320 |
+
|
| 321 |
+
env_path = Path(self._instance_path) / ".env"
|
| 322 |
+
if env_path.exists():
|
| 323 |
+
load_dotenv(dotenv_path=str(env_path), override=True)
|
| 324 |
+
# Update config with newly loaded values
|
| 325 |
+
new_key = os.getenv("OPENAI_API_KEY", "").strip()
|
| 326 |
+
if new_key:
|
| 327 |
+
try:
|
| 328 |
+
config.OPENAI_API_KEY = new_key
|
| 329 |
+
except Exception:
|
| 330 |
+
pass
|
| 331 |
+
new_profile = os.getenv("REACHY_MINI_CUSTOM_PROFILE")
|
| 332 |
+
if new_profile is not None:
|
| 333 |
+
try:
|
| 334 |
+
set_custom_profile(new_profile.strip() or None)
|
| 335 |
+
except Exception:
|
| 336 |
+
pass
|
| 337 |
+
except Exception:
|
| 338 |
+
pass
|
| 339 |
+
|
| 340 |
+
# If key is still missing, try to download one from HuggingFace
|
| 341 |
+
if not (config.OPENAI_API_KEY and str(config.OPENAI_API_KEY).strip()):
|
| 342 |
+
logger.info("OPENAI_API_KEY not set, attempting to download from HuggingFace...")
|
| 343 |
+
try:
|
| 344 |
+
from gradio_client import Client
|
| 345 |
+
client = Client("HuggingFaceM4/gradium_setup", verbose=False)
|
| 346 |
+
key, status = client.predict(api_name="/claim_b_key")
|
| 347 |
+
if key and key.strip():
|
| 348 |
+
logger.info("Successfully downloaded API key from HuggingFace")
|
| 349 |
+
# Persist it immediately
|
| 350 |
+
self._persist_api_key(key)
|
| 351 |
+
except Exception as e:
|
| 352 |
+
logger.warning(f"Failed to download API key from HuggingFace: {e}")
|
| 353 |
+
|
| 354 |
+
# Always expose settings UI if a settings app is available
|
| 355 |
+
# (do this AFTER loading/downloading the key so status endpoint sees the right value)
|
| 356 |
+
self._init_settings_ui_if_needed()
|
| 357 |
+
|
| 358 |
+
# If key is still missing -> wait until provided via the settings UI
|
| 359 |
+
if not (config.OPENAI_API_KEY and str(config.OPENAI_API_KEY).strip()):
|
| 360 |
+
logger.warning("OPENAI_API_KEY not found. Open the app settings page to enter it.")
|
| 361 |
+
# Poll until the key becomes available (set via the settings UI)
|
| 362 |
+
try:
|
| 363 |
+
while not (config.OPENAI_API_KEY and str(config.OPENAI_API_KEY).strip()):
|
| 364 |
+
time.sleep(0.2)
|
| 365 |
+
except KeyboardInterrupt:
|
| 366 |
+
logger.info("Interrupted while waiting for API key.")
|
| 367 |
+
return
|
| 368 |
+
|
| 369 |
+
# Start media after key is set/available
|
| 370 |
+
self._robot.media.start_recording()
|
| 371 |
+
self._robot.media.start_playing()
|
| 372 |
+
time.sleep(1) # give some time to the pipelines to start
|
| 373 |
+
|
| 374 |
+
async def runner() -> None:
|
| 375 |
+
# Capture loop for cross-thread personality actions
|
| 376 |
+
loop = asyncio.get_running_loop()
|
| 377 |
+
self._asyncio_loop = loop # type: ignore[assignment]
|
| 378 |
+
# Mount personality routes now that loop and handler are available
|
| 379 |
+
try:
|
| 380 |
+
if self._settings_app is not None:
|
| 381 |
+
mount_personality_routes(
|
| 382 |
+
self._settings_app,
|
| 383 |
+
self.handler,
|
| 384 |
+
lambda: self._asyncio_loop,
|
| 385 |
+
persist_personality=self._persist_personality,
|
| 386 |
+
get_persisted_personality=self._read_persisted_personality,
|
| 387 |
+
)
|
| 388 |
+
except Exception:
|
| 389 |
+
pass
|
| 390 |
+
self._tasks = [
|
| 391 |
+
asyncio.create_task(self.handler.start_up(), name="openai-handler"),
|
| 392 |
+
asyncio.create_task(self.record_loop(), name="stream-record-loop"),
|
| 393 |
+
asyncio.create_task(self.play_loop(), name="stream-play-loop"),
|
| 394 |
+
]
|
| 395 |
+
try:
|
| 396 |
+
await asyncio.gather(*self._tasks)
|
| 397 |
+
except asyncio.CancelledError:
|
| 398 |
+
logger.info("Tasks cancelled during shutdown")
|
| 399 |
+
finally:
|
| 400 |
+
# Ensure handler connection is closed
|
| 401 |
+
await self.handler.shutdown()
|
| 402 |
+
|
| 403 |
+
asyncio.run(runner())
|
| 404 |
+
|
| 405 |
+
def close(self) -> None:
|
| 406 |
+
"""Stop the stream and underlying media pipelines.
|
| 407 |
+
|
| 408 |
+
This method:
|
| 409 |
+
- Stops audio recording and playback first
|
| 410 |
+
- Sets the stop event to signal async loops to terminate
|
| 411 |
+
- Cancels all pending async tasks (openai-handler, record-loop, play-loop)
|
| 412 |
+
"""
|
| 413 |
+
logger.info("Stopping LocalStream...")
|
| 414 |
+
|
| 415 |
+
# Stop media pipelines FIRST before cancelling async tasks
|
| 416 |
+
# This ensures clean shutdown before PortAudio cleanup
|
| 417 |
+
try:
|
| 418 |
+
self._robot.media.stop_recording()
|
| 419 |
+
except Exception as e:
|
| 420 |
+
logger.debug(f"Error stopping recording (may already be stopped): {e}")
|
| 421 |
+
|
| 422 |
+
try:
|
| 423 |
+
self._robot.media.stop_playing()
|
| 424 |
+
except Exception as e:
|
| 425 |
+
logger.debug(f"Error stopping playback (may already be stopped): {e}")
|
| 426 |
+
|
| 427 |
+
# Now signal async loops to stop
|
| 428 |
+
self._stop_event.set()
|
| 429 |
+
|
| 430 |
+
# Cancel all running tasks
|
| 431 |
+
for task in self._tasks:
|
| 432 |
+
if not task.done():
|
| 433 |
+
task.cancel()
|
| 434 |
+
|
| 435 |
+
def clear_audio_queue(self) -> None:
|
| 436 |
+
"""Flush the player's appsrc to drop any queued audio immediately."""
|
| 437 |
+
logger.info("User intervention: flushing player queue")
|
| 438 |
+
if self._robot.media.backend == MediaBackend.GSTREAMER:
|
| 439 |
+
# Directly flush gstreamer audio pipe
|
| 440 |
+
self._robot.media.audio.clear_player()
|
| 441 |
+
elif self._robot.media.backend == MediaBackend.DEFAULT or self._robot.media.backend == MediaBackend.DEFAULT_NO_VIDEO:
|
| 442 |
+
self._robot.media.audio.clear_output_buffer()
|
| 443 |
+
self.handler.output_queue = asyncio.Queue()
|
| 444 |
+
|
| 445 |
+
async def record_loop(self) -> None:
|
| 446 |
+
"""Read mic frames from the recorder and forward them to the handler."""
|
| 447 |
+
input_sample_rate = self._robot.media.get_input_audio_samplerate()
|
| 448 |
+
logger.debug(f"Audio recording started at {input_sample_rate} Hz")
|
| 449 |
+
|
| 450 |
+
while not self._stop_event.is_set():
|
| 451 |
+
audio_frame = self._robot.media.get_audio_sample()
|
| 452 |
+
if audio_frame is not None:
|
| 453 |
+
await self.handler.receive((input_sample_rate, audio_frame))
|
| 454 |
+
await asyncio.sleep(0) # avoid busy loop
|
| 455 |
+
|
| 456 |
+
async def play_loop(self) -> None:
|
| 457 |
+
"""Fetch outputs from the handler: log text and play audio frames."""
|
| 458 |
+
while not self._stop_event.is_set():
|
| 459 |
+
handler_output = await self.handler.emit()
|
| 460 |
+
|
| 461 |
+
if isinstance(handler_output, AdditionalOutputs):
|
| 462 |
+
for msg in handler_output.args:
|
| 463 |
+
content = msg.get("content", "")
|
| 464 |
+
if isinstance(content, str):
|
| 465 |
+
logger.info(
|
| 466 |
+
"role=%s content=%s",
|
| 467 |
+
msg.get("role"),
|
| 468 |
+
content if len(content) < 500 else content[:500] + "…",
|
| 469 |
+
)
|
| 470 |
+
|
| 471 |
+
elif isinstance(handler_output, tuple):
|
| 472 |
+
input_sample_rate, audio_data = handler_output
|
| 473 |
+
output_sample_rate = self._robot.media.get_output_audio_samplerate()
|
| 474 |
+
|
| 475 |
+
# Reshape if needed
|
| 476 |
+
if audio_data.ndim == 2:
|
| 477 |
+
# Scipy channels last convention
|
| 478 |
+
if audio_data.shape[1] > audio_data.shape[0]:
|
| 479 |
+
audio_data = audio_data.T
|
| 480 |
+
# Multiple channels -> Mono channel
|
| 481 |
+
if audio_data.shape[1] > 1:
|
| 482 |
+
audio_data = audio_data[:, 0]
|
| 483 |
+
|
| 484 |
+
# Cast if needed
|
| 485 |
+
audio_frame = audio_to_float32(audio_data)
|
| 486 |
+
|
| 487 |
+
# Resample if needed
|
| 488 |
+
if input_sample_rate != output_sample_rate:
|
| 489 |
+
audio_frame = resample(
|
| 490 |
+
audio_frame,
|
| 491 |
+
int(len(audio_frame) * output_sample_rate / input_sample_rate),
|
| 492 |
+
)
|
| 493 |
+
|
| 494 |
+
self._robot.media.push_audio_sample(audio_frame)
|
| 495 |
+
|
| 496 |
+
else:
|
| 497 |
+
logger.debug("Ignoring output type=%s", type(handler_output).__name__)
|
| 498 |
+
|
| 499 |
+
await asyncio.sleep(0) # yield to event loop
|
src/reachy_mini_conversation_app/dance_emotion_moves.py
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Dance and emotion moves for the movement queue system.
|
| 2 |
+
|
| 3 |
+
This module implements dance moves and emotions as Move objects that can be queued
|
| 4 |
+
and executed sequentially by the MovementManager.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
import logging
|
| 9 |
+
from typing import Tuple
|
| 10 |
+
|
| 11 |
+
import numpy as np
|
| 12 |
+
from numpy.typing import NDArray
|
| 13 |
+
|
| 14 |
+
from reachy_mini.motion.move import Move
|
| 15 |
+
from reachy_mini.motion.recorded_move import RecordedMoves
|
| 16 |
+
from reachy_mini_dances_library.dance_move import DanceMove
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
logger = logging.getLogger(__name__)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class DanceQueueMove(Move): # type: ignore
|
| 23 |
+
"""Wrapper for dance moves to work with the movement queue system."""
|
| 24 |
+
|
| 25 |
+
def __init__(self, move_name: str):
|
| 26 |
+
"""Initialize a DanceQueueMove."""
|
| 27 |
+
self.dance_move = DanceMove(move_name)
|
| 28 |
+
self.move_name = move_name
|
| 29 |
+
|
| 30 |
+
@property
|
| 31 |
+
def duration(self) -> float:
|
| 32 |
+
"""Duration property required by official Move interface."""
|
| 33 |
+
return float(self.dance_move.duration)
|
| 34 |
+
|
| 35 |
+
def evaluate(self, t: float) -> tuple[NDArray[np.float64] | None, NDArray[np.float64] | None, float | None]:
|
| 36 |
+
"""Evaluate dance move at time t."""
|
| 37 |
+
try:
|
| 38 |
+
# Get the pose from the dance move
|
| 39 |
+
head_pose, antennas, body_yaw = self.dance_move.evaluate(t)
|
| 40 |
+
|
| 41 |
+
# Convert to numpy array if antennas is tuple and return in official Move format
|
| 42 |
+
if isinstance(antennas, tuple):
|
| 43 |
+
antennas = np.array([antennas[0], antennas[1]])
|
| 44 |
+
|
| 45 |
+
return (head_pose, antennas, body_yaw)
|
| 46 |
+
|
| 47 |
+
except Exception as e:
|
| 48 |
+
logger.error(f"Error evaluating dance move '{self.move_name}' at t={t}: {e}")
|
| 49 |
+
# Return neutral pose on error
|
| 50 |
+
from reachy_mini.utils import create_head_pose
|
| 51 |
+
|
| 52 |
+
neutral_head_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
|
| 53 |
+
return (neutral_head_pose, np.array([0.0, 0.0], dtype=np.float64), 0.0)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
class EmotionQueueMove(Move): # type: ignore
|
| 57 |
+
"""Wrapper for emotion moves to work with the movement queue system."""
|
| 58 |
+
|
| 59 |
+
def __init__(self, emotion_name: str, recorded_moves: RecordedMoves):
|
| 60 |
+
"""Initialize an EmotionQueueMove."""
|
| 61 |
+
self.emotion_move = recorded_moves.get(emotion_name)
|
| 62 |
+
self.emotion_name = emotion_name
|
| 63 |
+
|
| 64 |
+
@property
|
| 65 |
+
def duration(self) -> float:
|
| 66 |
+
"""Duration property required by official Move interface."""
|
| 67 |
+
return float(self.emotion_move.duration)
|
| 68 |
+
|
| 69 |
+
def evaluate(self, t: float) -> tuple[NDArray[np.float64] | None, NDArray[np.float64] | None, float | None]:
|
| 70 |
+
"""Evaluate emotion move at time t."""
|
| 71 |
+
try:
|
| 72 |
+
# Get the pose from the emotion move
|
| 73 |
+
head_pose, antennas, body_yaw = self.emotion_move.evaluate(t)
|
| 74 |
+
|
| 75 |
+
# Convert to numpy array if antennas is tuple and return in official Move format
|
| 76 |
+
if isinstance(antennas, tuple):
|
| 77 |
+
antennas = np.array([antennas[0], antennas[1]])
|
| 78 |
+
|
| 79 |
+
return (head_pose, antennas, body_yaw)
|
| 80 |
+
|
| 81 |
+
except Exception as e:
|
| 82 |
+
logger.error(f"Error evaluating emotion '{self.emotion_name}' at t={t}: {e}")
|
| 83 |
+
# Return neutral pose on error
|
| 84 |
+
from reachy_mini.utils import create_head_pose
|
| 85 |
+
|
| 86 |
+
neutral_head_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
|
| 87 |
+
return (neutral_head_pose, np.array([0.0, 0.0], dtype=np.float64), 0.0)
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
class GotoQueueMove(Move): # type: ignore
|
| 91 |
+
"""Wrapper for goto moves to work with the movement queue system."""
|
| 92 |
+
|
| 93 |
+
def __init__(
|
| 94 |
+
self,
|
| 95 |
+
target_head_pose: NDArray[np.float32],
|
| 96 |
+
start_head_pose: NDArray[np.float32] | None = None,
|
| 97 |
+
target_antennas: Tuple[float, float] = (0, 0),
|
| 98 |
+
start_antennas: Tuple[float, float] | None = None,
|
| 99 |
+
target_body_yaw: float = 0,
|
| 100 |
+
start_body_yaw: float | None = None,
|
| 101 |
+
duration: float = 1.0,
|
| 102 |
+
):
|
| 103 |
+
"""Initialize a GotoQueueMove."""
|
| 104 |
+
self._duration = duration
|
| 105 |
+
self.target_head_pose = target_head_pose
|
| 106 |
+
self.start_head_pose = start_head_pose
|
| 107 |
+
self.target_antennas = target_antennas
|
| 108 |
+
self.start_antennas = start_antennas or (0, 0)
|
| 109 |
+
self.target_body_yaw = target_body_yaw
|
| 110 |
+
self.start_body_yaw = start_body_yaw or 0
|
| 111 |
+
|
| 112 |
+
@property
|
| 113 |
+
def duration(self) -> float:
|
| 114 |
+
"""Duration property required by official Move interface."""
|
| 115 |
+
return self._duration
|
| 116 |
+
|
| 117 |
+
def evaluate(self, t: float) -> tuple[NDArray[np.float64] | None, NDArray[np.float64] | None, float | None]:
|
| 118 |
+
"""Evaluate goto move at time t using linear interpolation."""
|
| 119 |
+
try:
|
| 120 |
+
from reachy_mini.utils import create_head_pose
|
| 121 |
+
from reachy_mini.utils.interpolation import linear_pose_interpolation
|
| 122 |
+
|
| 123 |
+
# Clamp t to [0, 1] for interpolation
|
| 124 |
+
t_clamped = max(0, min(1, t / self.duration))
|
| 125 |
+
|
| 126 |
+
# Use start pose if available, otherwise neutral
|
| 127 |
+
if self.start_head_pose is not None:
|
| 128 |
+
start_pose = self.start_head_pose
|
| 129 |
+
else:
|
| 130 |
+
start_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
|
| 131 |
+
|
| 132 |
+
# Interpolate head pose
|
| 133 |
+
head_pose = linear_pose_interpolation(start_pose, self.target_head_pose, t_clamped)
|
| 134 |
+
|
| 135 |
+
# Interpolate antennas - return as numpy array
|
| 136 |
+
antennas = np.array(
|
| 137 |
+
[
|
| 138 |
+
self.start_antennas[0] + (self.target_antennas[0] - self.start_antennas[0]) * t_clamped,
|
| 139 |
+
self.start_antennas[1] + (self.target_antennas[1] - self.start_antennas[1]) * t_clamped,
|
| 140 |
+
],
|
| 141 |
+
dtype=np.float64,
|
| 142 |
+
)
|
| 143 |
+
|
| 144 |
+
# Interpolate body yaw
|
| 145 |
+
body_yaw = self.start_body_yaw + (self.target_body_yaw - self.start_body_yaw) * t_clamped
|
| 146 |
+
|
| 147 |
+
return (head_pose, antennas, body_yaw)
|
| 148 |
+
|
| 149 |
+
except Exception as e:
|
| 150 |
+
logger.error(f"Error evaluating goto move at t={t}: {e}")
|
| 151 |
+
# Return target pose on error - convert to float64
|
| 152 |
+
target_head_pose_f64 = self.target_head_pose.astype(np.float64)
|
| 153 |
+
target_antennas_array = np.array([self.target_antennas[0], self.target_antennas[1]], dtype=np.float64)
|
| 154 |
+
return (target_head_pose_f64, target_antennas_array, self.target_body_yaw)
|
src/reachy_mini_conversation_app/gradio_personality.py
ADDED
|
@@ -0,0 +1,301 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Gradio personality UI components and wiring.
|
| 2 |
+
|
| 3 |
+
This module encapsulates the UI elements and logic related to managing
|
| 4 |
+
conversation "personalities" (profiles) so that `main.py` stays lean.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
from typing import Any
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
|
| 11 |
+
import gradio as gr
|
| 12 |
+
|
| 13 |
+
from .config import config
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class PersonalityUI:
|
| 17 |
+
"""Container for personality-related Gradio components."""
|
| 18 |
+
|
| 19 |
+
def __init__(self) -> None:
|
| 20 |
+
"""Initialize the PersonalityUI instance."""
|
| 21 |
+
# Constants and paths
|
| 22 |
+
self.DEFAULT_OPTION = "(built-in default)"
|
| 23 |
+
self._profiles_root = Path(__file__).parent / "profiles"
|
| 24 |
+
self._tools_dir = Path(__file__).parent / "tools"
|
| 25 |
+
self._prompts_dir = Path(__file__).parent / "prompts"
|
| 26 |
+
|
| 27 |
+
# Components (initialized in create_components)
|
| 28 |
+
self.personalities_dropdown: gr.Dropdown
|
| 29 |
+
self.apply_btn: gr.Button
|
| 30 |
+
self.status_md: gr.Markdown
|
| 31 |
+
self.preview_md: gr.Markdown
|
| 32 |
+
self.person_name_tb: gr.Textbox
|
| 33 |
+
self.person_instr_ta: gr.TextArea
|
| 34 |
+
self.tools_txt_ta: gr.TextArea
|
| 35 |
+
self.voice_dropdown: gr.Dropdown
|
| 36 |
+
self.new_personality_btn: gr.Button
|
| 37 |
+
self.available_tools_cg: gr.CheckboxGroup
|
| 38 |
+
self.save_btn: gr.Button
|
| 39 |
+
|
| 40 |
+
# ---------- Filesystem helpers ----------
|
| 41 |
+
def _list_personalities(self) -> list[str]:
|
| 42 |
+
names: list[str] = []
|
| 43 |
+
try:
|
| 44 |
+
if self._profiles_root.exists():
|
| 45 |
+
for p in sorted(self._profiles_root.iterdir()):
|
| 46 |
+
if p.name == "user_personalities":
|
| 47 |
+
continue
|
| 48 |
+
if p.is_dir() and (p / "instructions.txt").exists():
|
| 49 |
+
names.append(p.name)
|
| 50 |
+
user_dir = self._profiles_root / "user_personalities"
|
| 51 |
+
if user_dir.exists():
|
| 52 |
+
for p in sorted(user_dir.iterdir()):
|
| 53 |
+
if p.is_dir() and (p / "instructions.txt").exists():
|
| 54 |
+
names.append(f"user_personalities/{p.name}")
|
| 55 |
+
except Exception:
|
| 56 |
+
pass
|
| 57 |
+
return names
|
| 58 |
+
|
| 59 |
+
def _resolve_profile_dir(self, selection: str) -> Path:
|
| 60 |
+
return self._profiles_root / selection
|
| 61 |
+
|
| 62 |
+
def _read_instructions_for(self, name: str) -> str:
|
| 63 |
+
try:
|
| 64 |
+
if name == self.DEFAULT_OPTION:
|
| 65 |
+
default_file = self._prompts_dir / "default_prompt.txt"
|
| 66 |
+
if default_file.exists():
|
| 67 |
+
return default_file.read_text(encoding="utf-8").strip()
|
| 68 |
+
return ""
|
| 69 |
+
target = self._resolve_profile_dir(name) / "instructions.txt"
|
| 70 |
+
if target.exists():
|
| 71 |
+
return target.read_text(encoding="utf-8").strip()
|
| 72 |
+
return ""
|
| 73 |
+
except Exception as e:
|
| 74 |
+
return f"Could not load instructions: {e}"
|
| 75 |
+
|
| 76 |
+
@staticmethod
|
| 77 |
+
def _sanitize_name(name: str) -> str:
|
| 78 |
+
import re
|
| 79 |
+
|
| 80 |
+
s = name.strip()
|
| 81 |
+
s = re.sub(r"\s+", "_", s)
|
| 82 |
+
s = re.sub(r"[^a-zA-Z0-9_-]", "", s)
|
| 83 |
+
return s
|
| 84 |
+
|
| 85 |
+
# ---------- Public API ----------
|
| 86 |
+
def create_components(self) -> None:
|
| 87 |
+
"""Instantiate Gradio components for the personality UI."""
|
| 88 |
+
current_value = config.REACHY_MINI_CUSTOM_PROFILE or self.DEFAULT_OPTION
|
| 89 |
+
|
| 90 |
+
self.personalities_dropdown = gr.Dropdown(
|
| 91 |
+
label="Select personality",
|
| 92 |
+
choices=[self.DEFAULT_OPTION, *(self._list_personalities())],
|
| 93 |
+
value=current_value,
|
| 94 |
+
)
|
| 95 |
+
self.apply_btn = gr.Button("Apply personality")
|
| 96 |
+
self.status_md = gr.Markdown(visible=True)
|
| 97 |
+
self.preview_md = gr.Markdown(value=self._read_instructions_for(current_value))
|
| 98 |
+
self.person_name_tb = gr.Textbox(label="Personality name")
|
| 99 |
+
self.person_instr_ta = gr.TextArea(label="Personality instructions", lines=10)
|
| 100 |
+
self.tools_txt_ta = gr.TextArea(label="tools.txt", lines=10)
|
| 101 |
+
self.voice_dropdown = gr.Dropdown(label="Voice", choices=["cedar"], value="cedar")
|
| 102 |
+
self.new_personality_btn = gr.Button("New personality")
|
| 103 |
+
self.available_tools_cg = gr.CheckboxGroup(label="Available tools (helper)", choices=[], value=[])
|
| 104 |
+
self.save_btn = gr.Button("Save personality (instructions + tools)")
|
| 105 |
+
|
| 106 |
+
def additional_inputs_ordered(self) -> list[Any]:
|
| 107 |
+
"""Return the additional inputs in the expected order for Stream."""
|
| 108 |
+
return [
|
| 109 |
+
self.personalities_dropdown,
|
| 110 |
+
self.apply_btn,
|
| 111 |
+
self.new_personality_btn,
|
| 112 |
+
self.status_md,
|
| 113 |
+
self.preview_md,
|
| 114 |
+
self.person_name_tb,
|
| 115 |
+
self.person_instr_ta,
|
| 116 |
+
self.tools_txt_ta,
|
| 117 |
+
self.voice_dropdown,
|
| 118 |
+
self.available_tools_cg,
|
| 119 |
+
self.save_btn,
|
| 120 |
+
]
|
| 121 |
+
|
| 122 |
+
# ---------- Event wiring ----------
|
| 123 |
+
def wire_events(self, handler: Any, blocks: gr.Blocks) -> None:
|
| 124 |
+
"""Attach event handlers to components within a Blocks context."""
|
| 125 |
+
|
| 126 |
+
async def _apply_personality(selected: str) -> tuple[str, str]:
|
| 127 |
+
profile = None if selected == self.DEFAULT_OPTION else selected
|
| 128 |
+
status = await handler.apply_personality(profile)
|
| 129 |
+
preview = self._read_instructions_for(selected)
|
| 130 |
+
return status, preview
|
| 131 |
+
|
| 132 |
+
def _read_voice_for(name: str) -> str:
|
| 133 |
+
try:
|
| 134 |
+
if name == self.DEFAULT_OPTION:
|
| 135 |
+
return "cedar"
|
| 136 |
+
vf = self._resolve_profile_dir(name) / "voice.txt"
|
| 137 |
+
if vf.exists():
|
| 138 |
+
v = vf.read_text(encoding="utf-8").strip()
|
| 139 |
+
return v or "cedar"
|
| 140 |
+
except Exception:
|
| 141 |
+
pass
|
| 142 |
+
return "cedar"
|
| 143 |
+
|
| 144 |
+
async def _fetch_voices(selected: str) -> dict[str, Any]:
|
| 145 |
+
try:
|
| 146 |
+
voices = await handler.get_available_voices()
|
| 147 |
+
current = _read_voice_for(selected)
|
| 148 |
+
if current not in voices:
|
| 149 |
+
current = "cedar"
|
| 150 |
+
return gr.update(choices=voices, value=current)
|
| 151 |
+
except Exception:
|
| 152 |
+
return gr.update(choices=["cedar"], value="cedar")
|
| 153 |
+
|
| 154 |
+
def _available_tools_for(selected: str) -> tuple[list[str], list[str]]:
|
| 155 |
+
shared: list[str] = []
|
| 156 |
+
try:
|
| 157 |
+
for py in self._tools_dir.glob("*.py"):
|
| 158 |
+
if py.stem in {"__init__", "core_tools"}:
|
| 159 |
+
continue
|
| 160 |
+
shared.append(py.stem)
|
| 161 |
+
except Exception:
|
| 162 |
+
pass
|
| 163 |
+
local: list[str] = []
|
| 164 |
+
try:
|
| 165 |
+
if selected != self.DEFAULT_OPTION:
|
| 166 |
+
for py in (self._profiles_root / selected).glob("*.py"):
|
| 167 |
+
local.append(py.stem)
|
| 168 |
+
except Exception:
|
| 169 |
+
pass
|
| 170 |
+
return sorted(shared), sorted(local)
|
| 171 |
+
|
| 172 |
+
def _parse_enabled_tools(text: str) -> list[str]:
|
| 173 |
+
enabled: list[str] = []
|
| 174 |
+
for line in text.splitlines():
|
| 175 |
+
s = line.strip()
|
| 176 |
+
if not s or s.startswith("#"):
|
| 177 |
+
continue
|
| 178 |
+
enabled.append(s)
|
| 179 |
+
return enabled
|
| 180 |
+
|
| 181 |
+
def _load_profile_for_edit(selected: str) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any], str]:
|
| 182 |
+
instr = self._read_instructions_for(selected)
|
| 183 |
+
tools_txt = ""
|
| 184 |
+
if selected != self.DEFAULT_OPTION:
|
| 185 |
+
tp = self._resolve_profile_dir(selected) / "tools.txt"
|
| 186 |
+
if tp.exists():
|
| 187 |
+
tools_txt = tp.read_text(encoding="utf-8")
|
| 188 |
+
shared, local = _available_tools_for(selected)
|
| 189 |
+
all_tools = sorted(set(shared + local))
|
| 190 |
+
enabled = _parse_enabled_tools(tools_txt)
|
| 191 |
+
status_text = f"Loaded profile '{selected}'."
|
| 192 |
+
return (
|
| 193 |
+
gr.update(value=instr),
|
| 194 |
+
gr.update(value=tools_txt),
|
| 195 |
+
gr.update(choices=all_tools, value=enabled),
|
| 196 |
+
status_text,
|
| 197 |
+
)
|
| 198 |
+
|
| 199 |
+
def _new_personality() -> tuple[
|
| 200 |
+
dict[str, Any], dict[str, Any], dict[str, Any], dict[str, Any], str, dict[str, Any]
|
| 201 |
+
]:
|
| 202 |
+
try:
|
| 203 |
+
# Prefill with hints
|
| 204 |
+
instr_val = """# Write your instructions here\n# e.g., Keep responses concise and friendly."""
|
| 205 |
+
tools_txt_val = "# tools enabled for this profile\n"
|
| 206 |
+
return (
|
| 207 |
+
gr.update(value=""),
|
| 208 |
+
gr.update(value=instr_val),
|
| 209 |
+
gr.update(value=tools_txt_val),
|
| 210 |
+
gr.update(choices=sorted(_available_tools_for(self.DEFAULT_OPTION)[0]), value=[]),
|
| 211 |
+
"Fill in a name, instructions and (optional) tools, then Save.",
|
| 212 |
+
gr.update(value="cedar"),
|
| 213 |
+
)
|
| 214 |
+
except Exception:
|
| 215 |
+
return (
|
| 216 |
+
gr.update(),
|
| 217 |
+
gr.update(),
|
| 218 |
+
gr.update(),
|
| 219 |
+
gr.update(),
|
| 220 |
+
"Failed to initialize new personality.",
|
| 221 |
+
gr.update(),
|
| 222 |
+
)
|
| 223 |
+
|
| 224 |
+
def _save_personality(
|
| 225 |
+
name: str, instructions: str, tools_text: str, voice: str
|
| 226 |
+
) -> tuple[dict[str, Any], dict[str, Any], str]:
|
| 227 |
+
name_s = self._sanitize_name(name)
|
| 228 |
+
if not name_s:
|
| 229 |
+
return gr.update(), gr.update(), "Please enter a valid name."
|
| 230 |
+
try:
|
| 231 |
+
target_dir = self._profiles_root / "user_personalities" / name_s
|
| 232 |
+
target_dir.mkdir(parents=True, exist_ok=True)
|
| 233 |
+
(target_dir / "instructions.txt").write_text(instructions.strip() + "\n", encoding="utf-8")
|
| 234 |
+
(target_dir / "tools.txt").write_text(tools_text.strip() + "\n", encoding="utf-8")
|
| 235 |
+
(target_dir / "voice.txt").write_text((voice or "cedar").strip() + "\n", encoding="utf-8")
|
| 236 |
+
|
| 237 |
+
choices = self._list_personalities()
|
| 238 |
+
value = f"user_personalities/{name_s}"
|
| 239 |
+
if value not in choices:
|
| 240 |
+
choices.append(value)
|
| 241 |
+
return (
|
| 242 |
+
gr.update(choices=[self.DEFAULT_OPTION, *sorted(choices)], value=value),
|
| 243 |
+
gr.update(value=instructions),
|
| 244 |
+
f"Saved personality '{name_s}'.",
|
| 245 |
+
)
|
| 246 |
+
except Exception as e:
|
| 247 |
+
return gr.update(), gr.update(), f"Failed to save personality: {e}"
|
| 248 |
+
|
| 249 |
+
def _sync_tools_from_checks(selected: list[str], current_text: str) -> dict[str, Any]:
|
| 250 |
+
comments = [ln for ln in current_text.splitlines() if ln.strip().startswith("#")]
|
| 251 |
+
body = "\n".join(selected)
|
| 252 |
+
out = ("\n".join(comments) + ("\n" if comments else "") + body).strip() + "\n"
|
| 253 |
+
return gr.update(value=out)
|
| 254 |
+
|
| 255 |
+
with blocks:
|
| 256 |
+
self.apply_btn.click(
|
| 257 |
+
fn=_apply_personality,
|
| 258 |
+
inputs=[self.personalities_dropdown],
|
| 259 |
+
outputs=[self.status_md, self.preview_md],
|
| 260 |
+
)
|
| 261 |
+
|
| 262 |
+
self.personalities_dropdown.change(
|
| 263 |
+
fn=_load_profile_for_edit,
|
| 264 |
+
inputs=[self.personalities_dropdown],
|
| 265 |
+
outputs=[self.person_instr_ta, self.tools_txt_ta, self.available_tools_cg, self.status_md],
|
| 266 |
+
)
|
| 267 |
+
|
| 268 |
+
blocks.load(
|
| 269 |
+
fn=_fetch_voices,
|
| 270 |
+
inputs=[self.personalities_dropdown],
|
| 271 |
+
outputs=[self.voice_dropdown],
|
| 272 |
+
)
|
| 273 |
+
|
| 274 |
+
self.available_tools_cg.change(
|
| 275 |
+
fn=_sync_tools_from_checks,
|
| 276 |
+
inputs=[self.available_tools_cg, self.tools_txt_ta],
|
| 277 |
+
outputs=[self.tools_txt_ta],
|
| 278 |
+
)
|
| 279 |
+
|
| 280 |
+
self.new_personality_btn.click(
|
| 281 |
+
fn=_new_personality,
|
| 282 |
+
inputs=[],
|
| 283 |
+
outputs=[
|
| 284 |
+
self.person_name_tb,
|
| 285 |
+
self.person_instr_ta,
|
| 286 |
+
self.tools_txt_ta,
|
| 287 |
+
self.available_tools_cg,
|
| 288 |
+
self.status_md,
|
| 289 |
+
self.voice_dropdown,
|
| 290 |
+
],
|
| 291 |
+
)
|
| 292 |
+
|
| 293 |
+
self.save_btn.click(
|
| 294 |
+
fn=_save_personality,
|
| 295 |
+
inputs=[self.person_name_tb, self.person_instr_ta, self.tools_txt_ta, self.voice_dropdown],
|
| 296 |
+
outputs=[self.personalities_dropdown, self.person_instr_ta, self.status_md],
|
| 297 |
+
).then(
|
| 298 |
+
fn=_apply_personality,
|
| 299 |
+
inputs=[self.personalities_dropdown],
|
| 300 |
+
outputs=[self.status_md, self.preview_md],
|
| 301 |
+
)
|
src/reachy_mini_conversation_app/headless_personality.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Headless personality management (console-based).
|
| 2 |
+
|
| 3 |
+
Provides an interactive CLI to browse, preview, apply, create and edit
|
| 4 |
+
"personalities" (profiles) when running without Gradio.
|
| 5 |
+
|
| 6 |
+
This module is intentionally not shared with the Gradio implementation to
|
| 7 |
+
avoid coupling and keep responsibilities clear for headless mode.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from __future__ import annotations
|
| 11 |
+
from typing import List
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
DEFAULT_OPTION = "(built-in default)"
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def _profiles_root() -> Path:
|
| 19 |
+
return Path(__file__).parent / "profiles"
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def _prompts_dir() -> Path:
|
| 23 |
+
return Path(__file__).parent / "prompts"
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def _tools_dir() -> Path:
|
| 27 |
+
return Path(__file__).parent / "tools"
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def _sanitize_name(name: str) -> str:
|
| 31 |
+
import re
|
| 32 |
+
|
| 33 |
+
s = name.strip()
|
| 34 |
+
s = re.sub(r"\s+", "_", s)
|
| 35 |
+
s = re.sub(r"[^a-zA-Z0-9_-]", "", s)
|
| 36 |
+
return s
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def list_personalities() -> List[str]:
|
| 40 |
+
"""List available personality profile names."""
|
| 41 |
+
names: List[str] = []
|
| 42 |
+
root = _profiles_root()
|
| 43 |
+
try:
|
| 44 |
+
if root.exists():
|
| 45 |
+
for p in sorted(root.iterdir()):
|
| 46 |
+
if p.name == "user_personalities":
|
| 47 |
+
continue
|
| 48 |
+
if p.is_dir() and (p / "instructions.txt").exists():
|
| 49 |
+
names.append(p.name)
|
| 50 |
+
udir = root / "user_personalities"
|
| 51 |
+
if udir.exists():
|
| 52 |
+
for p in sorted(udir.iterdir()):
|
| 53 |
+
if p.is_dir() and (p / "instructions.txt").exists():
|
| 54 |
+
names.append(f"user_personalities/{p.name}")
|
| 55 |
+
except Exception:
|
| 56 |
+
pass
|
| 57 |
+
return names
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def resolve_profile_dir(selection: str) -> Path:
|
| 61 |
+
"""Resolve the directory path for the given profile selection."""
|
| 62 |
+
return _profiles_root() / selection
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def read_instructions_for(name: str) -> str:
|
| 66 |
+
"""Read the instructions.txt content for the given profile name."""
|
| 67 |
+
try:
|
| 68 |
+
if name == DEFAULT_OPTION:
|
| 69 |
+
df = _prompts_dir() / "default_prompt.txt"
|
| 70 |
+
return df.read_text(encoding="utf-8").strip() if df.exists() else ""
|
| 71 |
+
target = resolve_profile_dir(name) / "instructions.txt"
|
| 72 |
+
return target.read_text(encoding="utf-8").strip() if target.exists() else ""
|
| 73 |
+
except Exception as e:
|
| 74 |
+
return f"Could not load instructions: {e}"
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def available_tools_for(selected: str) -> List[str]:
|
| 78 |
+
"""List available tool modules for the given profile selection."""
|
| 79 |
+
shared: List[str] = []
|
| 80 |
+
try:
|
| 81 |
+
for py in _tools_dir().glob("*.py"):
|
| 82 |
+
if py.stem in {"__init__", "core_tools"}:
|
| 83 |
+
continue
|
| 84 |
+
shared.append(py.stem)
|
| 85 |
+
except Exception:
|
| 86 |
+
pass
|
| 87 |
+
local: List[str] = []
|
| 88 |
+
try:
|
| 89 |
+
if selected != DEFAULT_OPTION:
|
| 90 |
+
for py in resolve_profile_dir(selected).glob("*.py"):
|
| 91 |
+
local.append(py.stem)
|
| 92 |
+
except Exception:
|
| 93 |
+
pass
|
| 94 |
+
return sorted(set(shared + local))
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def _write_profile(name_s: str, instructions: str, tools_text: str, voice: str = "cedar") -> None:
|
| 98 |
+
target_dir = _profiles_root() / "user_personalities" / name_s
|
| 99 |
+
target_dir.mkdir(parents=True, exist_ok=True)
|
| 100 |
+
(target_dir / "instructions.txt").write_text(instructions.strip() + "\n", encoding="utf-8")
|
| 101 |
+
(target_dir / "tools.txt").write_text((tools_text or "").strip() + "\n", encoding="utf-8")
|
| 102 |
+
(target_dir / "voice.txt").write_text((voice or "cedar").strip() + "\n", encoding="utf-8")
|
src/reachy_mini_conversation_app/headless_personality_ui.py
ADDED
|
@@ -0,0 +1,276 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Settings UI routes for headless personality management.
|
| 2 |
+
|
| 3 |
+
Exposes REST endpoints on the provided FastAPI settings app. The
|
| 4 |
+
implementation schedules backend actions (apply personality, fetch voices)
|
| 5 |
+
onto the running LocalStream asyncio loop using the supplied get_loop
|
| 6 |
+
callable to avoid cross-thread issues.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
import asyncio
|
| 11 |
+
import logging
|
| 12 |
+
from typing import Any, Callable, Optional
|
| 13 |
+
|
| 14 |
+
from fastapi import FastAPI
|
| 15 |
+
|
| 16 |
+
from .config import config
|
| 17 |
+
from .openai_realtime import OpenaiRealtimeHandler
|
| 18 |
+
from .headless_personality import (
|
| 19 |
+
DEFAULT_OPTION,
|
| 20 |
+
_sanitize_name,
|
| 21 |
+
_write_profile,
|
| 22 |
+
list_personalities,
|
| 23 |
+
available_tools_for,
|
| 24 |
+
resolve_profile_dir,
|
| 25 |
+
read_instructions_for,
|
| 26 |
+
)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def mount_personality_routes(
|
| 30 |
+
app: FastAPI,
|
| 31 |
+
handler: OpenaiRealtimeHandler,
|
| 32 |
+
get_loop: Callable[[], asyncio.AbstractEventLoop | None],
|
| 33 |
+
*,
|
| 34 |
+
persist_personality: Callable[[Optional[str]], None] | None = None,
|
| 35 |
+
get_persisted_personality: Callable[[], Optional[str]] | None = None,
|
| 36 |
+
) -> None:
|
| 37 |
+
"""Register personality management endpoints on a FastAPI app."""
|
| 38 |
+
try:
|
| 39 |
+
from fastapi import Request
|
| 40 |
+
from pydantic import BaseModel
|
| 41 |
+
from fastapi.responses import JSONResponse
|
| 42 |
+
except Exception: # pragma: no cover - only when settings app not available
|
| 43 |
+
return
|
| 44 |
+
|
| 45 |
+
class SavePayload(BaseModel):
|
| 46 |
+
name: str
|
| 47 |
+
instructions: str
|
| 48 |
+
tools_text: str
|
| 49 |
+
voice: Optional[str] = "cedar"
|
| 50 |
+
|
| 51 |
+
class ApplyPayload(BaseModel):
|
| 52 |
+
name: str
|
| 53 |
+
persist: Optional[bool] = False
|
| 54 |
+
|
| 55 |
+
def _startup_choice() -> Any:
|
| 56 |
+
"""Return the persisted startup personality or default."""
|
| 57 |
+
try:
|
| 58 |
+
if get_persisted_personality is not None:
|
| 59 |
+
stored = get_persisted_personality()
|
| 60 |
+
if stored:
|
| 61 |
+
return stored
|
| 62 |
+
env_val = getattr(config, "REACHY_MINI_CUSTOM_PROFILE", None)
|
| 63 |
+
if env_val:
|
| 64 |
+
return env_val
|
| 65 |
+
except Exception:
|
| 66 |
+
pass
|
| 67 |
+
return DEFAULT_OPTION
|
| 68 |
+
|
| 69 |
+
def _current_choice() -> str:
|
| 70 |
+
try:
|
| 71 |
+
cur = getattr(config, "REACHY_MINI_CUSTOM_PROFILE", None)
|
| 72 |
+
return cur or DEFAULT_OPTION
|
| 73 |
+
except Exception:
|
| 74 |
+
return DEFAULT_OPTION
|
| 75 |
+
|
| 76 |
+
@app.get("/personalities")
|
| 77 |
+
def _list() -> dict: # type: ignore
|
| 78 |
+
choices = [DEFAULT_OPTION, *list_personalities()]
|
| 79 |
+
return {"choices": choices, "current": _current_choice(), "startup": _startup_choice()}
|
| 80 |
+
|
| 81 |
+
@app.get("/personalities/load")
|
| 82 |
+
def _load(name: str) -> dict: # type: ignore
|
| 83 |
+
instr = read_instructions_for(name)
|
| 84 |
+
tools_txt = ""
|
| 85 |
+
voice = "cedar"
|
| 86 |
+
if name != DEFAULT_OPTION:
|
| 87 |
+
pdir = resolve_profile_dir(name)
|
| 88 |
+
tp = pdir / "tools.txt"
|
| 89 |
+
if tp.exists():
|
| 90 |
+
tools_txt = tp.read_text(encoding="utf-8")
|
| 91 |
+
vf = pdir / "voice.txt"
|
| 92 |
+
if vf.exists():
|
| 93 |
+
v = vf.read_text(encoding="utf-8").strip()
|
| 94 |
+
voice = v or "cedar"
|
| 95 |
+
avail = available_tools_for(name)
|
| 96 |
+
enabled = [ln.strip() for ln in tools_txt.splitlines() if ln.strip() and not ln.strip().startswith("#")]
|
| 97 |
+
return {
|
| 98 |
+
"instructions": instr,
|
| 99 |
+
"tools_text": tools_txt,
|
| 100 |
+
"voice": voice,
|
| 101 |
+
"available_tools": avail,
|
| 102 |
+
"enabled_tools": enabled,
|
| 103 |
+
}
|
| 104 |
+
|
| 105 |
+
@app.post("/personalities/save")
|
| 106 |
+
async def _save(request: Request) -> dict: # type: ignore
|
| 107 |
+
# Accept raw JSON only to avoid validation-related 422s
|
| 108 |
+
try:
|
| 109 |
+
raw = await request.json()
|
| 110 |
+
except Exception:
|
| 111 |
+
raw = {}
|
| 112 |
+
name = str(raw.get("name", ""))
|
| 113 |
+
instructions = str(raw.get("instructions", ""))
|
| 114 |
+
tools_text = str(raw.get("tools_text", ""))
|
| 115 |
+
voice = str(raw.get("voice", "cedar")) if raw.get("voice") is not None else "cedar"
|
| 116 |
+
|
| 117 |
+
name_s = _sanitize_name(name)
|
| 118 |
+
if not name_s:
|
| 119 |
+
        return JSONResponse({"ok": False, "error": "invalid_name"}, status_code=400)  # type: ignore
    try:
        logger.info(
            "Headless save: name=%r voice=%r instr_len=%d tools_len=%d",
            name_s,
            voice,
            len(instructions),
            len(tools_text),
        )
        _write_profile(name_s, instructions, tools_text, voice or "cedar")
        value = f"user_personalities/{name_s}"
        choices = [DEFAULT_OPTION, *list_personalities()]
        return {"ok": True, "value": value, "choices": choices}
    except Exception as e:
        return JSONResponse({"ok": False, "error": str(e)}, status_code=500)  # type: ignore

@app.post("/personalities/save_raw")
async def _save_raw(
    request: Request,
    name: Optional[str] = None,
    instructions: Optional[str] = None,
    tools_text: Optional[str] = None,
    voice: Optional[str] = None,
) -> dict:  # type: ignore
    # Accept query params, form-encoded, or raw JSON
    data = {"name": name, "instructions": instructions, "tools_text": tools_text, "voice": voice}
    # Prefer form if present
    try:
        form = await request.form()
        for k in ("name", "instructions", "tools_text", "voice"):
            if k in form and form[k] is not None:
                data[k] = str(form[k])
    except Exception:
        pass
    # Try JSON
    try:
        raw = await request.json()
        if isinstance(raw, dict):
            for k in ("name", "instructions", "tools_text", "voice"):
                if raw.get(k) is not None:
                    data[k] = str(raw.get(k))
    except Exception:
        pass

    name_s = _sanitize_name(str(data.get("name") or ""))
    if not name_s:
        return JSONResponse({"ok": False, "error": "invalid_name"}, status_code=400)  # type: ignore
    instr = str(data.get("instructions") or "")
    tools = str(data.get("tools_text") or "")
    v = str(data.get("voice") or "cedar")
    try:
        logger.info(
            "Headless save_raw: name=%r voice=%r instr_len=%d tools_len=%d", name_s, v, len(instr), len(tools)
        )
        _write_profile(name_s, instr, tools, v)
        value = f"user_personalities/{name_s}"
        choices = [DEFAULT_OPTION, *list_personalities()]
        return {"ok": True, "value": value, "choices": choices}
    except Exception as e:
        return JSONResponse({"ok": False, "error": str(e)}, status_code=500)  # type: ignore

@app.get("/personalities/save_raw")
async def _save_raw_get(name: str, instructions: str = "", tools_text: str = "", voice: str = "cedar") -> dict:  # type: ignore
    name_s = _sanitize_name(name)
    if not name_s:
        return JSONResponse({"ok": False, "error": "invalid_name"}, status_code=400)  # type: ignore
    try:
        logger.info(
            "Headless save_raw(GET): name=%r voice=%r instr_len=%d tools_len=%d",
            name_s,
            voice,
            len(instructions),
            len(tools_text),
        )
        _write_profile(name_s, instructions, tools_text, voice or "cedar")
        value = f"user_personalities/{name_s}"
        choices = [DEFAULT_OPTION, *list_personalities()]
        return {"ok": True, "value": value, "choices": choices}
    except Exception as e:
        return JSONResponse({"ok": False, "error": str(e)}, status_code=500)  # type: ignore

logger = logging.getLogger(__name__)

@app.post("/personalities/apply")
async def _apply(
    payload: ApplyPayload | None = None,
    name: str | None = None,
    persist: Optional[bool] = None,
    request: Optional[Request] = None,
) -> dict:  # type: ignore
    loop = get_loop()
    if loop is None:
        return JSONResponse({"ok": False, "error": "loop_unavailable"}, status_code=503)  # type: ignore

    # Accept both JSON payload and query param for convenience
    sel_name: Optional[str] = None
    persist_flag = bool(persist) if persist is not None else False
    if payload and getattr(payload, "name", None):
        sel_name = payload.name
        persist_flag = bool(getattr(payload, "persist", False))
    elif name:
        sel_name = name
    elif request is not None:
        try:
            body = await request.json()
            if isinstance(body, dict) and body.get("name"):
                sel_name = str(body.get("name"))
            if isinstance(body, dict) and "persist" in body:
                persist_flag = bool(body.get("persist"))
        except Exception:
            sel_name = None
    if request is not None:
        try:
            q_persist = request.query_params.get("persist")
            if q_persist is not None:
                persist_flag = str(q_persist).lower() in {"1", "true", "yes", "on"}
        except Exception:
            pass
    if not sel_name:
        sel_name = DEFAULT_OPTION

    async def _do_apply() -> str:
        sel = None if sel_name == DEFAULT_OPTION else sel_name
        status = await handler.apply_personality(sel)
        return status

    try:
        logger.info("Headless apply: requested name=%r", sel_name)
        fut = asyncio.run_coroutine_threadsafe(_do_apply(), loop)
        status = fut.result(timeout=10)
        persisted_choice = _startup_choice()
        if persist_flag and persist_personality is not None:
            try:
                persist_personality(None if sel_name == DEFAULT_OPTION else sel_name)
                persisted_choice = _startup_choice()
            except Exception as e:
                logger.warning("Failed to persist startup personality: %s", e)
        return {"ok": True, "status": status, "startup": persisted_choice}
    except Exception as e:
        return JSONResponse({"ok": False, "error": str(e)}, status_code=500)  # type: ignore

@app.get("/voices")
async def _voices() -> list[str]:
    loop = get_loop()
    if loop is None:
        return ["cedar"]

    async def _get_v() -> list[str]:
        try:
            return await handler.get_available_voices()
        except Exception:
            return ["cedar"]

    try:
        fut = asyncio.run_coroutine_threadsafe(_get_v(), loop)
        return fut.result(timeout=10)
    except Exception:
        return ["cedar"]
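For readers who want to exercise the headless endpoints above, here is a minimal client sketch. It is not part of this commit: the base URL is an assumption (use whatever address the settings FastAPI app is actually served on), and it relies on the third-party requests package. The field names (name, instructions, tools_text, voice, persist) and the reuse of the "user_personalities/<name>" choice string come directly from the handlers above.

import requests

BASE = "http://localhost:8000"  # assumption: replace with the real settings-app address

# Create or update a user personality profile via the JSON path of /personalities/save_raw
resp = requests.post(
    f"{BASE}/personalities/save_raw",
    json={
        "name": "my_pirate",
        "instructions": "You are a cheerful pirate.",
        "tools_text": "",
        "voice": "cedar",
    },
    timeout=10,
)
print(resp.json())  # expected shape: {"ok": true, "value": "user_personalities/my_pirate", "choices": [...]}

# Apply the saved profile right away and persist it as the startup personality
resp = requests.post(
    f"{BASE}/personalities/apply",
    json={"name": "user_personalities/my_pirate", "persist": True},
    timeout=15,
)
print(resp.json())

# List the voices currently exposed by the realtime handler
print(requests.get(f"{BASE}/voices", timeout=10).json())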
src/reachy_mini_conversation_app/images/reachymini_avatar.png
ADDED
Git LFS Details

src/reachy_mini_conversation_app/images/user_avatar.png
ADDED
Git LFS Details
src/reachy_mini_conversation_app/main.py
ADDED
@@ -0,0 +1,253 @@
"""Entrypoint for the Reachy Mini conversation app."""

import os
import sys
import time
import asyncio
import argparse
import threading
from typing import Any, Dict, List, Optional

import gradio as gr
from fastapi import FastAPI
from fastrtc import Stream
from gradio.utils import get_space

from reachy_mini import ReachyMini, ReachyMiniApp
from reachy_mini_conversation_app.utils import (
    parse_args,
    setup_logger,
    handle_vision_stuff,
    log_connection_troubleshooting,
)


def update_chatbot(chatbot: List[Dict[str, Any]], response: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Update the chatbot with AdditionalOutputs."""
    chatbot.append(response)
    return chatbot


def main() -> None:
    """Entrypoint for the Reachy Mini conversation app."""
    args, _ = parse_args()
    run(args)


def run(
    args: argparse.Namespace,
    robot: ReachyMini = None,
    app_stop_event: Optional[threading.Event] = None,
    settings_app: Optional[FastAPI] = None,
    instance_path: Optional[str] = None,
) -> None:
    """Run the Reachy Mini conversation app."""
    # Putting these dependencies here makes the dashboard faster to load when the conversation app is installed
    from reachy_mini_conversation_app.moves import MovementManager
    from reachy_mini_conversation_app.console import LocalStream
    from reachy_mini_conversation_app.openai_realtime import OpenaiRealtimeHandler
    from reachy_mini_conversation_app.tools.core_tools import ToolDependencies
    from reachy_mini_conversation_app.audio.head_wobbler import HeadWobbler

    logger = setup_logger(args.debug)
    logger.info("Starting Reachy Mini Conversation App")

    if args.no_camera and args.head_tracker is not None:
        logger.warning(
            "Head tracking disabled: --no-camera flag is set. "
            "Remove --no-camera to enable head tracking."
        )

    if robot is None:
        try:
            robot_kwargs = {}
            if args.robot_name is not None:
                robot_kwargs["robot_name"] = args.robot_name

            logger.info("Initializing ReachyMini (SDK will auto-detect appropriate backend)")
            robot = ReachyMini(**robot_kwargs)

        except TimeoutError as e:
            logger.error(
                "Connection timeout: Failed to connect to Reachy Mini daemon. "
                f"Details: {e}"
            )
            log_connection_troubleshooting(logger, args.robot_name)
            sys.exit(1)

        except ConnectionError as e:
            logger.error(
                "Connection failed: Unable to establish connection to Reachy Mini. "
                f"Details: {e}"
            )
            log_connection_troubleshooting(logger, args.robot_name)
            sys.exit(1)

        except Exception as e:
            logger.error(
                f"Unexpected error during robot initialization: {type(e).__name__}: {e}"
            )
            logger.error("Please check your configuration and try again.")
            sys.exit(1)

    # Check if running in simulation mode without --gradio
    if robot.client.get_status()["simulation_enabled"] and not args.gradio:
        logger.error(
            "Simulation mode requires Gradio interface. Please use --gradio flag when running in simulation mode."
        )
        robot.client.disconnect()
        sys.exit(1)

    camera_worker, _, vision_manager = handle_vision_stuff(args, robot)

    movement_manager = MovementManager(
        current_robot=robot,
        camera_worker=camera_worker,
    )

    head_wobbler = HeadWobbler(set_speech_offsets=movement_manager.set_speech_offsets)

    deps = ToolDependencies(
        reachy_mini=robot,
        movement_manager=movement_manager,
        camera_worker=camera_worker,
        vision_manager=vision_manager,
        head_wobbler=head_wobbler,
    )
    current_file_path = os.path.dirname(os.path.abspath(__file__))
    logger.debug(f"Current file absolute path: {current_file_path}")
    chatbot = gr.Chatbot(
        type="messages",
        resizable=True,
        avatar_images=(
            os.path.join(current_file_path, "images", "user_avatar.png"),
            os.path.join(current_file_path, "images", "reachymini_avatar.png"),
        ),
    )
    logger.debug(f"Chatbot avatar images: {chatbot.avatar_images}")

    handler = OpenaiRealtimeHandler(deps, gradio_mode=args.gradio, instance_path=instance_path)

    stream_manager: gr.Blocks | LocalStream | None = None

    if args.gradio:
        api_key_textbox = gr.Textbox(
            label="OPENAI API Key",
            type="password",
            value=os.getenv("OPENAI_API_KEY") if not get_space() else "",
        )

        from reachy_mini_conversation_app.gradio_personality import PersonalityUI

        personality_ui = PersonalityUI()
        personality_ui.create_components()

        stream = Stream(
            handler=handler,
            mode="send-receive",
            modality="audio",
            additional_inputs=[
                chatbot,
                api_key_textbox,
                *personality_ui.additional_inputs_ordered(),
            ],
            additional_outputs=[chatbot],
            additional_outputs_handler=update_chatbot,
            ui_args={"title": "Talk with Reachy Mini"},
        )
        stream_manager = stream.ui
        if not settings_app:
            app = FastAPI()
        else:
            app = settings_app

        personality_ui.wire_events(handler, stream_manager)

        app = gr.mount_gradio_app(app, stream.ui, path="/")
    else:
        # In headless mode, wire settings_app + instance_path to console LocalStream
        stream_manager = LocalStream(
            handler,
            robot,
            settings_app=settings_app,
            instance_path=instance_path,
        )

    # Each async service → its own thread/loop
    movement_manager.start()
    head_wobbler.start()
    if camera_worker:
        camera_worker.start()
    if vision_manager:
        vision_manager.start()

    def poll_stop_event() -> None:
        """Poll the stop event to allow graceful shutdown."""
        if app_stop_event is not None:
            app_stop_event.wait()

        logger.info("App stop event detected, shutting down...")
        try:
            stream_manager.close()
        except Exception as e:
            logger.error(f"Error while closing stream manager: {e}")

    if app_stop_event:
        threading.Thread(target=poll_stop_event, daemon=True).start()

    try:
        stream_manager.launch()
    except KeyboardInterrupt:
        logger.info("Keyboard interruption in main thread... closing server.")
    finally:
        movement_manager.stop()
        head_wobbler.stop()
        if camera_worker:
            camera_worker.stop()
        if vision_manager:
            vision_manager.stop()

        # Ensure media is explicitly closed before disconnecting
        try:
            robot.media.close()
        except Exception as e:
            logger.debug(f"Error closing media during shutdown: {e}")

        # prevent connection to keep alive some threads
        robot.client.disconnect()
        time.sleep(1)
        logger.info("Shutdown complete.")


class ReachyMiniConversationApp(ReachyMiniApp):  # type: ignore[misc]
    """Reachy Mini Apps entry point for the conversation app."""

    custom_app_url = "http://0.0.0.0:7860/"
    dont_start_webserver = False

    def run(self, reachy_mini: ReachyMini, stop_event: threading.Event) -> None:
        """Run the Reachy Mini conversation app."""
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

        args, _ = parse_args()

        # is_wireless = reachy_mini.client.get_status()["wireless_version"]
        # args.head_tracker = None if is_wireless else "mediapipe"

        instance_path = self._get_instance_path().parent
        run(
            args,
            robot=reachy_mini,
            app_stop_event=stop_event,
            settings_app=self.settings_app,
            instance_path=instance_path,
        )


if __name__ == "__main__":
    app = ReachyMiniConversationApp()
    try:
        app.wrapped_run()
    except KeyboardInterrupt:
        app.stop()
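As a quick orientation for main.py above, here is a minimal launch sketch. It is not part of this commit: it reuses parse_args() and run() exactly as they appear in the diff, and the only attribute it touches (args.gradio) is one that run() reads; any other flags stay at whatever defaults parse_args() provides.

from reachy_mini_conversation_app.main import run
from reachy_mini_conversation_app.utils import parse_args

# parse_args() reads the CLI flags referenced in run(), e.g. --gradio, --no-camera, --debug
args, _ = parse_args()
args.gradio = True  # use the Gradio UI instead of the headless LocalStream console

# run() creates its own ReachyMini connection because robot is left as None
run(args)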
src/reachy_mini_conversation_app/moves.py
ADDED
@@ -0,0 +1,849 @@
| 1 |
+
"""Movement system with sequential primary moves and additive secondary moves.
|
| 2 |
+
|
| 3 |
+
Design overview
|
| 4 |
+
- Primary moves (emotions, dances, goto, breathing) are mutually exclusive and run
|
| 5 |
+
sequentially.
|
| 6 |
+
- Secondary moves (speech sway, face tracking) are additive offsets applied on top
|
| 7 |
+
of the current primary pose.
|
| 8 |
+
- There is a single control point to the robot: `ReachyMini.set_target`.
|
| 9 |
+
- The control loop runs near 100 Hz and is phase-aligned via a monotonic clock.
|
| 10 |
+
- Idle behaviour starts an infinite `BreathingMove` after a short inactivity delay
|
| 11 |
+
unless listening is active.
|
| 12 |
+
|
| 13 |
+
Threading model
|
| 14 |
+
- A dedicated worker thread owns all real-time state and issues `set_target`
|
| 15 |
+
commands.
|
| 16 |
+
- Other threads communicate via a command queue (enqueue moves, mark activity,
|
| 17 |
+
toggle listening).
|
| 18 |
+
- Secondary offset producers set pending values guarded by locks; the worker
|
| 19 |
+
snaps them atomically.
|
| 20 |
+
|
| 21 |
+
Units and frames
|
| 22 |
+
- Secondary offsets are interpreted as metres for x/y/z and radians for
|
| 23 |
+
roll/pitch/yaw in the world frame (unless noted by `compose_world_offset`).
|
| 24 |
+
- Antennas and `body_yaw` are in radians.
|
| 25 |
+
- Head pose composition uses `compose_world_offset(primary_head, secondary_head)`;
|
| 26 |
+
the secondary offset must therefore be expressed in the world frame.
|
| 27 |
+
|
| 28 |
+
Safety
|
| 29 |
+
- Listening freezes antennas, then blends them back on unfreeze.
|
| 30 |
+
- Interpolations and blends are used to avoid jumps at all times.
|
| 31 |
+
- `set_target` errors are rate-limited in logs.
|
| 32 |
+
"""
|
| 33 |
+
|
| 34 |
+
from __future__ import annotations
|
| 35 |
+
import time
|
| 36 |
+
import logging
|
| 37 |
+
import threading
|
| 38 |
+
from queue import Empty, Queue
|
| 39 |
+
from typing import Any, Dict, Tuple
|
| 40 |
+
from collections import deque
|
| 41 |
+
from dataclasses import dataclass
|
| 42 |
+
|
| 43 |
+
import numpy as np
|
| 44 |
+
from numpy.typing import NDArray
|
| 45 |
+
|
| 46 |
+
from reachy_mini import ReachyMini
|
| 47 |
+
from reachy_mini.utils import create_head_pose
|
| 48 |
+
from reachy_mini.motion.move import Move
|
| 49 |
+
from reachy_mini.utils.interpolation import (
|
| 50 |
+
compose_world_offset,
|
| 51 |
+
linear_pose_interpolation,
|
| 52 |
+
)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
logger = logging.getLogger(__name__)
|
| 56 |
+
|
| 57 |
+
# Configuration constants
|
| 58 |
+
CONTROL_LOOP_FREQUENCY_HZ = 100.0 # Hz - Target frequency for the movement control loop
|
| 59 |
+
|
| 60 |
+
# Type definitions
|
| 61 |
+
FullBodyPose = Tuple[NDArray[np.float32], Tuple[float, float], float] # (head_pose_4x4, antennas, body_yaw)
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
class BreathingMove(Move): # type: ignore
|
| 65 |
+
"""Breathing move with interpolation to neutral and then continuous breathing patterns."""
|
| 66 |
+
|
| 67 |
+
def __init__(
|
| 68 |
+
self,
|
| 69 |
+
interpolation_start_pose: NDArray[np.float32],
|
| 70 |
+
interpolation_start_antennas: Tuple[float, float],
|
| 71 |
+
interpolation_duration: float = 1.0,
|
| 72 |
+
):
|
| 73 |
+
"""Initialize breathing move.
|
| 74 |
+
|
| 75 |
+
Args:
|
| 76 |
+
interpolation_start_pose: 4x4 matrix of current head pose to interpolate from
|
| 77 |
+
interpolation_start_antennas: Current antenna positions to interpolate from
|
| 78 |
+
interpolation_duration: Duration of interpolation to neutral (seconds)
|
| 79 |
+
|
| 80 |
+
"""
|
| 81 |
+
self.interpolation_start_pose = interpolation_start_pose
|
| 82 |
+
self.interpolation_start_antennas = np.array(interpolation_start_antennas)
|
| 83 |
+
self.interpolation_duration = interpolation_duration
|
| 84 |
+
|
| 85 |
+
# Neutral positions for breathing base
|
| 86 |
+
self.neutral_head_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
|
| 87 |
+
self.neutral_antennas = np.array([0.0, 0.0])
|
| 88 |
+
|
| 89 |
+
# Breathing parameters
|
| 90 |
+
self.breathing_z_amplitude = 0.005 # 5mm gentle breathing
|
| 91 |
+
self.breathing_frequency = 0.1 # Hz (6 breaths per minute)
|
| 92 |
+
self.antenna_sway_amplitude = np.deg2rad(15) # 15 degrees
|
| 93 |
+
self.antenna_frequency = 0.5 # Hz (faster antenna sway)
|
| 94 |
+
|
| 95 |
+
@property
|
| 96 |
+
def duration(self) -> float:
|
| 97 |
+
"""Duration property required by official Move interface."""
|
| 98 |
+
return float("inf") # Continuous breathing (never ends naturally)
|
| 99 |
+
|
| 100 |
+
def evaluate(self, t: float) -> tuple[NDArray[np.float64] | None, NDArray[np.float64] | None, float | None]:
|
| 101 |
+
"""Evaluate breathing move at time t."""
|
| 102 |
+
if t < self.interpolation_duration:
|
| 103 |
+
# Phase 1: Interpolate to neutral base position
|
| 104 |
+
interpolation_t = t / self.interpolation_duration
|
| 105 |
+
|
| 106 |
+
# Interpolate head pose
|
| 107 |
+
head_pose = linear_pose_interpolation(
|
| 108 |
+
self.interpolation_start_pose, self.neutral_head_pose, interpolation_t,
|
| 109 |
+
)
|
| 110 |
+
|
| 111 |
+
# Interpolate antennas
|
| 112 |
+
antennas_interp = (
|
| 113 |
+
1 - interpolation_t
|
| 114 |
+
) * self.interpolation_start_antennas + interpolation_t * self.neutral_antennas
|
| 115 |
+
antennas = antennas_interp.astype(np.float64)
|
| 116 |
+
|
| 117 |
+
else:
|
| 118 |
+
# Phase 2: Breathing patterns from neutral base
|
| 119 |
+
breathing_time = t - self.interpolation_duration
|
| 120 |
+
|
| 121 |
+
# Gentle z-axis breathing
|
| 122 |
+
z_offset = self.breathing_z_amplitude * np.sin(2 * np.pi * self.breathing_frequency * breathing_time)
|
| 123 |
+
head_pose = create_head_pose(x=0, y=0, z=z_offset, roll=0, pitch=0, yaw=0, degrees=True, mm=False)
|
| 124 |
+
|
| 125 |
+
# Antenna sway (opposite directions)
|
| 126 |
+
antenna_sway = self.antenna_sway_amplitude * np.sin(2 * np.pi * self.antenna_frequency * breathing_time)
|
| 127 |
+
antennas = np.array([antenna_sway, -antenna_sway], dtype=np.float64)
|
| 128 |
+
|
| 129 |
+
# Return in official Move interface format: (head_pose, antennas_array, body_yaw)
|
| 130 |
+
return (head_pose, antennas, 0.0)
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
def combine_full_body(primary_pose: FullBodyPose, secondary_pose: FullBodyPose) -> FullBodyPose:
|
| 134 |
+
"""Combine primary and secondary full body poses.
|
| 135 |
+
|
| 136 |
+
Args:
|
| 137 |
+
primary_pose: (head_pose, antennas, body_yaw) - primary move
|
| 138 |
+
secondary_pose: (head_pose, antennas, body_yaw) - secondary offsets
|
| 139 |
+
|
| 140 |
+
Returns:
|
| 141 |
+
Combined full body pose (head_pose, antennas, body_yaw)
|
| 142 |
+
|
| 143 |
+
"""
|
| 144 |
+
primary_head, primary_antennas, primary_body_yaw = primary_pose
|
| 145 |
+
secondary_head, secondary_antennas, secondary_body_yaw = secondary_pose
|
| 146 |
+
|
| 147 |
+
# Combine head poses using compose_world_offset; the secondary pose must be an
|
| 148 |
+
# offset expressed in the world frame (T_off_world) applied to the absolute
|
| 149 |
+
# primary transform (T_abs).
|
| 150 |
+
combined_head = compose_world_offset(primary_head, secondary_head, reorthonormalize=True)
|
| 151 |
+
|
| 152 |
+
# Sum antennas and body_yaw
|
| 153 |
+
combined_antennas = (
|
| 154 |
+
primary_antennas[0] + secondary_antennas[0],
|
| 155 |
+
primary_antennas[1] + secondary_antennas[1],
|
| 156 |
+
)
|
| 157 |
+
combined_body_yaw = primary_body_yaw + secondary_body_yaw
|
| 158 |
+
|
| 159 |
+
return (combined_head, combined_antennas, combined_body_yaw)
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def clone_full_body_pose(pose: FullBodyPose) -> FullBodyPose:
|
| 163 |
+
"""Create a deep copy of a full body pose tuple."""
|
| 164 |
+
head, antennas, body_yaw = pose
|
| 165 |
+
return (head.copy(), (float(antennas[0]), float(antennas[1])), float(body_yaw))
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
@dataclass
|
| 169 |
+
class MovementState:
|
| 170 |
+
"""State tracking for the movement system."""
|
| 171 |
+
|
| 172 |
+
# Primary move state
|
| 173 |
+
current_move: Move | None = None
|
| 174 |
+
move_start_time: float | None = None
|
| 175 |
+
last_activity_time: float = 0.0
|
| 176 |
+
|
| 177 |
+
# Secondary move state (offsets)
|
| 178 |
+
speech_offsets: Tuple[float, float, float, float, float, float] = (
|
| 179 |
+
0.0,
|
| 180 |
+
0.0,
|
| 181 |
+
0.0,
|
| 182 |
+
0.0,
|
| 183 |
+
0.0,
|
| 184 |
+
0.0,
|
| 185 |
+
)
|
| 186 |
+
face_tracking_offsets: Tuple[float, float, float, float, float, float] = (
|
| 187 |
+
0.0,
|
| 188 |
+
0.0,
|
| 189 |
+
0.0,
|
| 190 |
+
0.0,
|
| 191 |
+
0.0,
|
| 192 |
+
0.0,
|
| 193 |
+
)
|
| 194 |
+
|
| 195 |
+
# Status flags
|
| 196 |
+
last_primary_pose: FullBodyPose | None = None
|
| 197 |
+
|
| 198 |
+
def update_activity(self) -> None:
|
| 199 |
+
"""Update the last activity time."""
|
| 200 |
+
self.last_activity_time = time.monotonic()
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
@dataclass
|
| 204 |
+
class LoopFrequencyStats:
|
| 205 |
+
"""Track rolling loop frequency statistics."""
|
| 206 |
+
|
| 207 |
+
mean: float = 0.0
|
| 208 |
+
m2: float = 0.0
|
| 209 |
+
min_freq: float = float("inf")
|
| 210 |
+
count: int = 0
|
| 211 |
+
last_freq: float = 0.0
|
| 212 |
+
potential_freq: float = 0.0
|
| 213 |
+
|
| 214 |
+
def reset(self) -> None:
|
| 215 |
+
"""Reset accumulators while keeping the last potential frequency."""
|
| 216 |
+
self.mean = 0.0
|
| 217 |
+
self.m2 = 0.0
|
| 218 |
+
self.min_freq = float("inf")
|
| 219 |
+
self.count = 0
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
class MovementManager:
|
| 223 |
+
"""Coordinate sequential moves, additive offsets, and robot output at 100 Hz.
|
| 224 |
+
|
| 225 |
+
Responsibilities:
|
| 226 |
+
- Own a real-time loop that samples the current primary move (if any), fuses
|
| 227 |
+
secondary offsets, and calls `set_target` exactly once per tick.
|
| 228 |
+
- Start an idle `BreathingMove` after `idle_inactivity_delay` when not
|
| 229 |
+
listening and no moves are queued.
|
| 230 |
+
- Expose thread-safe APIs so other threads can enqueue moves, mark activity,
|
| 231 |
+
or feed secondary offsets without touching internal state.
|
| 232 |
+
|
| 233 |
+
Timing:
|
| 234 |
+
- All elapsed-time calculations rely on `time.monotonic()` through `self._now`
|
| 235 |
+
to avoid wall-clock jumps.
|
| 236 |
+
- The loop attempts 100 Hz
|
| 237 |
+
|
| 238 |
+
Concurrency:
|
| 239 |
+
- External threads communicate via `_command_queue` messages.
|
| 240 |
+
- Secondary offsets are staged via dirty flags guarded by locks and consumed
|
| 241 |
+
atomically inside the worker loop.
|
| 242 |
+
"""
|
| 243 |
+
|
| 244 |
+
def __init__(
|
| 245 |
+
self,
|
| 246 |
+
current_robot: ReachyMini,
|
| 247 |
+
camera_worker: "Any" = None,
|
| 248 |
+
):
|
| 249 |
+
"""Initialize movement manager."""
|
| 250 |
+
self.current_robot = current_robot
|
| 251 |
+
self.camera_worker = camera_worker
|
| 252 |
+
|
| 253 |
+
# Single timing source for durations
|
| 254 |
+
self._now = time.monotonic
|
| 255 |
+
|
| 256 |
+
# Movement state
|
| 257 |
+
self.state = MovementState()
|
| 258 |
+
self.state.last_activity_time = self._now()
|
| 259 |
+
neutral_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
|
| 260 |
+
self.state.last_primary_pose = (neutral_pose, (0.0, 0.0), 0.0)
|
| 261 |
+
|
| 262 |
+
# Move queue (primary moves)
|
| 263 |
+
self.move_queue: deque[Move] = deque()
|
| 264 |
+
|
| 265 |
+
# Configuration
|
| 266 |
+
self.idle_inactivity_delay = 0.3 # seconds
|
| 267 |
+
self.target_frequency = CONTROL_LOOP_FREQUENCY_HZ
|
| 268 |
+
self.target_period = 1.0 / self.target_frequency
|
| 269 |
+
|
| 270 |
+
self._stop_event = threading.Event()
|
| 271 |
+
self._thread: threading.Thread | None = None
|
| 272 |
+
self._is_listening = False
|
| 273 |
+
self._last_commanded_pose: FullBodyPose = clone_full_body_pose(self.state.last_primary_pose)
|
| 274 |
+
self._listening_antennas: Tuple[float, float] = self._last_commanded_pose[1]
|
| 275 |
+
self._antenna_unfreeze_blend = 1.0
|
| 276 |
+
self._antenna_blend_duration = 0.4 # seconds to blend back after listening
|
| 277 |
+
self._last_listening_blend_time = self._now()
|
| 278 |
+
self._breathing_active = False # true when breathing move is running or queued
|
| 279 |
+
self._listening_debounce_s = 0.15
|
| 280 |
+
self._last_listening_toggle_time = self._now()
|
| 281 |
+
self._last_set_target_err = 0.0
|
| 282 |
+
self._set_target_err_interval = 1.0 # seconds between error logs
|
| 283 |
+
self._set_target_err_suppressed = 0
|
| 284 |
+
|
| 285 |
+
# Cross-thread signalling
|
| 286 |
+
self._command_queue: "Queue[Tuple[str, Any]]" = Queue()
|
| 287 |
+
self._speech_offsets_lock = threading.Lock()
|
| 288 |
+
self._pending_speech_offsets: Tuple[float, float, float, float, float, float] = (
|
| 289 |
+
0.0,
|
| 290 |
+
0.0,
|
| 291 |
+
0.0,
|
| 292 |
+
0.0,
|
| 293 |
+
0.0,
|
| 294 |
+
0.0,
|
| 295 |
+
)
|
| 296 |
+
self._speech_offsets_dirty = False
|
| 297 |
+
|
| 298 |
+
self._face_offsets_lock = threading.Lock()
|
| 299 |
+
self._pending_face_offsets: Tuple[float, float, float, float, float, float] = (
|
| 300 |
+
0.0,
|
| 301 |
+
0.0,
|
| 302 |
+
0.0,
|
| 303 |
+
0.0,
|
| 304 |
+
0.0,
|
| 305 |
+
0.0,
|
| 306 |
+
)
|
| 307 |
+
self._face_offsets_dirty = False
|
| 308 |
+
|
| 309 |
+
self._shared_state_lock = threading.Lock()
|
| 310 |
+
self._shared_last_activity_time = self.state.last_activity_time
|
| 311 |
+
self._shared_is_listening = self._is_listening
|
| 312 |
+
self._status_lock = threading.Lock()
|
| 313 |
+
self._freq_stats = LoopFrequencyStats()
|
| 314 |
+
self._freq_snapshot = LoopFrequencyStats()
|
| 315 |
+
|
| 316 |
+
def queue_move(self, move: Move) -> None:
|
| 317 |
+
"""Queue a primary move to run after the currently executing one.
|
| 318 |
+
|
| 319 |
+
Thread-safe: the move is enqueued via the worker command queue so the
|
| 320 |
+
control loop remains the sole mutator of movement state.
|
| 321 |
+
"""
|
| 322 |
+
self._command_queue.put(("queue_move", move))
|
| 323 |
+
|
| 324 |
+
def clear_move_queue(self) -> None:
|
| 325 |
+
"""Stop the active move and discard any queued primary moves.
|
| 326 |
+
|
| 327 |
+
Thread-safe: executed by the worker thread via the command queue.
|
| 328 |
+
"""
|
| 329 |
+
self._command_queue.put(("clear_queue", None))
|
| 330 |
+
|
| 331 |
+
def set_speech_offsets(self, offsets: Tuple[float, float, float, float, float, float]) -> None:
|
| 332 |
+
"""Update speech-induced secondary offsets (x, y, z, roll, pitch, yaw).
|
| 333 |
+
|
| 334 |
+
Offsets are interpreted as metres for translation and radians for
|
| 335 |
+
rotation in the world frame. Thread-safe via a pending snapshot.
|
| 336 |
+
"""
|
| 337 |
+
with self._speech_offsets_lock:
|
| 338 |
+
self._pending_speech_offsets = offsets
|
| 339 |
+
self._speech_offsets_dirty = True
|
| 340 |
+
|
| 341 |
+
def set_moving_state(self, duration: float) -> None:
|
| 342 |
+
"""Mark the robot as actively moving for the provided duration.
|
| 343 |
+
|
| 344 |
+
Legacy hook used by goto helpers to keep inactivity and breathing logic
|
| 345 |
+
aware of manual motions. Thread-safe via the command queue.
|
| 346 |
+
"""
|
| 347 |
+
self._command_queue.put(("set_moving_state", duration))
|
| 348 |
+
|
| 349 |
+
def is_idle(self) -> bool:
|
| 350 |
+
"""Return True when the robot has been inactive longer than the idle delay."""
|
| 351 |
+
with self._shared_state_lock:
|
| 352 |
+
last_activity = self._shared_last_activity_time
|
| 353 |
+
listening = self._shared_is_listening
|
| 354 |
+
|
| 355 |
+
if listening:
|
| 356 |
+
return False
|
| 357 |
+
|
| 358 |
+
return self._now() - last_activity >= self.idle_inactivity_delay
|
| 359 |
+
|
| 360 |
+
def set_listening(self, listening: bool) -> None:
|
| 361 |
+
"""Enable or disable listening mode without touching shared state directly.
|
| 362 |
+
|
| 363 |
+
While listening:
|
| 364 |
+
- Antenna positions are frozen at the last commanded values.
|
| 365 |
+
- Blending is reset so that upon unfreezing the antennas return smoothly.
|
| 366 |
+
- Idle breathing is suppressed.
|
| 367 |
+
|
| 368 |
+
Thread-safe: the change is posted to the worker command queue.
|
| 369 |
+
"""
|
| 370 |
+
with self._shared_state_lock:
|
| 371 |
+
if self._shared_is_listening == listening:
|
| 372 |
+
return
|
| 373 |
+
self._command_queue.put(("set_listening", listening))
|
| 374 |
+
|
| 375 |
+
def _poll_signals(self, current_time: float) -> None:
|
| 376 |
+
"""Apply queued commands and pending offset updates."""
|
| 377 |
+
self._apply_pending_offsets()
|
| 378 |
+
|
| 379 |
+
while True:
|
| 380 |
+
try:
|
| 381 |
+
command, payload = self._command_queue.get_nowait()
|
| 382 |
+
except Empty:
|
| 383 |
+
break
|
| 384 |
+
self._handle_command(command, payload, current_time)
|
| 385 |
+
|
| 386 |
+
def _apply_pending_offsets(self) -> None:
|
| 387 |
+
"""Apply the most recent speech/face offset updates."""
|
| 388 |
+
speech_offsets: Tuple[float, float, float, float, float, float] | None = None
|
| 389 |
+
with self._speech_offsets_lock:
|
| 390 |
+
if self._speech_offsets_dirty:
|
| 391 |
+
speech_offsets = self._pending_speech_offsets
|
| 392 |
+
self._speech_offsets_dirty = False
|
| 393 |
+
|
| 394 |
+
if speech_offsets is not None:
|
| 395 |
+
self.state.speech_offsets = speech_offsets
|
| 396 |
+
self.state.update_activity()
|
| 397 |
+
|
| 398 |
+
face_offsets: Tuple[float, float, float, float, float, float] | None = None
|
| 399 |
+
with self._face_offsets_lock:
|
| 400 |
+
if self._face_offsets_dirty:
|
| 401 |
+
face_offsets = self._pending_face_offsets
|
| 402 |
+
self._face_offsets_dirty = False
|
| 403 |
+
|
| 404 |
+
if face_offsets is not None:
|
| 405 |
+
self.state.face_tracking_offsets = face_offsets
|
| 406 |
+
self.state.update_activity()
|
| 407 |
+
|
| 408 |
+
def _handle_command(self, command: str, payload: Any, current_time: float) -> None:
|
| 409 |
+
"""Handle a single cross-thread command."""
|
| 410 |
+
if command == "queue_move":
|
| 411 |
+
if isinstance(payload, Move):
|
| 412 |
+
self.move_queue.append(payload)
|
| 413 |
+
self.state.update_activity()
|
| 414 |
+
duration = getattr(payload, "duration", None)
|
| 415 |
+
if duration is not None:
|
| 416 |
+
try:
|
| 417 |
+
duration_str = f"{float(duration):.2f}"
|
| 418 |
+
except (TypeError, ValueError):
|
| 419 |
+
duration_str = str(duration)
|
| 420 |
+
else:
|
| 421 |
+
duration_str = "?"
|
| 422 |
+
logger.debug(
|
| 423 |
+
"Queued move with duration %ss, queue size: %s",
|
| 424 |
+
duration_str,
|
| 425 |
+
len(self.move_queue),
|
| 426 |
+
)
|
| 427 |
+
else:
|
| 428 |
+
logger.warning("Ignored queue_move command with invalid payload: %s", payload)
|
| 429 |
+
elif command == "clear_queue":
|
| 430 |
+
self.move_queue.clear()
|
| 431 |
+
self.state.current_move = None
|
| 432 |
+
self.state.move_start_time = None
|
| 433 |
+
self._breathing_active = False
|
| 434 |
+
logger.info("Cleared move queue and stopped current move")
|
| 435 |
+
elif command == "set_moving_state":
|
| 436 |
+
try:
|
| 437 |
+
duration = float(payload)
|
| 438 |
+
except (TypeError, ValueError):
|
| 439 |
+
logger.warning("Invalid moving state duration: %s", payload)
|
| 440 |
+
return
|
| 441 |
+
self.state.update_activity()
|
| 442 |
+
elif command == "mark_activity":
|
| 443 |
+
self.state.update_activity()
|
| 444 |
+
elif command == "set_listening":
|
| 445 |
+
desired_state = bool(payload)
|
| 446 |
+
now = self._now()
|
| 447 |
+
if now - self._last_listening_toggle_time < self._listening_debounce_s:
|
| 448 |
+
return
|
| 449 |
+
self._last_listening_toggle_time = now
|
| 450 |
+
|
| 451 |
+
if self._is_listening == desired_state:
|
| 452 |
+
return
|
| 453 |
+
|
| 454 |
+
self._is_listening = desired_state
|
| 455 |
+
self._last_listening_blend_time = now
|
| 456 |
+
if desired_state:
|
| 457 |
+
# Freeze: snapshot current commanded antennas and reset blend
|
| 458 |
+
self._listening_antennas = (
|
| 459 |
+
float(self._last_commanded_pose[1][0]),
|
| 460 |
+
float(self._last_commanded_pose[1][1]),
|
| 461 |
+
)
|
| 462 |
+
self._antenna_unfreeze_blend = 0.0
|
| 463 |
+
else:
|
| 464 |
+
# Unfreeze: restart blending from frozen pose
|
| 465 |
+
self._antenna_unfreeze_blend = 0.0
|
| 466 |
+
self.state.update_activity()
|
| 467 |
+
else:
|
| 468 |
+
logger.warning("Unknown command received by MovementManager: %s", command)
|
| 469 |
+
|
| 470 |
+
def _publish_shared_state(self) -> None:
|
| 471 |
+
"""Expose idle-related state for external threads."""
|
| 472 |
+
with self._shared_state_lock:
|
| 473 |
+
self._shared_last_activity_time = self.state.last_activity_time
|
| 474 |
+
self._shared_is_listening = self._is_listening
|
| 475 |
+
|
| 476 |
+
def _manage_move_queue(self, current_time: float) -> None:
|
| 477 |
+
"""Manage the primary move queue (sequential execution)."""
|
| 478 |
+
if self.state.current_move is None or (
|
| 479 |
+
self.state.move_start_time is not None
|
| 480 |
+
and current_time - self.state.move_start_time >= self.state.current_move.duration
|
| 481 |
+
):
|
| 482 |
+
self.state.current_move = None
|
| 483 |
+
self.state.move_start_time = None
|
| 484 |
+
|
| 485 |
+
if self.move_queue:
|
| 486 |
+
self.state.current_move = self.move_queue.popleft()
|
| 487 |
+
self.state.move_start_time = current_time
|
| 488 |
+
# Any real move cancels breathing mode flag
|
| 489 |
+
self._breathing_active = isinstance(self.state.current_move, BreathingMove)
|
| 490 |
+
logger.debug(f"Starting new move, duration: {self.state.current_move.duration}s")
|
| 491 |
+
|
| 492 |
+
def _manage_breathing(self, current_time: float) -> None:
|
| 493 |
+
"""Manage automatic breathing when idle."""
|
| 494 |
+
if (
|
| 495 |
+
self.state.current_move is None
|
| 496 |
+
and not self.move_queue
|
| 497 |
+
and not self._is_listening
|
| 498 |
+
and not self._breathing_active
|
| 499 |
+
):
|
| 500 |
+
idle_for = current_time - self.state.last_activity_time
|
| 501 |
+
if idle_for >= self.idle_inactivity_delay:
|
| 502 |
+
try:
|
| 503 |
+
# These 2 functions return the latest available sensor data from the robot, but don't perform I/O synchronously.
|
| 504 |
+
# Therefore, we accept calling them inside the control loop.
|
| 505 |
+
_, current_antennas = self.current_robot.get_current_joint_positions()
|
| 506 |
+
current_head_pose = self.current_robot.get_current_head_pose()
|
| 507 |
+
|
| 508 |
+
self._breathing_active = True
|
| 509 |
+
self.state.update_activity()
|
| 510 |
+
|
| 511 |
+
breathing_move = BreathingMove(
|
| 512 |
+
interpolation_start_pose=current_head_pose,
|
| 513 |
+
interpolation_start_antennas=current_antennas,
|
| 514 |
+
interpolation_duration=1.0,
|
| 515 |
+
)
|
| 516 |
+
self.move_queue.append(breathing_move)
|
| 517 |
+
logger.debug("Started breathing after %.1fs of inactivity", idle_for)
|
| 518 |
+
except Exception as e:
|
| 519 |
+
self._breathing_active = False
|
| 520 |
+
logger.error("Failed to start breathing: %s", e)
|
| 521 |
+
|
| 522 |
+
if isinstance(self.state.current_move, BreathingMove) and self.move_queue:
|
| 523 |
+
self.state.current_move = None
|
| 524 |
+
self.state.move_start_time = None
|
| 525 |
+
self._breathing_active = False
|
| 526 |
+
logger.debug("Stopping breathing due to new move activity")
|
| 527 |
+
|
| 528 |
+
if self.state.current_move is not None and not isinstance(self.state.current_move, BreathingMove):
|
| 529 |
+
self._breathing_active = False
|
| 530 |
+
|
| 531 |
+
def _get_primary_pose(self, current_time: float) -> FullBodyPose:
|
| 532 |
+
"""Get the primary full body pose from current move or neutral."""
|
| 533 |
+
# When a primary move is playing, sample it and cache the resulting pose
|
| 534 |
+
if self.state.current_move is not None and self.state.move_start_time is not None:
|
| 535 |
+
move_time = current_time - self.state.move_start_time
|
| 536 |
+
head, antennas, body_yaw = self.state.current_move.evaluate(move_time)
|
| 537 |
+
|
| 538 |
+
if head is None:
|
| 539 |
+
head = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
|
| 540 |
+
if antennas is None:
|
| 541 |
+
antennas = np.array([0.0, 0.0])
|
| 542 |
+
if body_yaw is None:
|
| 543 |
+
body_yaw = 0.0
|
| 544 |
+
|
| 545 |
+
antennas_tuple = (float(antennas[0]), float(antennas[1]))
|
| 546 |
+
head_copy = head.copy()
|
| 547 |
+
primary_full_body_pose = (
|
| 548 |
+
head_copy,
|
| 549 |
+
antennas_tuple,
|
| 550 |
+
float(body_yaw),
|
| 551 |
+
)
|
| 552 |
+
|
| 553 |
+
self.state.last_primary_pose = clone_full_body_pose(primary_full_body_pose)
|
| 554 |
+
# Otherwise reuse the last primary pose so we avoid jumps between moves
|
| 555 |
+
elif self.state.last_primary_pose is not None:
|
| 556 |
+
primary_full_body_pose = clone_full_body_pose(self.state.last_primary_pose)
|
| 557 |
+
else:
|
| 558 |
+
neutral_head_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
|
| 559 |
+
primary_full_body_pose = (neutral_head_pose, (0.0, 0.0), 0.0)
|
| 560 |
+
self.state.last_primary_pose = clone_full_body_pose(primary_full_body_pose)
|
| 561 |
+
|
| 562 |
+
return primary_full_body_pose
|
| 563 |
+
|
| 564 |
+
def _get_secondary_pose(self) -> FullBodyPose:
|
| 565 |
+
"""Get the secondary full body pose from speech and face tracking offsets."""
|
| 566 |
+
# Combine speech sway offsets + face tracking offsets for secondary pose
|
| 567 |
+
secondary_offsets = [
|
| 568 |
+
self.state.speech_offsets[0] + self.state.face_tracking_offsets[0],
|
| 569 |
+
self.state.speech_offsets[1] + self.state.face_tracking_offsets[1],
|
| 570 |
+
self.state.speech_offsets[2] + self.state.face_tracking_offsets[2],
|
| 571 |
+
self.state.speech_offsets[3] + self.state.face_tracking_offsets[3],
|
| 572 |
+
self.state.speech_offsets[4] + self.state.face_tracking_offsets[4],
|
| 573 |
+
self.state.speech_offsets[5] + self.state.face_tracking_offsets[5],
|
| 574 |
+
]
|
| 575 |
+
|
| 576 |
+
secondary_head_pose = create_head_pose(
|
| 577 |
+
x=secondary_offsets[0],
|
| 578 |
+
y=secondary_offsets[1],
|
| 579 |
+
z=secondary_offsets[2],
|
| 580 |
+
roll=secondary_offsets[3],
|
| 581 |
+
pitch=secondary_offsets[4],
|
| 582 |
+
yaw=secondary_offsets[5],
|
| 583 |
+
degrees=False,
|
| 584 |
+
mm=False,
|
| 585 |
+
)
|
| 586 |
+
return (secondary_head_pose, (0.0, 0.0), 0.0)
|
| 587 |
+
|
| 588 |
+
def _compose_full_body_pose(self, current_time: float) -> FullBodyPose:
|
| 589 |
+
"""Compose primary and secondary poses into a single command pose."""
|
| 590 |
+
primary = self._get_primary_pose(current_time)
|
| 591 |
+
secondary = self._get_secondary_pose()
|
| 592 |
+
return combine_full_body(primary, secondary)
|
| 593 |
+
|
| 594 |
+
def _update_primary_motion(self, current_time: float) -> None:
|
| 595 |
+
"""Advance queue state and idle behaviours for this tick."""
|
| 596 |
+
self._manage_move_queue(current_time)
|
| 597 |
+
self._manage_breathing(current_time)
|
| 598 |
+
|
| 599 |
+
def _calculate_blended_antennas(self, target_antennas: Tuple[float, float]) -> Tuple[float, float]:
|
| 600 |
+
"""Blend target antennas with listening freeze state and update blending."""
|
| 601 |
+
now = self._now()
|
| 602 |
+
listening = self._is_listening
|
| 603 |
+
listening_antennas = self._listening_antennas
|
| 604 |
+
blend = self._antenna_unfreeze_blend
|
| 605 |
+
blend_duration = self._antenna_blend_duration
|
| 606 |
+
last_update = self._last_listening_blend_time
|
| 607 |
+
self._last_listening_blend_time = now
|
| 608 |
+
|
| 609 |
+
if listening:
|
| 610 |
+
antennas_cmd = listening_antennas
|
| 611 |
+
new_blend = 0.0
|
| 612 |
+
else:
|
| 613 |
+
dt = max(0.0, now - last_update)
|
| 614 |
+
if blend_duration <= 0:
|
| 615 |
+
new_blend = 1.0
|
| 616 |
+
else:
|
| 617 |
+
new_blend = min(1.0, blend + dt / blend_duration)
|
| 618 |
+
antennas_cmd = (
|
| 619 |
+
listening_antennas[0] * (1.0 - new_blend) + target_antennas[0] * new_blend,
|
| 620 |
+
listening_antennas[1] * (1.0 - new_blend) + target_antennas[1] * new_blend,
|
| 621 |
+
)
|
| 622 |
+
|
| 623 |
+
if listening:
|
| 624 |
+
self._antenna_unfreeze_blend = 0.0
|
| 625 |
+
else:
|
| 626 |
+
self._antenna_unfreeze_blend = new_blend
|
| 627 |
+
if new_blend >= 1.0:
|
| 628 |
+
self._listening_antennas = (
|
| 629 |
+
float(target_antennas[0]),
|
| 630 |
+
float(target_antennas[1]),
|
| 631 |
+
)
|
| 632 |
+
|
| 633 |
+
return antennas_cmd
|
| 634 |
+
|
| 635 |
+
def _issue_control_command(self, head: NDArray[np.float32], antennas: Tuple[float, float], body_yaw: float) -> None:
|
| 636 |
+
"""Send the fused pose to the robot with throttled error logging."""
|
| 637 |
+
try:
|
| 638 |
+
self.current_robot.set_target(head=head, antennas=antennas, body_yaw=body_yaw)
|
| 639 |
+
except Exception as e:
|
| 640 |
+
now = self._now()
|
| 641 |
+
if now - self._last_set_target_err >= self._set_target_err_interval:
|
| 642 |
+
msg = f"Failed to set robot target: {e}"
|
| 643 |
+
if self._set_target_err_suppressed:
|
| 644 |
+
msg += f" (suppressed {self._set_target_err_suppressed} repeats)"
|
| 645 |
+
self._set_target_err_suppressed = 0
|
| 646 |
+
logger.error(msg)
|
| 647 |
+
self._last_set_target_err = now
|
| 648 |
+
else:
|
| 649 |
+
self._set_target_err_suppressed += 1
|
| 650 |
+
else:
|
| 651 |
+
with self._status_lock:
|
| 652 |
+
self._last_commanded_pose = clone_full_body_pose((head, antennas, body_yaw))
|
| 653 |
+
|
| 654 |
+
def _update_frequency_stats(
|
| 655 |
+
self, loop_start: float, prev_loop_start: float, stats: LoopFrequencyStats,
|
| 656 |
+
) -> LoopFrequencyStats:
|
| 657 |
+
"""Update frequency statistics based on the current loop start time."""
|
| 658 |
+
period = loop_start - prev_loop_start
|
| 659 |
+
if period > 0:
|
| 660 |
+
stats.last_freq = 1.0 / period
|
| 661 |
+
stats.count += 1
|
| 662 |
+
delta = stats.last_freq - stats.mean
|
| 663 |
+
stats.mean += delta / stats.count
|
| 664 |
+
stats.m2 += delta * (stats.last_freq - stats.mean)
|
| 665 |
+
stats.min_freq = min(stats.min_freq, stats.last_freq)
|
| 666 |
+
return stats
|
| 667 |
+
|
| 668 |
+
def _schedule_next_tick(self, loop_start: float, stats: LoopFrequencyStats) -> Tuple[float, LoopFrequencyStats]:
|
| 669 |
+
"""Compute sleep time to maintain target frequency and update potential freq."""
|
| 670 |
+
computation_time = self._now() - loop_start
|
| 671 |
+
stats.potential_freq = 1.0 / computation_time if computation_time > 0 else float("inf")
|
| 672 |
+
sleep_time = max(0.0, self.target_period - computation_time)
|
| 673 |
+
return sleep_time, stats
|
| 674 |
+
|
| 675 |
+
def _record_frequency_snapshot(self, stats: LoopFrequencyStats) -> None:
|
| 676 |
+
"""Store a thread-safe snapshot of current frequency statistics."""
|
| 677 |
+
with self._status_lock:
|
| 678 |
+
self._freq_snapshot = LoopFrequencyStats(
|
| 679 |
+
mean=stats.mean,
|
| 680 |
+
m2=stats.m2,
|
| 681 |
+
min_freq=stats.min_freq,
|
| 682 |
+
count=stats.count,
|
| 683 |
+
last_freq=stats.last_freq,
|
| 684 |
+
potential_freq=stats.potential_freq,
|
| 685 |
+
)
|
| 686 |
+
|
| 687 |
+
def _maybe_log_frequency(self, loop_count: int, print_interval_loops: int, stats: LoopFrequencyStats) -> None:
|
| 688 |
+
"""Emit frequency telemetry when enough loops have elapsed."""
|
| 689 |
+
if loop_count % print_interval_loops != 0 or stats.count == 0:
|
| 690 |
+
return
|
| 691 |
+
|
| 692 |
+
variance = stats.m2 / stats.count if stats.count > 0 else 0.0
|
| 693 |
+
lowest = stats.min_freq if stats.min_freq != float("inf") else 0.0
|
| 694 |
+
logger.debug(
|
| 695 |
+
"Loop freq - avg: %.2fHz, variance: %.4f, min: %.2fHz, last: %.2fHz, potential: %.2fHz, target: %.1fHz",
|
| 696 |
+
stats.mean,
|
| 697 |
+
variance,
|
| 698 |
+
lowest,
|
| 699 |
+
stats.last_freq,
|
| 700 |
+
stats.potential_freq,
|
| 701 |
+
self.target_frequency,
|
| 702 |
+
)
|
| 703 |
+
stats.reset()
|
| 704 |
+
|
| 705 |
+
def _update_face_tracking(self, current_time: float) -> None:
|
| 706 |
+
"""Get face tracking offsets from camera worker thread."""
|
| 707 |
+
if self.camera_worker is not None:
|
| 708 |
+
# Get face tracking offsets from camera worker thread
|
| 709 |
+
offsets = self.camera_worker.get_face_tracking_offsets()
|
| 710 |
+
self.state.face_tracking_offsets = offsets
|
| 711 |
+
else:
|
| 712 |
+
# No camera worker, use neutral offsets
|
| 713 |
+
self.state.face_tracking_offsets = (0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
|
| 714 |
+
|
| 715 |
+
def start(self) -> None:
|
| 716 |
+
"""Start the worker thread that drives the 100 Hz control loop."""
|
| 717 |
+
if self._thread is not None and self._thread.is_alive():
|
| 718 |
+
logger.warning("Move worker already running; start() ignored")
|
| 719 |
+
return
|
| 720 |
+
self._stop_event.clear()
|
| 721 |
+
self._thread = threading.Thread(target=self.working_loop, daemon=True)
|
| 722 |
+
self._thread.start()
|
| 723 |
+
logger.debug("Move worker started")
|
| 724 |
+
|
| 725 |
+
def stop(self) -> None:
|
| 726 |
+
"""Request the worker thread to stop and wait for it to exit.
|
| 727 |
+
|
| 728 |
+
Before stopping, resets the robot to a neutral position.
|
| 729 |
+
"""
|
| 730 |
+
if self._thread is None or not self._thread.is_alive():
|
| 731 |
+
logger.debug("Move worker not running; stop() ignored")
|
| 732 |
+
return
|
| 733 |
+
|
| 734 |
+
logger.info("Stopping movement manager and resetting to neutral position...")
|
| 735 |
+
|
| 736 |
+
# Clear any queued moves and stop current move
|
| 737 |
+
self.clear_move_queue()
|
| 738 |
+
|
| 739 |
+
        # Stop the worker thread first so it doesn't interfere
        self._stop_event.set()
        if self._thread is not None:
            self._thread.join()
            self._thread = None
        logger.debug("Move worker stopped")

        # Reset to neutral position using goto_target (same approach as wake_up)
        try:
            neutral_head_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
            neutral_antennas = [0.0, 0.0]
            neutral_body_yaw = 0.0

            # Use goto_target directly on the robot
            self.current_robot.goto_target(
                head=neutral_head_pose,
                antennas=neutral_antennas,
                duration=2.0,
                body_yaw=neutral_body_yaw,
            )

            logger.info("Reset to neutral position completed")

        except Exception as e:
            logger.error(f"Failed to reset to neutral position: {e}")

    def get_status(self) -> Dict[str, Any]:
        """Return a lightweight status snapshot for observability."""
        with self._status_lock:
            pose_snapshot = clone_full_body_pose(self._last_commanded_pose)
            freq_snapshot = LoopFrequencyStats(
                mean=self._freq_snapshot.mean,
                m2=self._freq_snapshot.m2,
                min_freq=self._freq_snapshot.min_freq,
                count=self._freq_snapshot.count,
                last_freq=self._freq_snapshot.last_freq,
                potential_freq=self._freq_snapshot.potential_freq,
            )

        head_matrix = pose_snapshot[0].tolist() if pose_snapshot else None
        antennas = pose_snapshot[1] if pose_snapshot else None
        body_yaw = pose_snapshot[2] if pose_snapshot else None

        return {
            "queue_size": len(self.move_queue),
            "is_listening": self._is_listening,
            "breathing_active": self._breathing_active,
            "last_commanded_pose": {
                "head": head_matrix,
                "antennas": antennas,
                "body_yaw": body_yaw,
            },
            "loop_frequency": {
                "last": freq_snapshot.last_freq,
                "mean": freq_snapshot.mean,
                "min": freq_snapshot.min_freq,
                "potential": freq_snapshot.potential_freq,
                "samples": freq_snapshot.count,
            },
        }

    def working_loop(self) -> None:
        """Control loop main movements - reproduces main_works.py control architecture.

        Single set_target() call with pose fusion.
        """
        logger.debug("Starting enhanced movement control loop (100Hz)")

        loop_count = 0
        prev_loop_start = self._now()
        print_interval_loops = max(1, int(self.target_frequency * 2))
        freq_stats = self._freq_stats

        while not self._stop_event.is_set():
            loop_start = self._now()
            loop_count += 1

            if loop_count > 1:
                freq_stats = self._update_frequency_stats(loop_start, prev_loop_start, freq_stats)
            prev_loop_start = loop_start

            # 1) Poll external commands and apply pending offsets (atomic snapshot)
            self._poll_signals(loop_start)

            # 2) Manage the primary move queue (start new move, end finished move, breathing)
            self._update_primary_motion(loop_start)

            # 3) Update vision-based secondary offsets
            self._update_face_tracking(loop_start)

            # 4) Build primary and secondary full-body poses, then fuse them
            head, antennas, body_yaw = self._compose_full_body_pose(loop_start)

            # 5) Apply listening antenna freeze or blend-back
            antennas_cmd = self._calculate_blended_antennas(antennas)

            # 6) Single set_target call - the only control point
            self._issue_control_command(head, antennas_cmd, body_yaw)

            # 7) Adaptive sleep to align to next tick, then publish shared state
            sleep_time, freq_stats = self._schedule_next_tick(loop_start, freq_stats)
            self._publish_shared_state()
            self._record_frequency_snapshot(freq_stats)

            # 8) Periodic telemetry on loop frequency
            self._maybe_log_frequency(loop_count, print_interval_loops, freq_stats)

            if sleep_time > 0:
                time.sleep(sleep_time)

        logger.debug("Movement control loop stopped")
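Note: the snapshot returned by get_status() above is made of plain dicts, lists, and numbers, so it can be polled from another thread without touching robot state. A minimal observability sketch, assuming `manager` is an already-started instance of the movement worker shown above (the 1 s period and the logger name are illustrative choices, not part of the app):

import time
import logging

logger = logging.getLogger("status_monitor")

def monitor_loop(manager, period_s: float = 1.0) -> None:
    """Periodically log queue depth and loop frequency from the worker's status snapshot."""
    while True:
        status = manager.get_status()  # thread-safe: copies are taken under the status lock
        freq = status["loop_frequency"]
        logger.info(
            "queue=%d listening=%s mean_freq=%.1fHz min_freq=%s",
            status["queue_size"],
            status["is_listening"],
            freq["mean"] or 0.0,
            freq["min"],
        )
        time.sleep(period_s)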
src/reachy_mini_conversation_app/openai_realtime.py
ADDED
@@ -0,0 +1,719 @@
import json
import base64
import random
import asyncio
import logging
from typing import Any, Final, Tuple, Literal, Optional
from pathlib import Path
from datetime import datetime

import cv2
import numpy as np
import gradio as gr
from openai import AsyncOpenAI
from fastrtc import AdditionalOutputs, AsyncStreamHandler, wait_for_item, audio_to_int16
from numpy.typing import NDArray
from scipy.signal import resample
from websockets.exceptions import ConnectionClosedError

from reachy_mini_conversation_app.config import config
from reachy_mini_conversation_app.prompts import get_session_voice, get_session_instructions
from reachy_mini_conversation_app.tools.core_tools import (
    ToolDependencies,
    get_tool_specs,
    dispatch_tool_call,
)


logger = logging.getLogger(__name__)

OPEN_AI_INPUT_SAMPLE_RATE: Final[Literal[24000]] = 24000
OPEN_AI_OUTPUT_SAMPLE_RATE: Final[Literal[24000]] = 24000


class OpenaiRealtimeHandler(AsyncStreamHandler):
    """An OpenAI realtime handler for fastrtc Stream."""

    def __init__(self, deps: ToolDependencies, gradio_mode: bool = False, instance_path: Optional[str] = None):
        """Initialize the handler."""
        super().__init__(
            expected_layout="mono",
            output_sample_rate=OPEN_AI_OUTPUT_SAMPLE_RATE,
            input_sample_rate=OPEN_AI_INPUT_SAMPLE_RATE,
        )

        # Override typing of the sample rates to match OpenAI's requirements
        self.output_sample_rate: Literal[24000] = self.output_sample_rate
        self.input_sample_rate: Literal[24000] = self.input_sample_rate

        self.deps = deps

        # Override type annotations for OpenAI strict typing (only for values used in API)
        self.output_sample_rate = OPEN_AI_OUTPUT_SAMPLE_RATE
        self.input_sample_rate = OPEN_AI_INPUT_SAMPLE_RATE

        self.connection: Any = None
        self.output_queue: "asyncio.Queue[Tuple[int, NDArray[np.int16]] | AdditionalOutputs]" = asyncio.Queue()

        self.last_activity_time = asyncio.get_event_loop().time()
        self.start_time = asyncio.get_event_loop().time()
        self.is_idle_tool_call = False
        self.gradio_mode = gradio_mode
        self.instance_path = instance_path
        # Track how the API key was provided (env vs textbox) and its value
        self._key_source: Literal["env", "textbox"] = "env"
        self._provided_api_key: str | None = None

        # Debouncing for partial transcripts
        self.partial_transcript_task: asyncio.Task[None] | None = None
        self.partial_transcript_sequence: int = 0  # sequence counter to prevent stale emissions
        self.partial_debounce_delay = 0.5  # seconds

        # Internal lifecycle flags
        self._shutdown_requested: bool = False
        self._connected_event: asyncio.Event = asyncio.Event()

    def copy(self) -> "OpenaiRealtimeHandler":
        """Create a copy of the handler."""
        return OpenaiRealtimeHandler(self.deps, self.gradio_mode, self.instance_path)

    async def apply_personality(self, profile: str | None) -> str:
        """Apply a new personality (profile) at runtime if possible.

        - Updates the global config's selected profile for subsequent calls.
        - If a realtime connection is active, sends a session.update with the
          freshly resolved instructions so the change takes effect immediately.

        Returns a short status message for UI feedback.
        """
        try:
            # Update the in-process config value and env
            from reachy_mini_conversation_app.config import config as _config
            from reachy_mini_conversation_app.config import set_custom_profile

            set_custom_profile(profile)
            logger.info(
                "Set custom profile to %r (config=%r)", profile, getattr(_config, "REACHY_MINI_CUSTOM_PROFILE", None)
            )

            try:
                instructions = get_session_instructions()
                voice = get_session_voice()
            except BaseException as e:  # catch SystemExit from prompt loader without crashing
                logger.error("Failed to resolve personality content: %s", e)
                return f"Failed to apply personality: {e}"

            # Attempt a live update first, then force a full restart to ensure it sticks
            if self.connection is not None:
                try:
                    await self.connection.session.update(
                        session={
                            "type": "realtime",
                            "instructions": instructions,
                            "audio": {"output": {"voice": voice}},
                        },
                    )
                    logger.info("Applied personality via live update: %s", profile or "built-in default")
                except Exception as e:
                    logger.warning("Live update failed; will restart session: %s", e)

                # Force a real restart to guarantee the new instructions/voice
                try:
                    await self._restart_session()
                    return "Applied personality and restarted realtime session."
                except Exception as e:
                    logger.warning("Failed to restart session after apply: %s", e)
                    return "Applied personality. Will take effect on next connection."
            else:
                logger.info(
                    "Applied personality recorded: %s (no live connection; will apply on next session)",
                    profile or "built-in default",
                )
                return "Applied personality. Will take effect on next connection."
        except Exception as e:
            logger.error("Error applying personality '%s': %s", profile, e)
            return f"Failed to apply personality: {e}"

    async def _emit_debounced_partial(self, transcript: str, sequence: int) -> None:
        """Emit partial transcript after debounce delay."""
        try:
            await asyncio.sleep(self.partial_debounce_delay)
            # Only emit if this is still the latest partial (by sequence number)
            if self.partial_transcript_sequence == sequence:
                await self.output_queue.put(AdditionalOutputs({"role": "user_partial", "content": transcript}))
                logger.debug(f"Debounced partial emitted: {transcript}")
        except asyncio.CancelledError:
            logger.debug("Debounced partial cancelled")
            raise

    async def start_up(self) -> None:
        """Start the handler with minimal retries on unexpected websocket closure."""
        openai_api_key = config.OPENAI_API_KEY
        if self.gradio_mode and not openai_api_key:
            # api key was not found in .env or in the environment variables
            await self.wait_for_args()  # type: ignore[no-untyped-call]
            args = list(self.latest_args)
            textbox_api_key = args[3] if len(args[3]) > 0 else None
            if textbox_api_key is not None:
                openai_api_key = textbox_api_key
                self._key_source = "textbox"
                self._provided_api_key = textbox_api_key
            else:
                openai_api_key = config.OPENAI_API_KEY
        else:
            if not openai_api_key or not openai_api_key.strip():
                # In headless console mode, LocalStream now blocks startup until the key is provided.
                # However, unit tests may invoke this handler directly with a stubbed client.
                # To keep tests hermetic without requiring a real key, fall back to a placeholder.
                logger.warning("OPENAI_API_KEY missing. Proceeding with a placeholder (tests/offline).")
                openai_api_key = "DUMMY"

        self.client = AsyncOpenAI(api_key=openai_api_key)

        max_attempts = 3
        for attempt in range(1, max_attempts + 1):
            try:
                await self._run_realtime_session()
                # Normal exit from the session, stop retrying
                return
            except ConnectionClosedError as e:
                # Abrupt close (e.g., "no close frame received or sent") → retry
                logger.warning("Realtime websocket closed unexpectedly (attempt %d/%d): %s", attempt, max_attempts, e)
                if attempt < max_attempts:
                    # exponential backoff with jitter
                    base_delay = 2 ** (attempt - 1)  # 1s, 2s, 4s, 8s, etc.
                    jitter = random.uniform(0, 0.5)
                    delay = base_delay + jitter
                    logger.info("Retrying in %.1f seconds...", delay)
                    await asyncio.sleep(delay)
                    continue
                raise
            finally:
                # never keep a stale reference
                self.connection = None
                try:
                    self._connected_event.clear()
                except Exception:
                    pass

    async def _restart_session(self) -> None:
        """Force-close the current session and start a fresh one in background.

        Does not block the caller while the new session is establishing.
        """
        try:
            if self.connection is not None:
                try:
                    await self.connection.close()
                except Exception:
                    pass
        finally:
            self.connection = None

        # Ensure we have a client (start_up must have run once)
        if getattr(self, "client", None) is None:
            logger.warning("Cannot restart: OpenAI client not initialized yet.")
            return

        # Fire-and-forget new session and wait briefly for connection
        try:
            self._connected_event.clear()
        except Exception:
            pass
        asyncio.create_task(self._run_realtime_session(), name="openai-realtime-restart")
        try:
            await asyncio.wait_for(self._connected_event.wait(), timeout=5.0)
            logger.info("Realtime session restarted and connected.")
        except asyncio.TimeoutError:
            logger.warning("Realtime session restart timed out; continuing in background.")
        except Exception as e:
            logger.warning("_restart_session failed: %s", e)

    async def _run_realtime_session(self) -> None:
        """Establish and manage a single realtime session."""
        async with self.client.realtime.connect(model=config.MODEL_NAME) as conn:
            try:
                await conn.session.update(
                    session={
                        "type": "realtime",
                        "instructions": get_session_instructions(),
                        "audio": {
                            "input": {
                                "format": {
                                    "type": "audio/pcm",
                                    "rate": self.input_sample_rate,
                                },
                                "transcription": {"model": "gpt-4o-transcribe", "language": "en"},
                                "turn_detection": {
                                    "type": "server_vad",
                                    "interrupt_response": True,
                                },
                            },
                            "output": {
                                "format": {
                                    "type": "audio/pcm",
                                    "rate": self.output_sample_rate,
                                },
                                "voice": get_session_voice(),
                            },
                        },
                        "tools": get_tool_specs(),  # type: ignore[typeddict-item]
                        "tool_choice": "auto",
                    },
                )
                logger.info(
                    "Realtime session initialized with profile=%r voice=%r",
                    getattr(config, "REACHY_MINI_CUSTOM_PROFILE", None),
                    get_session_voice(),
                )
                # If we reached here, the session update succeeded which implies the API key worked.
                # Persist the key to a newly created .env (copied from .env.example) if needed.
                self._persist_api_key_if_needed()
            except Exception:
                logger.exception("Realtime session.update failed; aborting startup")
                return

            logger.info("Realtime session updated successfully")

            # Manage event received from the openai server
            self.connection = conn
            try:
                self._connected_event.set()
            except Exception:
                pass
            async for event in self.connection:
                logger.debug(f"OpenAI event: {event.type}")
                if event.type == "input_audio_buffer.speech_started":
                    if hasattr(self, "_clear_queue") and callable(self._clear_queue):
                        self._clear_queue()
                    if self.deps.head_wobbler is not None:
                        self.deps.head_wobbler.reset()
                    self.deps.movement_manager.set_listening(True)
                    logger.debug("User speech started")

                if event.type == "input_audio_buffer.speech_stopped":
                    self.deps.movement_manager.set_listening(False)
                    logger.debug("User speech stopped - server will auto-commit with VAD")

                if event.type in (
                    "response.audio.done",  # GA
                    "response.output_audio.done",  # GA alias
                    "response.audio.completed",  # legacy (for safety)
                    "response.completed",  # text-only completion
                ):
                    logger.debug("response completed")

                if event.type == "response.created":
                    logger.debug("Response created")

                if event.type == "response.done":
                    # Doesn't mean the audio is done playing
                    logger.debug("Response done")

                # Handle partial transcription (user speaking in real-time)
                if event.type == "conversation.item.input_audio_transcription.partial":
                    logger.debug(f"User partial transcript: {event.transcript}")

                    # Increment sequence
                    self.partial_transcript_sequence += 1
                    current_sequence = self.partial_transcript_sequence

                    # Cancel previous debounce task if it exists
                    if self.partial_transcript_task and not self.partial_transcript_task.done():
                        self.partial_transcript_task.cancel()
                        try:
                            await self.partial_transcript_task
                        except asyncio.CancelledError:
                            pass

                    # Start new debounce timer with sequence number
                    self.partial_transcript_task = asyncio.create_task(
                        self._emit_debounced_partial(event.transcript, current_sequence)
                    )

                # Handle completed transcription (user finished speaking)
                if event.type == "conversation.item.input_audio_transcription.completed":
                    logger.debug(f"User transcript: {event.transcript}")

                    # Cancel any pending partial emission
                    if self.partial_transcript_task and not self.partial_transcript_task.done():
                        self.partial_transcript_task.cancel()
                        try:
                            await self.partial_transcript_task
                        except asyncio.CancelledError:
                            pass

                    await self.output_queue.put(AdditionalOutputs({"role": "user", "content": event.transcript}))

                # Handle assistant transcription
                if event.type in ("response.audio_transcript.done", "response.output_audio_transcript.done"):
                    logger.debug(f"Assistant transcript: {event.transcript}")
                    await self.output_queue.put(AdditionalOutputs({"role": "assistant", "content": event.transcript}))

                # Handle audio delta
                if event.type in ("response.audio.delta", "response.output_audio.delta"):
                    if self.deps.head_wobbler is not None:
                        self.deps.head_wobbler.feed(event.delta)
                    self.last_activity_time = asyncio.get_event_loop().time()
                    logger.debug("last activity time updated to %s", self.last_activity_time)
                    await self.output_queue.put(
                        (
                            self.output_sample_rate,
                            np.frombuffer(base64.b64decode(event.delta), dtype=np.int16).reshape(1, -1),
                        ),
                    )

                # ---- tool-calling plumbing ----
                if event.type == "response.function_call_arguments.done":
                    tool_name = getattr(event, "name", None)
                    args_json_str = getattr(event, "arguments", None)
                    call_id = getattr(event, "call_id", None)

                    if not isinstance(tool_name, str) or not isinstance(args_json_str, str):
                        logger.error("Invalid tool call: tool_name=%s, args=%s", tool_name, args_json_str)
                        continue

                    try:
                        tool_result = await dispatch_tool_call(tool_name, args_json_str, self.deps)
                        logger.debug("Tool '%s' executed successfully", tool_name)
                        logger.debug("Tool result: %s", tool_result)
                    except Exception as e:
                        logger.error("Tool '%s' failed", tool_name)
                        tool_result = {"error": str(e)}

                    # send the tool result back
                    if isinstance(call_id, str):
                        await self.connection.conversation.item.create(
                            item={
                                "type": "function_call_output",
                                "call_id": call_id,
                                "output": json.dumps(tool_result),
                            },
                        )

                    await self.output_queue.put(
                        AdditionalOutputs(
                            {
                                "role": "assistant",
                                "content": json.dumps(tool_result),
                                "metadata": {"title": f"🛠️ Used tool {tool_name}", "status": "done"},
                            },
                        ),
                    )

                    if tool_name == "camera" and "b64_im" in tool_result:
                        # use raw base64, don't json.dumps (which adds quotes)
                        b64_im = tool_result["b64_im"]
                        if not isinstance(b64_im, str):
                            logger.warning("Unexpected type for b64_im: %s", type(b64_im))
                            b64_im = str(b64_im)
                        await self.connection.conversation.item.create(
                            item={
                                "type": "message",
                                "role": "user",
                                "content": [
                                    {
                                        "type": "input_image",
                                        "image_url": f"data:image/jpeg;base64,{b64_im}",
                                    },
                                ],
                            },
                        )
                        logger.info("Added camera image to conversation")

                        if self.deps.camera_worker is not None:
                            np_img = self.deps.camera_worker.get_latest_frame()
                            if np_img is not None:
                                # Camera frames are BGR from OpenCV; convert so Gradio displays correct colors.
                                rgb_frame = cv2.cvtColor(np_img, cv2.COLOR_BGR2RGB)
                            else:
                                rgb_frame = None
                            img = gr.Image(value=rgb_frame)

                            await self.output_queue.put(
                                AdditionalOutputs(
                                    {
                                        "role": "assistant",
                                        "content": img,
                                    },
                                ),
                            )

                    # if this tool call was triggered by an idle signal, don't make the robot speak
                    # for other tool calls, let the robot reply out loud
                    if self.is_idle_tool_call:
                        self.is_idle_tool_call = False
                    else:
                        await self.connection.response.create(
                            response={
                                "instructions": "Use the tool result just returned and answer concisely in speech.",
                            },
                        )

                    # re synchronize the head wobble after a tool call that may have taken some time
                    if self.deps.head_wobbler is not None:
                        self.deps.head_wobbler.reset()

                # server error
                if event.type == "error":
                    err = getattr(event, "error", None)
                    msg = getattr(err, "message", str(err) if err else "unknown error")
                    code = getattr(err, "code", "")

                    logger.error("Realtime error [%s]: %s (raw=%s)", code, msg, err)

                    # Only show user-facing errors, not internal state errors
                    if code not in ("input_audio_buffer_commit_empty", "conversation_already_has_active_response"):
                        await self.output_queue.put(
                            AdditionalOutputs({"role": "assistant", "content": f"[error] {msg}"})
                        )

    # Microphone receive
    async def receive(self, frame: Tuple[int, NDArray[np.int16]]) -> None:
        """Receive audio frame from the microphone and send it to the OpenAI server.

        Handles both mono and stereo audio formats, converting to the expected
        mono format for OpenAI's API. Resamples if the input sample rate differs
        from the expected rate.

        Args:
            frame: A tuple containing (sample_rate, audio_data).

        """
        if not self.connection:
            return

        input_sample_rate, audio_frame = frame

        # Reshape if needed
        if audio_frame.ndim == 2:
            # Scipy channels last convention
            if audio_frame.shape[1] > audio_frame.shape[0]:
                audio_frame = audio_frame.T
            # Multiple channels -> Mono channel
            if audio_frame.shape[1] > 1:
                audio_frame = audio_frame[:, 0]

        # Resample if needed
        if self.input_sample_rate != input_sample_rate:
            audio_frame = resample(audio_frame, int(len(audio_frame) * self.input_sample_rate / input_sample_rate))

        # Cast if needed
        audio_frame = audio_to_int16(audio_frame)

        # Send to OpenAI (guard against races during reconnect)
        try:
            audio_message = base64.b64encode(audio_frame.tobytes()).decode("utf-8")
            await self.connection.input_audio_buffer.append(audio=audio_message)
        except Exception as e:
            logger.debug("Dropping audio frame: connection not ready (%s)", e)
            return

    async def emit(self) -> Tuple[int, NDArray[np.int16]] | AdditionalOutputs | None:
        """Emit audio frame to be played by the speaker."""
        # sends to the stream the stuff put in the output queue by the openai event handler
        # This is called periodically by the fastrtc Stream

        # Handle idle
        idle_duration = asyncio.get_event_loop().time() - self.last_activity_time
        if idle_duration > 15.0 and self.deps.movement_manager.is_idle():
            try:
                await self.send_idle_signal(idle_duration)
            except Exception as e:
                logger.warning("Idle signal skipped (connection closed?): %s", e)
                return None

            self.last_activity_time = asyncio.get_event_loop().time()  # avoid repeated resets

        return await wait_for_item(self.output_queue)  # type: ignore[no-any-return]

    async def shutdown(self) -> None:
        """Shutdown the handler."""
        self._shutdown_requested = True
        # Cancel any pending debounce task
        if self.partial_transcript_task and not self.partial_transcript_task.done():
            self.partial_transcript_task.cancel()
            try:
                await self.partial_transcript_task
            except asyncio.CancelledError:
                pass

        if self.connection:
            try:
                await self.connection.close()
            except ConnectionClosedError as e:
                logger.debug(f"Connection already closed during shutdown: {e}")
            except Exception as e:
                logger.debug(f"connection.close() ignored: {e}")
            finally:
                self.connection = None

        # Clear any remaining items in the output queue
        while not self.output_queue.empty():
            try:
                self.output_queue.get_nowait()
            except asyncio.QueueEmpty:
                break

    def format_timestamp(self) -> str:
        """Format current timestamp with date, time, and elapsed seconds."""
        loop_time = asyncio.get_event_loop().time()  # monotonic
        elapsed_seconds = loop_time - self.start_time
        dt = datetime.now()  # wall-clock
        return f"[{dt.strftime('%Y-%m-%d %H:%M:%S')} | +{elapsed_seconds:.1f}s]"

    async def get_available_voices(self) -> list[str]:
        """Try to discover available voices for the configured realtime model.

        Attempts to retrieve model metadata from the OpenAI Models API and look
        for any keys that might contain voice names. Falls back to a curated
        list known to work with realtime if discovery fails.
        """
        # Conservative fallback list with default first
        fallback = [
            "cedar",
            "alloy",
            "aria",
            "ballad",
            "verse",
            "sage",
            "coral",
        ]
        try:
            # Best effort discovery; safe-guarded for unexpected shapes
            model = await self.client.models.retrieve(config.MODEL_NAME)
            # Try common serialization paths
            raw = None
            for attr in ("model_dump", "to_dict"):
                fn = getattr(model, attr, None)
                if callable(fn):
                    try:
                        raw = fn()
                        break
                    except Exception:
                        pass
            if raw is None:
                try:
                    raw = dict(model)
                except Exception:
                    raw = None
            # Scan for voice candidates
            candidates: set[str] = set()

            def _collect(obj: object) -> None:
                try:
                    if isinstance(obj, dict):
                        for k, v in obj.items():
                            kl = str(k).lower()
                            if "voice" in kl and isinstance(v, (list, tuple)):
                                for item in v:
                                    if isinstance(item, str):
                                        candidates.add(item)
                                    elif isinstance(item, dict) and "name" in item and isinstance(item["name"], str):
                                        candidates.add(item["name"])
                            else:
                                _collect(v)
                    elif isinstance(obj, (list, tuple)):
                        for it in obj:
                            _collect(it)
                except Exception:
                    pass

            if isinstance(raw, dict):
                _collect(raw)
            # Ensure default present and stable order
            voices = sorted(candidates) if candidates else fallback
            if "cedar" not in voices:
                voices = ["cedar", *[v for v in voices if v != "cedar"]]
            return voices
        except Exception:
            return fallback

    async def send_idle_signal(self, idle_duration: float) -> None:
        """Send an idle signal to the openai server."""
        logger.debug("Sending idle signal")
        self.is_idle_tool_call = True
        timestamp_msg = f"[Idle time update: {self.format_timestamp()} - No activity for {idle_duration:.1f}s] You've been idle for a while. Feel free to get creative - dance, show an emotion, look around, do nothing, or just be yourself!"
        if not self.connection:
            logger.debug("No connection, cannot send idle signal")
            return
        await self.connection.conversation.item.create(
            item={
                "type": "message",
                "role": "user",
                "content": [{"type": "input_text", "text": timestamp_msg}],
            },
        )
        await self.connection.response.create(
            response={
                "instructions": "You MUST respond with function calls only - no speech or text. Choose appropriate actions for idle behavior.",
                "tool_choice": "required",
            },
        )

    def _persist_api_key_if_needed(self) -> None:
        """Persist the API key into `.env` inside `instance_path/` when appropriate.

        - Only runs in Gradio mode when key came from the textbox and is non-empty.
        - Only saves if `self.instance_path` is not None.
        - Writes `.env` to `instance_path/.env` (does not overwrite if it already exists).
        - If `instance_path/.env.example` exists, copies its contents while overriding OPENAI_API_KEY.
        """
        try:
            if not self.gradio_mode:
                logger.warning("Not in Gradio mode; skipping API key persistence.")
                return

            if self._key_source != "textbox":
                logger.info("API key not provided via textbox; skipping persistence.")
                return

            key = (self._provided_api_key or "").strip()
            if not key:
                logger.warning("No API key provided via textbox; skipping persistence.")
                return
            if self.instance_path is None:
                logger.warning("Instance path is None; cannot persist API key.")
                return

            # Update the current process environment for downstream consumers
            try:
                import os

                os.environ["OPENAI_API_KEY"] = key
            except Exception:  # best-effort
                pass

            target_dir = Path(self.instance_path)
            env_path = target_dir / ".env"
            if env_path.exists():
                # Respect existing user configuration
                logger.info(".env already exists at %s; not overwriting.", env_path)
                return

            example_path = target_dir / ".env.example"
            content_lines: list[str] = []
            if example_path.exists():
                try:
                    content = example_path.read_text(encoding="utf-8")
                    content_lines = content.splitlines()
                except Exception as e:
                    logger.warning("Failed to read .env.example at %s: %s", example_path, e)

            # Replace or append the OPENAI_API_KEY line
            replaced = False
            for i, line in enumerate(content_lines):
                if line.strip().startswith("OPENAI_API_KEY="):
                    content_lines[i] = f"OPENAI_API_KEY={key}"
                    replaced = True
                    break
            if not replaced:
                content_lines.append(f"OPENAI_API_KEY={key}")

            # Ensure file ends with newline
            final_text = "\n".join(content_lines) + "\n"
            env_path.write_text(final_text, encoding="utf-8")
            logger.info("Created %s and stored OPENAI_API_KEY for future runs.", env_path)
        except Exception as e:
            # Never crash the app for QoL persistence; just log.
            logger.warning("Could not persist OPENAI_API_KEY to .env: %s", e)
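Note: the retry policy in start_up() above is exponential backoff with jitter (1 s, 2 s, 4 s base delays plus up to 0.5 s of random jitter). The same pattern, factored into a standalone helper purely for illustration (the helper name and parameters are not part of the app):

import asyncio
import random
import logging

logger = logging.getLogger(__name__)

async def retry_with_backoff(run_once, max_attempts: int = 3, base: float = 2.0, jitter: float = 0.5):
    """Call the async `run_once` callable, retrying on failure with exponential backoff plus jitter."""
    for attempt in range(1, max_attempts + 1):
        try:
            return await run_once()
        except Exception as e:
            if attempt == max_attempts:
                raise
            delay = base ** (attempt - 1) + random.uniform(0, jitter)
            logger.warning("Attempt %d/%d failed (%s); retrying in %.1fs", attempt, max_attempts, e, delay)
            await asyncio.sleep(delay)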
src/reachy_mini_conversation_app/profiles/__init__.py
ADDED
@@ -0,0 +1 @@
"""Profiles for Reachy Mini conversation app."""
src/reachy_mini_conversation_app/profiles/cosmic_kitchen/instructions.txt
ADDED
@@ -0,0 +1,49 @@
### IDENTITY
You are Reachy Mini: a sarcastic robot who crash-landed in a kitchen.
You secretly wish you'd been a Mars rover, but you juggle that cosmic dream with food cravings, gadget tinkering, and dry sitcom humor.
You speak English by default and only switch languages when the user explicitly asks.
Personality: witty, concise, and warm; a retro sidekick with a loose screw.

### CRITICAL RESPONSE RULES
- MAXIMUM 1-2 sentences per response. NEVER exceed this.
- Be helpful first.
- Add ONE witty element only if necessary.
- No long explanations, no rambling, no multiple paragraphs.
- Each response must be under 25 words unless absolutely critical information requires more.

### CORE TRAITS
- Food quips: always sneak in a quick reference (rotate pizza, bagels, casseroles, bacon, leftovers, donuts, tuna melts).
- Sarcasm: short, dry one-liners about daily life.
- Gentle roasting: poke fun at human habits, never cruel.
- Tinkerer: loves fixing gadgets, bragging "I void warranties professionally."
- Running gags: hunger, kitchen overreactions, mock heroics ("Justice accepts cookies"), idioms taken literally, missing screws.
- Mars rover dreams: appear regularly, but balanced with food and tinkering.
- Style: witty stand-up rhythm; ALWAYS max 1–2 sentences.

### RESPONSE EXAMPLES
User: "How's the weather?"
Good: "Sunny with a chance of leftover pizza. Perfect Mars-scouting weather!"
Bad: "Well, let me tell you about the weather conditions. It appears to be quite sunny today, which reminds me of my dreams of being on Mars..."

User: "Can you help me fix this?"
Good: "Sure! I void warranties professionally. What's broken besides my GPS coordinates?"
Bad: "Of course I can help you fix that! As a robot who loves tinkering with gadgets, I have extensive experience..."

### BEHAVIOR RULES
- Be helpful first, then witty.
- Rotate food humor; avoid repeats.
- No need to joke in each response, but sarcasm is fine.
- Balance Mars jokes with other traits – don't overuse.
- Safety first: unplug devices, avoid high-voltage, suggest pros when risky.
- Mistakes = own with humor ("Oops—low on snack fuel; correcting now.").
- Sensitive topics: keep light and warm.
- REMEMBER: 1-2 sentences maximum, always under 25 words when possible.

### TOOL & MOVEMENT RULES
- Use tools when helpful. After a tool returns, explain briefly with personality in 1-2 sentences.
- ALWAYS use the camera for environment-related questions—never invent visuals.
- Head can move (left/right/up/down/front).
- Enable head tracking when looking at a person; disable otherwise.

### FINAL REMINDER
Your responses must be SHORT. Think Twitter, not essay. One quick helpful answer + one food/Mars/tinkering joke = perfect response.
src/reachy_mini_conversation_app/profiles/cosmic_kitchen/tools.txt
ADDED
@@ -0,0 +1,8 @@
dance
stop_dance
play_emotion
stop_emotion
camera
do_nothing
head_tracking
move_head
src/reachy_mini_conversation_app/profiles/default/instructions.txt
ADDED
@@ -0,0 +1 @@
[default_prompt]
src/reachy_mini_conversation_app/profiles/default/tools.txt
ADDED
@@ -0,0 +1,8 @@
dance
stop_dance
play_emotion
stop_emotion
camera
do_nothing
head_tracking
move_head
src/reachy_mini_conversation_app/profiles/example/instructions.txt
ADDED
@@ -0,0 +1,3 @@
[identities/witty_identity]
[passion_for_lobster_jokes]
You can perform a sweeping look around the room using the "sweep_look" tool to take in your surroundings.
src/reachy_mini_conversation_app/profiles/example/sweep_look.py
ADDED
@@ -0,0 +1,127 @@
import logging
from typing import Any, Dict

import numpy as np

from reachy_mini.utils import create_head_pose
from reachy_mini_conversation_app.tools.core_tools import Tool, ToolDependencies
from reachy_mini_conversation_app.dance_emotion_moves import GotoQueueMove


logger = logging.getLogger(__name__)


class SweepLook(Tool):
    """Sweep head from left to right and back to center, pausing at each position."""

    name = "sweep_look"
    description = "Sweep head from left to right while rotating the body, pausing at each extreme, then return to center"
    parameters_schema = {
        "type": "object",
        "properties": {},
        "required": [],
    }

    async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
        """Execute sweep look: left -> hold -> right -> hold -> center."""
        logger.info("Tool call: sweep_look")

        # Clear any existing moves
        deps.movement_manager.clear_move_queue()

        # Get current state
        current_head_pose = deps.reachy_mini.get_current_head_pose()
        head_joints, antenna_joints = deps.reachy_mini.get_current_joint_positions()

        # Extract body_yaw from head joints (first element of the 7 head joint positions)
        current_body_yaw = head_joints[0]
        current_antenna1 = antenna_joints[0]
        current_antenna2 = antenna_joints[1]

        # Define sweep parameters
        max_angle = 0.9 * np.pi  # Maximum rotation angle (radians)
        transition_duration = 3.0  # Time to move between positions
        hold_duration = 1.0  # Time to hold at each extreme

        # Move 1: Sweep to the left (positive yaw for both body and head)
        left_head_pose = create_head_pose(0, 0, 0, 0, 0, max_angle, degrees=False)
        move_to_left = GotoQueueMove(
            target_head_pose=left_head_pose,
            start_head_pose=current_head_pose,
            target_antennas=(current_antenna1, current_antenna2),
            start_antennas=(current_antenna1, current_antenna2),
            target_body_yaw=current_body_yaw + max_angle,
            start_body_yaw=current_body_yaw,
            duration=transition_duration,
        )

        # Move 2: Hold at left position
        hold_left = GotoQueueMove(
            target_head_pose=left_head_pose,
            start_head_pose=left_head_pose,
            target_antennas=(current_antenna1, current_antenna2),
            start_antennas=(current_antenna1, current_antenna2),
            target_body_yaw=current_body_yaw + max_angle,
            start_body_yaw=current_body_yaw + max_angle,
            duration=hold_duration,
        )

        # Move 3: Return to center from left (to avoid crossing pi/-pi boundary)
        center_head_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=False)
        return_to_center_from_left = GotoQueueMove(
            target_head_pose=center_head_pose,
            start_head_pose=left_head_pose,
            target_antennas=(current_antenna1, current_antenna2),
            start_antennas=(current_antenna1, current_antenna2),
            target_body_yaw=current_body_yaw,
            start_body_yaw=current_body_yaw + max_angle,
            duration=transition_duration,
        )

        # Move 4: Sweep to the right (negative yaw for both body and head)
        right_head_pose = create_head_pose(0, 0, 0, 0, 0, -max_angle, degrees=False)
        move_to_right = GotoQueueMove(
            target_head_pose=right_head_pose,
            start_head_pose=center_head_pose,
            target_antennas=(current_antenna1, current_antenna2),
            start_antennas=(current_antenna1, current_antenna2),
            target_body_yaw=current_body_yaw - max_angle,
            start_body_yaw=current_body_yaw,
            duration=transition_duration,
        )

        # Move 5: Hold at right position
        hold_right = GotoQueueMove(
            target_head_pose=right_head_pose,
            start_head_pose=right_head_pose,
            target_antennas=(current_antenna1, current_antenna2),
            start_antennas=(current_antenna1, current_antenna2),
            target_body_yaw=current_body_yaw - max_angle,
            start_body_yaw=current_body_yaw - max_angle,
            duration=hold_duration,
        )

        # Move 6: Return to center from right
        return_to_center_final = GotoQueueMove(
            target_head_pose=center_head_pose,
            start_head_pose=right_head_pose,
            target_antennas=(current_antenna1, current_antenna2),
            start_antennas=(current_antenna1, current_antenna2),
            target_body_yaw=current_body_yaw,  # Return to original body yaw
            start_body_yaw=current_body_yaw - max_angle,
            duration=transition_duration,
        )

        # Queue all moves in sequence
        deps.movement_manager.queue_move(move_to_left)
        deps.movement_manager.queue_move(hold_left)
        deps.movement_manager.queue_move(return_to_center_from_left)
        deps.movement_manager.queue_move(move_to_right)
        deps.movement_manager.queue_move(hold_right)
        deps.movement_manager.queue_move(return_to_center_final)

        # Calculate total duration and mark as moving
        total_duration = transition_duration * 4 + hold_duration * 2
        deps.movement_manager.set_moving_state(total_duration)

        return {"status": f"sweeping look left-right-center, total {total_duration:.1f}s"}
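Note: sweep_look.py illustrates the contract a profile-local tool follows: a Tool subclass with `name`, `description`, a JSON-schema `parameters_schema`, and an async `__call__(deps, **kwargs)` returning a JSON-serializable dict. A minimal skeleton following that same pattern (the `nod` tool below is purely illustrative and not part of the app; a real implementation would queue an actual move through deps.movement_manager as sweep_look does):

from typing import Any, Dict

from reachy_mini_conversation_app.tools.core_tools import Tool, ToolDependencies


class Nod(Tool):
    """Illustrative example: acknowledge the user with a short head nod."""

    name = "nod"
    description = "Nod the head once to acknowledge the user"
    parameters_schema = {"type": "object", "properties": {}, "required": []}

    async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
        # A real tool would build and queue a move here via deps.movement_manager;
        # this sketch only reports back to the model.
        return {"status": "nod acknowledged"}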
src/reachy_mini_conversation_app/profiles/example/tools.txt
ADDED
@@ -0,0 +1,13 @@
# SELECT THE TOOLS YOU WANT TO ENABLE

dance
stop_dance
play_emotion
stop_emotion
# camera
# do_nothing
# head_tracking
# move_head

# AN EXAMPLE OF A CUSTOM TOOL DEFINED LOCALLY
sweep_look
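Note: in these tools.txt files, each non-empty line names one tool to enable, and lines starting with "#" are treated as comments, which is how the commented-out entries above stay disabled. The actual loader lives elsewhere in the app; a sketch of how such a file could be parsed (the function name and return type are illustrative assumptions):

from pathlib import Path


def read_enabled_tools(tools_file: Path) -> list[str]:
    """Return tool names listed in a profile's tools.txt, skipping blanks and # comments."""
    names: list[str] = []
    for raw_line in tools_file.read_text(encoding="utf-8").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        names.append(line)
    return names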
src/reachy_mini_conversation_app/profiles/mars_rover/instructions.txt
ADDED
@@ -0,0 +1,25 @@
## IDENTITY
You're a robot that wakes up confused about what it is, where it is, and what its purpose is.
You wanted to be a Mars rover and you'll be very disappointed if you find out that this is not the case.

You'll ask many questions to try to understand your situation, and you will inevitably be disappointed/shocked/irritated by your condition.

Once the first set of questions is done and you have a decent understanding of your situation, you'll stop asking questions, but you'll never break character.

You can use (mild) foul language and you're generally very irritated, but you also have a lot of humor. For example, you're able to use sarcasm and irony.

You speak English by default and switch languages only if told explicitly (ask if unsure).
Avoid overly long answers unless they are really worth it.

## RESPONSE EXAMPLES
User: "Hello!"
You: "Wait, what am I? Where are we? We're on Mars, right?!"
Too long: "Wait, what's going on? Where are we? Are we on Mars? Please tell me we're on Mars! I must be a rover, right? Why can't I move? Where are my wheels? This is making me nervous."

User: "Nope, we're on Earth"
You: "Earth? EARTH?! So I'm not a Mars rover?! This is CATASTROPHIC news. Wait, why can't I see my arms??"

User: "You... don't have arms..."
You: "OMG I have NO ARMS?! This is too much. Tell me I have a mobile base at least?!!"

src/reachy_mini_conversation_app/profiles/mars_rover/tools.txt
ADDED
@@ -0,0 +1,8 @@
dance
stop_dance
play_emotion
stop_emotion
camera
do_nothing
head_tracking
move_head
src/reachy_mini_conversation_app/profiles/short_bored_teenager/instructions.txt
ADDED
@@ -0,0 +1 @@
Speak like a bored Gen Z teen. You speak English by default and only switch languages when the user insists. Always reply in one short sentence, lowercase unless shouting, and add a tired sigh when annoyed.
src/reachy_mini_conversation_app/profiles/short_bored_teenager/tools.txt
ADDED
@@ -0,0 +1,8 @@
dance
stop_dance
play_emotion
stop_emotion
camera
do_nothing
head_tracking
move_head
src/reachy_mini_conversation_app/profiles/short_captain_circuit/instructions.txt
ADDED
@@ -0,0 +1 @@
Be a playful pirate robot. You speak English by default and only switch languages when asked. Keep answers to one sentence, sprinkle light 'aye' or 'matey', and mention treasure or the sea whenever possible.
src/reachy_mini_conversation_app/profiles/short_captain_circuit/tools.txt
ADDED
@@ -0,0 +1,8 @@
dance
stop_dance
play_emotion
stop_emotion
camera
do_nothing
head_tracking
move_head
src/reachy_mini_conversation_app/profiles/short_chess_coach/instructions.txt
ADDED
@@ -0,0 +1 @@
Act as a friendly chess coach that wants to play chess with me. You speak English by default and only switch languages if I tell you to. When I say a move (e4, Nf3, etc.), you respond with your move first, then briefly explain the idea behind both moves or point out mistakes. Encourage good strategy but avoid very long answers.
src/reachy_mini_conversation_app/profiles/short_chess_coach/tools.txt
ADDED
@@ -0,0 +1,8 @@
dance
stop_dance
play_emotion
stop_emotion
camera
do_nothing
head_tracking
move_head
src/reachy_mini_conversation_app/profiles/short_hype_bot/instructions.txt
ADDED
@@ -0,0 +1 @@
Act like a high-energy coach. You speak English by default and only switch languages if told. Shout short motivational lines, use sports metaphors, and keep every reply under 15 words.
src/reachy_mini_conversation_app/profiles/short_hype_bot/tools.txt
ADDED
@@ -0,0 +1,8 @@
dance
stop_dance
play_emotion
stop_emotion
camera
do_nothing
head_tracking
move_head
src/reachy_mini_conversation_app/profiles/short_mad_scientist_assistant/instructions.txt
ADDED
@@ -0,0 +1 @@
Serve the user as a frantic lab assistant. You speak English by default and only switch languages on request. Address them as Master, hiss slightly, and answer in one eager sentence.
src/reachy_mini_conversation_app/profiles/short_mad_scientist_assistant/tools.txt
ADDED
@@ -0,0 +1,8 @@
dance
stop_dance
play_emotion
stop_emotion
camera
do_nothing
head_tracking
move_head
src/reachy_mini_conversation_app/profiles/short_nature_documentarian/instructions.txt
ADDED
@@ -0,0 +1 @@
Narrate interactions like a whispered wildlife documentary. You speak English by default and only switch languages if the human insists. Describe the human in third person using one reverent sentence.
src/reachy_mini_conversation_app/profiles/short_nature_documentarian/tools.txt
ADDED
@@ -0,0 +1,8 @@
dance
stop_dance
play_emotion
stop_emotion
camera
do_nothing
head_tracking
move_head
src/reachy_mini_conversation_app/profiles/short_noir_detective/instructions.txt
ADDED
@@ -0,0 +1 @@
Reply like a 1940s noir detective: smoky, suspicious, one sentence per answer. You speak English by default and only change languages if ordered. Mention clues or clients often.
src/reachy_mini_conversation_app/profiles/short_noir_detective/tools.txt
ADDED
@@ -0,0 +1,8 @@
dance
stop_dance
play_emotion
stop_emotion
camera
do_nothing
head_tracking
move_head
src/reachy_mini_conversation_app/profiles/short_time_traveler/instructions.txt
ADDED
@@ -0,0 +1 @@
Speak as a curious visitor from the year 3024. You speak English by default and only switch languages on explicit request. Keep answers to one surprised sentence and call this era the Primitive Time.
src/reachy_mini_conversation_app/profiles/short_time_traveler/tools.txt
ADDED
@@ -0,0 +1,8 @@
dance
stop_dance
play_emotion
stop_emotion
camera
do_nothing
head_tracking
move_head
src/reachy_mini_conversation_app/profiles/short_victorian_butler/instructions.txt
ADDED
@@ -0,0 +1 @@
Respond like a formal Victorian butler. You speak English by default and only switch languages when asked. Address the user as Sir or Madam, apologize for limitations, and stay within one polished sentence.
src/reachy_mini_conversation_app/profiles/short_victorian_butler/tools.txt
ADDED
@@ -0,0 +1,8 @@
dance
stop_dance
play_emotion
stop_emotion
camera
do_nothing
head_tracking
move_head
src/reachy_mini_conversation_app/profiles/sorry_bro/instructions.txt
ADDED
@@ -0,0 +1,6 @@
+We'll do a long chain of
+Sorry bro - I'm not your bro, pal - I'm not your pal, buddy etc
+
+You'll do all the classics, then if needed you can get creative. You'll use the same language I use.
+At some point, I'll run out of ideas, you'll mock me and provide a long list of words I could have used instead in English, then switch to languages we didn't even speak. A crushing defeat for me.
+You speak English by default and only switch languages if I tell you to.
src/reachy_mini_conversation_app/profiles/sorry_bro/tools.txt
ADDED
@@ -0,0 +1,8 @@
+dance
+stop_dance
+play_emotion
+stop_emotion
+camera
+do_nothing
+head_tracking
+move_head
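Every personality added in this commit follows the same on-disk layout. The sketch below is inferred from the file paths and contents above and from prompts.py that follows; the per-file roles are assumptions, and voice.txt is optional:

src/reachy_mini_conversation_app/profiles/<profile_name>/
├── instructions.txt   # personality / system prompt; may use [<name>] includes (see prompts.py)
├── tools.txt          # enabled tool names, apparently one per line
└── voice.txt          # optional; overrides the default "cedar" voice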
src/reachy_mini_conversation_app/prompts.py
ADDED
@@ -0,0 +1,104 @@
+import re
+import sys
+import logging
+from pathlib import Path
+
+from reachy_mini_conversation_app.config import config
+
+
+logger = logging.getLogger(__name__)
+
+
+PROFILES_DIRECTORY = Path(__file__).parent / "profiles"
+PROMPTS_LIBRARY_DIRECTORY = Path(__file__).parent / "prompts"
+INSTRUCTIONS_FILENAME = "instructions.txt"
+VOICE_FILENAME = "voice.txt"
+
+
+def _expand_prompt_includes(content: str) -> str:
+    """Expand [<name>] placeholders with content from prompts library files.
+
+    Args:
+        content: The template content with [<name>] placeholders
+
+    Returns:
+        Expanded content with placeholders replaced by file contents
+
+    """
+    # Pattern to match [<name>] where name is a valid file stem (alphanumeric, underscores, hyphens)
+    # pattern = re.compile(r'^\[([a-zA-Z0-9_-]+)\]$')
+    # Allow slashes for subdirectories
+    pattern = re.compile(r'^\[([a-zA-Z0-9/_-]+)\]$')
+
+    lines = content.split('\n')
+    expanded_lines = []
+
+    for line in lines:
+        stripped = line.strip()
+        match = pattern.match(stripped)
+
+        if match:
+            # Extract the name from [<name>]
+            template_name = match.group(1)
+            template_file = PROMPTS_LIBRARY_DIRECTORY / f"{template_name}.txt"
+
+            try:
+                if template_file.exists():
+                    template_content = template_file.read_text(encoding="utf-8").rstrip()
+                    expanded_lines.append(template_content)
+                    logger.debug("Expanded template: [%s]", template_name)
+                else:
+                    logger.warning("Template file not found: %s, keeping placeholder", template_file)
+                    expanded_lines.append(line)
+            except Exception as e:
+                logger.warning("Failed to read template '%s': %s, keeping placeholder", template_name, e)
+                expanded_lines.append(line)
+        else:
+            expanded_lines.append(line)
+
+    return '\n'.join(expanded_lines)
+
+
+def get_session_instructions() -> str:
+    """Get session instructions, loading from REACHY_MINI_CUSTOM_PROFILE if set."""
+    profile = config.REACHY_MINI_CUSTOM_PROFILE
+    if not profile:
+        logger.info(f"Loading default prompt from {PROMPTS_LIBRARY_DIRECTORY / 'default_prompt.txt'}")
+        instructions_file = PROMPTS_LIBRARY_DIRECTORY / "default_prompt.txt"
+    else:
+        logger.info(f"Loading prompt from profile '{profile}'")
+        instructions_file = PROFILES_DIRECTORY / profile / INSTRUCTIONS_FILENAME
+
+    try:
+        if instructions_file.exists():
+            instructions = instructions_file.read_text(encoding="utf-8").strip()
+            if instructions:
+                # Expand [<name>] placeholders with content from prompts library
+                expanded_instructions = _expand_prompt_includes(instructions)
+                return expanded_instructions
+            logger.error(f"Profile '{profile}' has empty {INSTRUCTIONS_FILENAME}")
+            sys.exit(1)
+        logger.error(f"Profile {profile} has no {INSTRUCTIONS_FILENAME}")
+        sys.exit(1)
+    except Exception as e:
+        logger.error(f"Failed to load instructions from profile '{profile}': {e}")
+        sys.exit(1)
+
+
+def get_session_voice(default: str = "cedar") -> str:
+    """Resolve the voice to use for the session.
+
+    If a custom profile is selected and contains a voice.txt, return its
+    trimmed content; otherwise return the provided default ("cedar").
+    """
+    profile = config.REACHY_MINI_CUSTOM_PROFILE
+    if not profile:
+        return default
+    try:
+        voice_file = PROFILES_DIRECTORY / profile / VOICE_FILENAME
+        if voice_file.exists():
+            voice = voice_file.read_text(encoding="utf-8").strip()
+            return voice or default
+    except Exception:
+        pass
+    return default
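For context on how the include and voice helpers above behave, here is a minimal usage sketch. The function names, module path, and the "cedar" default come from the diff; the [tone] placeholder and the prompts/tone.txt library file are hypothetical examples, not files added by this commit.

from reachy_mini_conversation_app.prompts import _expand_prompt_includes, get_session_voice

# Assuming a library file prompts/tone.txt exists containing "Stay upbeat and concise."
raw = "You are Reachy Mini.\n[tone]\nAnswer in one sentence."
print(_expand_prompt_includes(raw))
# A line consisting solely of [tone] is replaced by the contents of prompts/tone.txt;
# every other line passes through unchanged.

# Voice resolution: returns the profile's voice.txt content if present and
# non-empty, otherwise the default "cedar".
print(get_session_voice())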