RemiFabre committed
Commit a238f2e · 0 Parent(s)

Initial commit

This view is limited to 50 files because the commit contains too many changes.

Files changed (50)
  1. .env.example +15 -0
  2. .gitattributes +41 -0
  3. .gitignore +61 -0
  4. LICENSE +201 -0
  5. README.md +27 -0
  6. README_OLD.md +247 -0
  7. docs/assets/conversation_app_arch.svg +3 -0
  8. docs/assets/reachy_mini_dance.gif +3 -0
  9. docs/scheme.mmd +58 -0
  10. index.html +42 -0
  11. pyproject.toml +72 -0
  12. src/test_conv_pipe/__init__.py +1 -0
  13. src/test_conv_pipe/audio/__init__.py +1 -0
  14. src/test_conv_pipe/audio/head_wobbler.py +181 -0
  15. src/test_conv_pipe/audio/speech_tapper.py +268 -0
  16. src/test_conv_pipe/camera_worker.py +241 -0
  17. src/test_conv_pipe/config.py +80 -0
  18. src/test_conv_pipe/console.py +502 -0
  19. src/test_conv_pipe/dance_emotion_moves.py +154 -0
  20. src/test_conv_pipe/gradio_personality.py +316 -0
  21. src/test_conv_pipe/headless_personality.py +102 -0
  22. src/test_conv_pipe/headless_personality_ui.py +287 -0
  23. src/test_conv_pipe/images/reachymini_avatar.png +3 -0
  24. src/test_conv_pipe/images/user_avatar.png +3 -0
  25. src/test_conv_pipe/main.py +253 -0
  26. src/test_conv_pipe/moves.py +849 -0
  27. src/test_conv_pipe/openai_realtime.py +719 -0
  28. src/test_conv_pipe/profiles/__init__.py +1 -0
  29. src/test_conv_pipe/profiles/_test_conv_pipe_locked_profile/custom_tool.py +38 -0
  30. src/test_conv_pipe/profiles/_test_conv_pipe_locked_profile/instructions.txt +3 -0
  31. src/test_conv_pipe/profiles/_test_conv_pipe_locked_profile/sweep_look.py +127 -0
  32. src/test_conv_pipe/profiles/_test_conv_pipe_locked_profile/tools.txt +18 -0
  33. src/test_conv_pipe/prompts.py +104 -0
  34. src/test_conv_pipe/prompts/behaviors/silent_robot.txt +6 -0
  35. src/test_conv_pipe/prompts/default_prompt.txt +47 -0
  36. src/test_conv_pipe/prompts/identities/basic_info.txt +4 -0
  37. src/test_conv_pipe/prompts/identities/witty_identity.txt +4 -0
  38. src/test_conv_pipe/prompts/passion_for_lobster_jokes.txt +1 -0
  39. src/test_conv_pipe/static/index.html +54 -0
  40. src/test_conv_pipe/static/main.js +136 -0
  41. src/test_conv_pipe/static/style.css +210 -0
  42. src/test_conv_pipe/tools/__init__.py +4 -0
  43. src/test_conv_pipe/tools/camera.py +68 -0
  44. src/test_conv_pipe/tools/core_tools.py +224 -0
  45. src/test_conv_pipe/tools/dance.py +86 -0
  46. src/test_conv_pipe/tools/do_nothing.py +30 -0
  47. src/test_conv_pipe/tools/head_tracking.py +31 -0
  48. src/test_conv_pipe/tools/move_head.py +79 -0
  49. src/test_conv_pipe/tools/play_emotion.py +84 -0
  50. src/test_conv_pipe/tools/stop_dance.py +31 -0
.env.example ADDED
@@ -0,0 +1,15 @@
+ OPENAI_API_KEY=
+ MODEL_NAME="gpt-realtime"
+
+ # Local vision model (only used with --local-vision CLI flag)
+ # By default, vision is handled by gpt-realtime when the camera tool is used
+ LOCAL_VISION_MODEL=HuggingFaceTB/SmolVLM2-2.2B-Instruct
+
+ # Cache for local VLM (only used with --local-vision CLI flag)
+ HF_HOME=./cache
+
+ # Hugging Face token for accessing datasets/models
+ HF_TOKEN=
+
+ # To select a specific profile with custom instructions and tools, to be placed in profiles/<myprofile>/__init__.py
+ REACHY_MINI_CUSTOM_PROFILE="example"
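A minimal sketch of how these variables are presumably consumed at startup via python-dotenv (a declared dependency). The variable names come from the file above, but the actual loading logic lives in `src/test_conv_pipe/config.py` and may differ:

```python
import os

from dotenv import load_dotenv  # python-dotenv, declared in pyproject.toml

load_dotenv()  # pull .env from the working directory into os.environ

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
MODEL_NAME = os.getenv("MODEL_NAME", "gpt-realtime")
LOCAL_VISION_MODEL = os.getenv("LOCAL_VISION_MODEL", "HuggingFaceTB/SmolVLM2-2.2B-Instruct")
CUSTOM_PROFILE = os.getenv("REACHY_MINI_CUSTOM_PROFILE")  # None -> default profile

if not OPENAI_API_KEY:
    raise RuntimeError("OPENAI_API_KEY is required (see .env.example)")
```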
.gitattributes ADDED
@@ -0,0 +1,41 @@
+ # Macro for all binary files that should use Git LFS.
+ [attr]lfs -text filter=lfs diff=lfs merge=lfs
+
+ # Image
+ *.jpg lfs
+ *.jpeg lfs
+ *.png lfs
+ *.apng lfs
+ *.atsc lfs
+ *.gif lfs
+ *.bmp lfs
+ *.exr lfs
+ *.tga lfs
+ *.tiff lfs
+ *.tif lfs
+ *.iff lfs
+ *.pict lfs
+ *.dds lfs
+ *.xcf lfs
+ *.leo lfs
+ *.kra lfs
+ *.kpp lfs
+ *.clip lfs
+ *.webm lfs
+ *.webp lfs
+ *.svg lfs
+ *.svgz lfs
+ *.psd lfs
+ *.afphoto lfs
+ *.afdesign lfs
+ # Models
+ *.pth lfs
+ # Binaries
+ *.bin lfs
+ *.pkl lfs
+ *.pckl lfs
+ # 3D
+ *.ply lfs
+ *.vis lfs
+ *.db lfs
+ *.ply lfs
.gitignore ADDED
@@ -0,0 +1,61 @@
+ # Python
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+
+ # Virtual environments
+ .venv/
+ venv/
+ ENV/
+ env/
+
+ # Environment variables
+ .env
+
+ # Build and distribution
+ build/
+ dist/
+ *.egg-info/
+ .eggs/
+
+ # Testing
+ .pytest_cache/
+ .coverage
+ .hypothesis/
+ htmlcov/
+ coverage.xml
+ *.cover
+
+ # Linting and formatting
+ .ruff_cache/
+ .mypy_cache/
+
+ # IDE
+ .vscode/
+ .idea/
+ *.swp
+ *.swo
+
+ # Security
+ *.key
+ *.pem
+ *.crt
+ *.csr
+
+ # Temporary files
+ tmp/
+ *.log
+ cache/
+
+ # macOS
+ .DS_Store
+
+ # Linux
+ *~
+ .directory
+ .Trash-*
+ .nfs*
+
+ # User-created personalities (managed by UI)
+ src/test_conv_pipe/profiles/user_personalities/
LICENSE ADDED
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
README.md ADDED
@@ -0,0 +1,27 @@
+ ---
+ title: Test Conv Pipe
+ emoji: 🤖
+ colorFrom: purple
+ colorTo: gray
+ sdk: static
+ pinned: false
+ tags:
+   - reachy_mini
+   - reachy_mini_python_app
+ ---
+
+ # Test Conv Pipe
+
+ Forked from the Reachy Mini conversation app.
+
+ Use the `src/test_conv_pipe/profiles/_test_conv_pipe_locked_profile` folder to customize your own app from this template:
+ - Edit the instructions in `_test_conv_pipe_locked_profile/instructions.txt`
+ - Edit the available tools in `_test_conv_pipe_locked_profile/tools.txt`
+ - Create your own tools in `_test_conv_pipe_locked_profile` by subclassing the `Tool` class.
+
+ Do not forget to customize:
+ - this `README.md` file
+ - the `index.html` file (Hugging Face Spaces landing page)
+ - `src/test_conv_pipe/static/index.html` (the web app parameters page)
+
+ The original README from the conversation app is available in `README_OLD.md`.
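The README points at subclassing `Tool` without showing it, so here is a hedged sketch of what a custom tool file in the locked profile folder might look like. The real base class lives in `test_conv_pipe/tools/core_tools.py` (see the shipped `sweep_look.py` for a working reference); the attribute and method names below are assumptions, not the verified API:

```python
# Hypothetical sketch of a custom tool for the locked profile.
# The attribute/method names are illustrative guesses, not the verified
# core_tools.Tool interface.
from test_conv_pipe.tools.core_tools import Tool


class WaveHello(Tool):
    """Example tool: acknowledge the user with a small gesture."""

    name = "wave_hello"                     # assumed: tool name exposed to the LLM
    description = "Wave at the user once."  # assumed: description used for tool calls

    async def __call__(self, **kwargs):     # assumed: async dispatch entry point
        # Queue a small acknowledgement move here via the motion layer.
        return {"status": "waved"}
```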
README_OLD.md ADDED
@@ -0,0 +1,247 @@
+ ---
+ title: Reachy Mini Conversation App
+ emoji: 🎤
+ colorFrom: red
+ colorTo: blue
+ sdk: static
+ pinned: false
+ short_description: Talk with Reachy Mini!
+ tags:
+   - reachy_mini
+   - reachy_mini_python_app
+ ---
+
+ # Reachy Mini conversation app
+
+ Conversational app for the Reachy Mini robot combining OpenAI's realtime APIs, vision pipelines, and choreographed motion libraries.
+
+ ![Reachy Mini Dance](docs/assets/reachy_mini_dance.gif)
+
+ ## Architecture
+
+ The app follows a layered architecture connecting the user, AI services, and robot hardware:
+
+ <p align="center">
+   <img src="docs/assets/conversation_app_arch.svg" alt="Architecture Diagram" width="600"/>
+ </p>
+
+ ## Overview
+ - Real-time audio conversation loop powered by the OpenAI realtime API and `fastrtc` for low-latency streaming.
+ - Vision processing uses gpt-realtime by default (when the camera tool is used), with optional local vision processing using the SmolVLM2 model running on-device (CPU/GPU/MPS) via the `--local-vision` flag.
+ - Layered motion system queues primary moves (dances, emotions, goto poses, breathing) while blending speech-reactive wobble and face tracking.
+ - Async tool dispatch integrates robot motion, camera capture, and optional face-tracking capabilities through a Gradio web UI with live transcripts.
+
+ ## Installation
+
+ > [!IMPORTANT]
+ > Before using this app, you need to install [Reachy Mini's SDK](https://github.com/pollen-robotics/reachy_mini/).<br>
+ > Windows support is currently experimental and has not been extensively tested. Use with caution.
+
+ ### Using uv
+ You can set up the project quickly using [uv](https://docs.astral.sh/uv/):
+
+ ```bash
+ # macOS (Homebrew)
+ uv venv --python /opt/homebrew/bin/python3.12 .venv
+
+ # Linux / Windows (Python in PATH)
+ uv venv --python python3.12 .venv
+
+ source .venv/bin/activate
+ uv sync
+ ```
+
+ > [!NOTE]
+ > To reproduce the exact dependency set from this repo's `uv.lock`, run `uv sync --frozen`. This ensures `uv` installs directly from the lockfile without re-resolving or updating any versions.
+
+ To include optional dependencies:
+ ```bash
+ uv sync --extra reachy_mini_wireless # For wireless Reachy Mini with GStreamer support
+ uv sync --extra local_vision # For local PyTorch/Transformers vision
+ uv sync --extra yolo_vision # For YOLO-based vision
+ uv sync --extra mediapipe_vision # For MediaPipe-based vision
+ uv sync --extra all_vision # For all vision features
+ ```
+
+ You can combine extras or include dev dependencies:
+ ```
+ uv sync --extra all_vision --group dev
+ ```
+
+ ### Using pip
+
+ ```bash
+ python -m venv .venv # Create a virtual environment
+ source .venv/bin/activate
+ pip install -e .
+ ```
+
+ Install optional extras depending on the feature set you need:
+
+ ```bash
+ # Wireless Reachy Mini support
+ pip install -e .[reachy_mini_wireless]
+
+ # Vision stacks (choose at least one if you plan to run face tracking)
+ pip install -e .[local_vision]
+ pip install -e .[yolo_vision]
+ pip install -e .[mediapipe_vision]
+ pip install -e .[all_vision] # installs every vision extra
+
+ # Tooling for development workflows
+ pip install -e .[dev]
+ ```
+
+ Some wheels (e.g. PyTorch) are large and require compatible CUDA or CPU builds; make sure your platform matches the binaries pulled in by each extra.
+
+ ## Optional dependency groups
+
+ | Extra | Purpose | Notes |
+ |-------|---------|-------|
+ | `reachy_mini_wireless` | Wireless Reachy Mini with GStreamer support. | Required for wireless versions of Reachy Mini; includes GStreamer dependencies. |
+ | `local_vision` | Run the local VLM (SmolVLM2) through PyTorch/Transformers. | GPU recommended; ensure compatible PyTorch builds for your platform. |
+ | `yolo_vision` | YOLOv8 tracking via `ultralytics` and `supervision`. | CPU friendly; supports the `--head-tracker yolo` option. |
+ | `mediapipe_vision` | Lightweight landmark tracking with MediaPipe. | Works on CPU; enables `--head-tracker mediapipe`. |
+ | `all_vision` | Convenience alias installing every vision extra. | Install when you want the flexibility to experiment with every provider. |
+ | `dev` | Developer tooling (`pytest`, `ruff`, `mypy`). | Development-only dependencies. Use `--group dev` with uv or `[dev]` with pip. |
+
+ **Note:** `dev` is a dependency group (not an optional dependency). With uv, use `--group dev`. With pip, use `[dev]`.
+
+ ## Configuration
+
+ 1. Copy `.env.example` to `.env`.
+ 2. Fill in the required values, notably the OpenAI API key.
+
+ | Variable | Description |
+ |----------|-------------|
+ | `OPENAI_API_KEY` | Required. Grants access to the OpenAI realtime endpoint. |
+ | `MODEL_NAME` | Override the realtime model (defaults to `gpt-realtime`). Used for both conversation and vision (unless the `--local-vision` flag is used). |
+ | `HF_HOME` | Cache directory for local Hugging Face downloads (only used with the `--local-vision` flag, defaults to `./cache`). |
+ | `HF_TOKEN` | Optional token for Hugging Face models (only used with the `--local-vision` flag, falls back to `huggingface-cli login`). |
+ | `LOCAL_VISION_MODEL` | Hugging Face model path for local vision processing (only used with the `--local-vision` flag, defaults to `HuggingFaceTB/SmolVLM2-2.2B-Instruct`). |
+
+ ## Running the app
+
+ Activate your virtual environment, ensure the Reachy Mini robot (or simulator) is reachable, then launch:
+
+ ```bash
+ reachy-mini-conversation-app
+ ```
+
+ By default, the app runs in console mode for direct audio interaction. Use the `--gradio` flag to launch a web UI served locally at http://127.0.0.1:7860/ (required when running in simulation mode). With a camera attached, vision is handled by the gpt-realtime model when the camera tool is used. For local vision processing, use the `--local-vision` flag to process frames periodically using the SmolVLM2 model. Additionally, you can enable face tracking via YOLO or MediaPipe pipelines depending on the extras you installed.
+
+ ### CLI options
+
+ | Option | Default | Description |
+ |--------|---------|-------------|
+ | `--head-tracker {yolo,mediapipe}` | `None` | Select a face-tracking backend when a camera is available. YOLO is implemented locally; MediaPipe comes from the `reachy_mini_toolbox` package. Requires the matching optional extra. |
+ | `--no-camera` | `False` | Run without camera capture or face tracking. |
+ | `--local-vision` | `False` | Use the local vision model (SmolVLM2) for periodic image processing instead of gpt-realtime vision. Requires the `local_vision` extra to be installed. |
+ | `--gradio` | `False` | Launch the Gradio web UI. Without this flag, runs in console mode. Required when running in simulation mode. |
+ | `--debug` | `False` | Enable verbose logging for troubleshooting. |
+
+
+ ### Examples
+ - Run on hardware with MediaPipe face tracking:
+
+ ```bash
+ reachy-mini-conversation-app --head-tracker mediapipe
+ ```
+
+ - Run with local vision processing (requires the `local_vision` extra):
+
+ ```bash
+ reachy-mini-conversation-app --local-vision
+ ```
+
+ - Disable the camera pipeline (audio-only conversation):
+
+ ```bash
+ reachy-mini-conversation-app --no-camera
+ ```
+
+ - Run with the Gradio web interface:
+
+ ```bash
+ reachy-mini-conversation-app --gradio
+ ```
+
+ ### Troubleshooting
+
+ - Timeout error:
+   If you get an error like this:
+   ```bash
+   TimeoutError: Timeout while waiting for connection with the server.
+   ```
+   It probably means that Reachy Mini's daemon isn't running. Install [Reachy Mini's SDK](https://github.com/pollen-robotics/reachy_mini/) and start the daemon.
+
+ ## LLM tools exposed to the assistant
+
+ | Tool | Action | Dependencies |
+ |------|--------|--------------|
+ | `move_head` | Queue a head pose change (left/right/up/down/front). | Core install only. |
+ | `camera` | Capture the latest camera frame and send it to gpt-realtime for vision analysis. | Requires camera worker; uses gpt-realtime vision by default. |
+ | `head_tracking` | Enable or disable face-tracking offsets (not facial recognition; only detects and tracks face position). | Camera worker with configured head tracker. |
+ | `dance` | Queue a dance from `reachy_mini_dances_library`. | Core install only. |
+ | `stop_dance` | Clear queued dances. | Core install only. |
+ | `play_emotion` | Play a recorded emotion clip via Hugging Face assets. | Needs `HF_TOKEN` for the recorded emotions dataset. |
+ | `stop_emotion` | Clear queued emotions. | Core install only. |
+ | `do_nothing` | Explicitly remain idle. | Core install only. |
+
+ ## Using custom profiles
+ Create custom profiles with dedicated instructions and enabled tools!
+
+ Set `REACHY_MINI_CUSTOM_PROFILE=<name>` to load `src/reachy_mini_conversation_app/profiles/<name>/` (see `.env.example`). If unset, the `default` profile is used.
+
+ Each profile requires two files: `instructions.txt` (prompt text) and `tools.txt` (list of allowed tools), and may optionally contain custom tool implementations.
+
+ ### Custom instructions
+ Write plain-text prompts in `instructions.txt`. To reuse shared prompt pieces, add lines like:
+ ```
+ [passion_for_lobster_jokes]
+ [identities/witty_identity]
+ ```
+ Each placeholder pulls in the matching file under `src/reachy_mini_conversation_app/prompts/` (nested paths allowed). See `src/reachy_mini_conversation_app/profiles/example/` for a reference layout.
+
+ ### Enabling tools
+ List enabled tools in `tools.txt`, one per line; prefix a line with `#` to comment it out. For example:
+
+ ```
+ play_emotion
+ # move_head
+
+ # My custom tool defined locally
+ sweep_look
+ ```
+ Tools are resolved first from Python files in the profile folder (custom tools), then from the shared library `src/reachy_mini_conversation_app/tools/` (e.g., `dance`, `head_tracking`).
+
+ ### Custom tools
+ On top of the built-in tools in the shared library, you can implement custom tools specific to your profile by adding Python files to the profile folder.
+ Custom tools must subclass `reachy_mini_conversation_app.tools.core_tools.Tool` (see `profiles/example/sweep_look.py`).
+
+ ### Edit personalities from the UI
+ When running with `--gradio`, open the "Personality" accordion:
+ - Select among available profiles (folders under `src/reachy_mini_conversation_app/profiles/`) or the built‑in default.
+ - Click "Apply" to update the current session instructions live.
+ - Create a new personality by entering a name and instructions text; it stores files under `profiles/<name>/` and copies `tools.txt` from the `default` profile.
+
+ Note: The "Personality" panel updates the conversation instructions. Tool sets are loaded at startup from `tools.txt` and are not hot‑reloaded.
+
+ ### Locked profile mode
+
+ To create a locked variant of the app that cannot switch profiles, edit `src/reachy_mini_conversation_app/config.py` and set the `LOCKED_PROFILE` constant to the desired profile name:
+ ```python
+ LOCKED_PROFILE: str | None = "mars_rover" # Lock to this profile
+ ```
+ When `LOCKED_PROFILE` is set, the app always uses that profile and ignores the `REACHY_MINI_CUSTOM_PROFILE` env var; the Gradio UI shows "(locked)" and disables all profile editing controls.
+ This is useful for creating dedicated clones of the app with a fixed personality. Clone scripts can simply edit this constant to lock the variant.
+
+
+ ## Development workflow
+ - Install the dev group extras: `uv sync --group dev` or `pip install -e .[dev]`.
+ - Run formatting and linting: `ruff check .`.
+ - Execute the test suite: `pytest`.
+ - When iterating on robot motions, keep the control loop responsive: offload blocking work using the helpers in `tools.py`.
+
+ ## License
+ Apache 2.0
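The placeholder mechanism described in the README's "Custom instructions" section (lines like `[identities/witty_identity]`) is easy to picture as a small substitution pass. A hedged sketch of the idea, under the assumption that each `[name]` line maps to `prompts/<name>.txt`; this is not the actual code from `prompts.py`:

```python
import re
from pathlib import Path

PROMPTS_DIR = Path("src/test_conv_pipe/prompts")  # assumption: shared prompt library root


def expand_instructions(text: str) -> str:
    """Replace [name] placeholder lines with the contents of prompts/<name>.txt."""
    def load(match: re.Match[str]) -> str:
        name = match.group(1)  # nested paths like identities/witty_identity are allowed
        return (PROMPTS_DIR / f"{name}.txt").read_text(encoding="utf-8").strip()

    return re.sub(r"^\[([\w/]+)\]$", load, text, flags=re.MULTILINE)
```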
docs/assets/conversation_app_arch.svg ADDED

Git LFS Details

  • SHA256: 2d3251bc98d5a0bf1d41d0332b76e7e86496745b2a0999f228b7d8647dd453a2
  • Pointer size: 131 Bytes
  • Size of remote file: 122 kB
docs/assets/reachy_mini_dance.gif ADDED

Git LFS Details

  • SHA256: 75914c3cb7af982e0b1c6369e25fc46d8c08a0ab5ad022240ae9c1a0d93967c3
  • Pointer size: 132 Bytes
  • Size of remote file: 3.93 MB
docs/scheme.mmd ADDED
@@ -0,0 +1,58 @@
+ ---
+ config:
+   layout: dagre
+   flowchart:
+     htmlLabels: true
+ ---
+ flowchart TB
+     User(["<span style='font-size:16px;font-weight:bold;'>User</span><br><span style='font-size:13px;color:#01579b;'>Person interacting with system</span>"])
+     -- audio stream -->
+     UI@{ label: "<span style='font-size:16px;font-weight:bold;'>UI Layer</span><br><span style='font-size:13px;color:#0277bd;'>Gradio/Console</span>" }
+
+     UI -- audio stream -->
+     OpenAI@{ label: "<span style='font-size:17px;font-weight:bold;'>gpt-realtime API</span><br><span style='font-size:13px; color:#7b1fa2;'>Audio+Tool Calls+Vision</span>" }
+
+     OpenAI -- audio stream -->
+     Motion@{ label: "<span style='font-size:16px;font-weight:bold;'>Motion Control</span><br><span style='font-size:13px;color:#f57f17;'>Audio Sync + Tracking</span>" }
+
+     OpenAI -- tool calls -->
+     Handlers@{ label: "<span style='font-size:16px;font-weight:bold;'>Tool Handlers</span><br><span style='font-size:12px;color:#f9a825;'>move_head, camera, head_tracking,<br/>dance, play_emotion, do_nothing</span>" }
+
+     Handlers -- movement
+     requests --> Motion
+
+     Handlers -- camera frames, face tracking -->
+     Camera@{ label: "<span style='font-size:16px;font-weight:bold;'>Camera Worker</span><br><span style='font-size:13px;color:#f57f17;'>Frame Buffer + Face Tracking</span>" }
+
+     Handlers -. image for
+     analysis .-> OpenAI
+
+     Camera -- face tracking --> Motion
+
+     Camera -. frames .->
+     Vision@{ label: "<span style='font-size:16px;font-weight:bold;'>Vision Processor</span><br><span style='font-size:13px;color:#7b1fa2;'>Local VLM (optional)</span>" }
+
+     Vision -. description .-> Handlers
+
+     Robot@{ label: "<span style='font-size:16px;font-weight:bold;'>reachy_mini</span><br><span style='font-size:13px;color:#c62828;'>Robot Control Library</span>" }
+     -- camera
+     frames --> Camera
+
+     Motion -- commands --> Robot
+
+     Handlers -- results --> OpenAI
+
+     User:::userStyle
+     UI:::uiStyle
+     OpenAI:::aiStyle
+     Motion:::coreStyle
+     Handlers:::toolStyle
+     Camera:::coreStyle
+     Vision:::aiStyle
+     Robot:::hardwareStyle
+     classDef userStyle fill:#e1f5fe,stroke:#01579b,stroke-width:3px
+     classDef uiStyle fill:#b3e5fc,stroke:#0277bd,stroke-width:2px
+     classDef aiStyle fill:#e1bee7,stroke:#7b1fa2,stroke-width:3px
+     classDef coreStyle fill:#fff9c4,stroke:#f57f17,stroke-width:2px
+     classDef hardwareStyle fill:#ef9a9a,stroke:#c62828,stroke-width:3px
+     classDef toolStyle fill:#fffde7,stroke:#f9a825,stroke-width:1px
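The diagram's "tool calls → Tool Handlers → results" edge pair amounts to dispatching a named tool call to a registered handler and returning its output to the realtime session. A hedged sketch of that pattern; the names here are illustrative, not the app's actual API (the real handlers live in `src/test_conv_pipe/tools/` and `openai_realtime.py`):

```python
import json
from typing import Any, Awaitable, Callable

# Illustrative registry: tool name -> async handler.
ToolHandler = Callable[..., Awaitable[dict[str, Any]]]
TOOLS: dict[str, ToolHandler] = {}


async def dispatch_tool_call(name: str, arguments_json: str) -> str:
    """Run one tool call off the audio path and return a JSON result for the model."""
    handler = TOOLS.get(name)
    if handler is None:
        return json.dumps({"error": f"unknown tool {name!r}"})
    result = await handler(**json.loads(arguments_json))
    return json.dumps(result)
```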
index.html ADDED
@@ -0,0 +1,42 @@
+ <!doctype html>
+ <html lang="en">
+   <head>
+     <meta charset="utf-8" />
+     <meta name="viewport" content="width=device-width, initial-scale=1" />
+     <title>Test Conv Pipe</title>
+     <link rel="stylesheet" href="style.css" />
+   </head>
+   <body>
+     <div class="ambient"></div>
+     <div class="container">
+       <header class="hero">
+         <div class="pill">Reachy Mini App</div>
+         <h1>Test Conv Pipe</h1>
+         <p class="subtitle">A conversation app for the Reachy Mini robot.</p>
+       </header>
+
+       <div class="panel">
+         <div class="panel-heading">
+           <div>
+             <p class="eyebrow">Getting Started</p>
+             <h2>Installation</h2>
+           </div>
+         </div>
+         <p class="muted">Install this app on your Reachy Mini using the app store, or run it locally:</p>
+         <pre><code>uv sync
+ reachy-mini-daemon --sim # in another terminal
+ python -m test_conv_pipe</code></pre>
+       </div>
+
+       <div class="panel">
+         <div class="panel-heading">
+           <div>
+             <p class="eyebrow">Configuration</p>
+             <h2>OpenAI API Key</h2>
+           </div>
+         </div>
+         <p class="muted">This app requires an OpenAI API key for voice conversations. Set it via the web interface or environment variable.</p>
+       </div>
+     </div>
+   </body>
+ </html>
pyproject.toml ADDED
@@ -0,0 +1,72 @@
+ [build-system]
+ requires = [ "setuptools",]
+ build-backend = "setuptools.build_meta"
+
+ [project]
+ name = "test_conv_pipe"
+ version = "0.2.2"
+ description = ""
+ readme = "README.md"
+ requires-python = ">=3.10"
+ dependencies = [ "aiortc>=1.13.0", "fastrtc>=0.0.34", "gradio==5.50.1.dev1", "huggingface-hub==1.3.0", "opencv-python>=4.12.0.88", "python-dotenv", "openai>=2.1", "reachy_mini_dances_library", "reachy_mini_toolbox", "reachy-mini >= 1.2.11", "eclipse-zenoh~=1.7.0", "gradio_client>=1.13.3",]
+ [[project.authors]]
+ name = "Pollen Robotics"
+ email = "contact@pollen-robotics.com"
+
+ [dependency-groups]
+ dev = [ "pytest", "pytest-asyncio", "ruff==0.12.0", "mypy==1.18.2", "pre-commit", "types-requests", "python-semantic-release>=10.5.3",]
+
+ [project.optional-dependencies]
+ reachy_mini_wireless = [ "PyGObject>=3.42.2,<=3.46.0", "gst-signalling>=1.1.2",]
+ local_vision = [ "torch>=2.1", "transformers==5.0.0rc2", "num2words",]
+ yolo_vision = [ "ultralytics", "supervision",]
+ mediapipe_vision = [ "mediapipe==0.10.14",]
+ all_vision = [ "torch>=2.1", "transformers==5.0.0rc2", "num2words", "ultralytics", "supervision", "mediapipe==0.10.14",]
+
+ [project.scripts]
+ test-conv-pipe = "test_conv_pipe.main:main"
+
+ [tool.setuptools]
+ include-package-data = true
+
+ [tool.ruff]
+ line-length = 119
+ exclude = [ ".venv", "dist", "build", "**/__pycache__", "*.egg-info", ".mypy_cache", ".pytest_cache",]
+
+ [tool.mypy]
+ python_version = "3.12"
+ files = [ "src/",]
+ ignore_missing_imports = true
+ strict = true
+ show_error_codes = true
+ warn_unused_ignores = true
+
+ [project.entry-points.reachy_mini_apps]
+ test_conv_pipe = "test_conv_pipe.main:TestConvPipe"
+
+ [tool.setuptools.package-dir]
+ "" = "src"
+
+ [tool.setuptools.package-data]
+ test_conv_pipe = [ "images/*", "static/*", ".env.example", "demos/**/*.txt", "prompts_library/*.txt", "profiles/**/*.txt", "prompts/**/*.txt",]
+
+ [tool.ruff.lint]
+ select = [ "E", "F", "W", "I", "C4", "D",]
+ ignore = [ "E501", "D100", "D203", "D213",]
+
+ [tool.ruff.format]
+ quote-style = "double"
+ indent-style = "space"
+ skip-magic-trailing-comma = false
+ line-ending = "auto"
+
+ [tool.setuptools.packages.find]
+ where = [ "src",]
+
+ [tool.ruff.lint.isort]
+ length-sort = true
+ lines-after-imports = 2
+ no-lines-before = [ "standard-library", "local-folder",]
+ known-local-folder = [ "test_conv_pipe",]
+ known-first-party = [ "reachy_mini", "reachy_mini_dances_library", "reachy_mini_toolbox",]
+ split-on-trailing-comma = true
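The `[project.entry-points.reachy_mini_apps]` table is what lets the Reachy Mini app launcher discover this app after installation. A hedged sketch of how such an entry point can be enumerated and loaded with the standard library (the daemon's actual discovery code may differ):

```python
from importlib.metadata import entry_points

# Find apps registered under the reachy_mini_apps entry-point group.
for ep in entry_points(group="reachy_mini_apps"):
    print(ep.name, "->", ep.value)  # e.g. test_conv_pipe -> test_conv_pipe.main:TestConvPipe
    app_cls = ep.load()             # imports test_conv_pipe.main and returns TestConvPipe
```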
src/test_conv_pipe/__init__.py ADDED
@@ -0,0 +1 @@
+ """Nothing (for ruff)."""
src/test_conv_pipe/audio/__init__.py ADDED
@@ -0,0 +1 @@
+ """Nothing (for ruff)."""
src/test_conv_pipe/audio/head_wobbler.py ADDED
@@ -0,0 +1,181 @@
+ """Moves head given audio samples."""
+
+ import time
+ import queue
+ import base64
+ import logging
+ import threading
+ from typing import Tuple
+ from collections.abc import Callable
+
+ import numpy as np
+ from numpy.typing import NDArray
+
+ from test_conv_pipe.audio.speech_tapper import HOP_MS, SwayRollRT
+
+
+ SAMPLE_RATE = 24000
+ MOVEMENT_LATENCY_S = 0.2  # seconds between audio and robot movement
+ logger = logging.getLogger(__name__)
+
+
+ class HeadWobbler:
+     """Converts audio deltas (base64) into head movement offsets."""
+
+     def __init__(self, set_speech_offsets: Callable[[Tuple[float, float, float, float, float, float]], None]) -> None:
+         """Initialize the head wobbler."""
+         self._apply_offsets = set_speech_offsets
+         self._base_ts: float | None = None
+         self._hops_done: int = 0
+
+         self.audio_queue: "queue.Queue[Tuple[int, int, NDArray[np.int16]]]" = queue.Queue()
+         self.sway = SwayRollRT()
+
+         # Synchronization primitives
+         self._state_lock = threading.Lock()
+         self._sway_lock = threading.Lock()
+         self._generation = 0
+
+         self._stop_event = threading.Event()
+         self._thread: threading.Thread | None = None
+
+     def feed(self, delta_b64: str) -> None:
+         """Thread-safe: push audio into the consumer queue."""
+         buf = np.frombuffer(base64.b64decode(delta_b64), dtype=np.int16).reshape(1, -1)
+         with self._state_lock:
+             generation = self._generation
+             self.audio_queue.put((generation, SAMPLE_RATE, buf))
+
+     def start(self) -> None:
+         """Start the head wobbler loop in a thread."""
+         self._stop_event.clear()
+         self._thread = threading.Thread(target=self.working_loop, daemon=True)
+         self._thread.start()
+         logger.debug("Head wobbler started")
+
+     def stop(self) -> None:
+         """Stop the head wobbler loop."""
+         self._stop_event.set()
+         if self._thread is not None:
+             self._thread.join()
+         logger.debug("Head wobbler stopped")
+
+     def working_loop(self) -> None:
+         """Convert audio deltas into head movement offsets."""
+         hop_dt = HOP_MS / 1000.0
+
+         logger.debug("Head wobbler thread started")
+         while not self._stop_event.is_set():
+             queue_ref = self.audio_queue
+             try:
+                 chunk_generation, sr, chunk = queue_ref.get_nowait()  # (gen, sr, data)
+             except queue.Empty:
+                 # sleep briefly so an empty queue does not busy-spin the loop
+                 time.sleep(MOVEMENT_LATENCY_S)
+                 continue
+
+             try:
+                 with self._state_lock:
+                     current_generation = self._generation
+                     if chunk_generation != current_generation:
+                         continue
+
+                 if self._base_ts is None:
+                     with self._state_lock:
+                         if self._base_ts is None:
+                             self._base_ts = time.monotonic()
+
+                 pcm = np.asarray(chunk).squeeze(0)
+                 with self._sway_lock:
+                     results = self.sway.feed(pcm, sr)
+
+                 i = 0
+                 while i < len(results):
+                     with self._state_lock:
+                         if self._generation != current_generation:
+                             break
+                         base_ts = self._base_ts
+                         hops_done = self._hops_done
+
+                     if base_ts is None:
+                         base_ts = time.monotonic()
+                         with self._state_lock:
+                             if self._base_ts is None:
+                                 self._base_ts = base_ts
+                             hops_done = self._hops_done
+
+                     target = base_ts + MOVEMENT_LATENCY_S + hops_done * hop_dt
+                     now = time.monotonic()
+
+                     if now - target >= hop_dt:
+                         lag_hops = int((now - target) / hop_dt)
+                         drop = min(lag_hops, len(results) - i - 1)
+                         if drop > 0:
+                             with self._state_lock:
+                                 self._hops_done += drop
+                                 hops_done = self._hops_done
+                             i += drop
+                             continue
+
+                     if target > now:
+                         time.sleep(target - now)
+                         with self._state_lock:
+                             if self._generation != current_generation:
+                                 break
+
+                     r = results[i]
+                     offsets = (
+                         r["x_mm"] / 1000.0,
+                         r["y_mm"] / 1000.0,
+                         r["z_mm"] / 1000.0,
+                         r["roll_rad"],
+                         r["pitch_rad"],
+                         r["yaw_rad"],
+                     )
+
+                     with self._state_lock:
+                         if self._generation != current_generation:
+                             break
+
+                     self._apply_offsets(offsets)
+
+                     with self._state_lock:
+                         self._hops_done += 1
+                     i += 1
+             finally:
+                 queue_ref.task_done()
+         logger.debug("Head wobbler thread exited")
+
+     '''
+     def drain_audio_queue(self) -> None:
+         """Empty the audio queue."""
+         try:
+             while True:
+                 self.audio_queue.get_nowait()
+         except queue.Empty:
+             pass
+     '''
+
+     def reset(self) -> None:
+         """Reset the internal state."""
+         with self._state_lock:
+             self._generation += 1
+             self._base_ts = None
+             self._hops_done = 0
+
+         # Drain any queued audio chunks from previous generations
+         drained_any = False
+         while True:
+             try:
+                 _, _, _ = self.audio_queue.get_nowait()
+             except queue.Empty:
+                 break
+             else:
+                 drained_any = True
+                 self.audio_queue.task_done()
+
+         with self._sway_lock:
+             self.sway.reset()
+
+         if drained_any:
+             logger.debug("Head wobbler queue drained during reset")
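A hedged usage sketch of the class above: wire `HeadWobbler` to a callback that applies offsets, start it, and feed it base64-encoded PCM deltas as they arrive from the realtime API. The callback body is a stand-in for the app's motion layer:

```python
import base64

import numpy as np

from test_conv_pipe.audio.head_wobbler import HeadWobbler


def apply_offsets(offsets: tuple[float, float, float, float, float, float]) -> None:
    # Stand-in for the motion layer: x, y, z in meters; roll, pitch, yaw in radians.
    print("speech offsets:", offsets)


wobbler = HeadWobbler(apply_offsets)
wobbler.start()

# Feed one 100 ms chunk of silence at the 24 kHz rate the class expects.
pcm = np.zeros(2400, dtype=np.int16)
wobbler.feed(base64.b64encode(pcm.tobytes()).decode("ascii"))

wobbler.reset()  # e.g. on interruption: bumps the generation and drains the queue
wobbler.stop()
```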
src/test_conv_pipe/audio/speech_tapper.py ADDED
@@ -0,0 +1,268 @@
+ from __future__ import annotations
+ import math
+ from typing import Any, Dict, List
+ from itertools import islice
+ from collections import deque
+
+ import numpy as np
+ from numpy.typing import NDArray
+
+
+ # Tunables
+ SR = 16_000
+ FRAME_MS = 20
+ HOP_MS = 50
+
+ SWAY_MASTER = 1.5
+ SENS_DB_OFFSET = +4.0
+ VAD_DB_ON = -35.0
+ VAD_DB_OFF = -45.0
+ VAD_ATTACK_MS = 40
+ VAD_RELEASE_MS = 250
+ ENV_FOLLOW_GAIN = 0.65
+
+ SWAY_F_PITCH = 2.2
+ SWAY_A_PITCH_DEG = 4.5
+ SWAY_F_YAW = 0.6
+ SWAY_A_YAW_DEG = 7.5
+ SWAY_F_ROLL = 1.3
+ SWAY_A_ROLL_DEG = 2.25
+ SWAY_F_X = 0.35
+ SWAY_A_X_MM = 4.5
+ SWAY_F_Y = 0.45
+ SWAY_A_Y_MM = 3.75
+ SWAY_F_Z = 0.25
+ SWAY_A_Z_MM = 2.25
+
+ SWAY_DB_LOW = -46.0
+ SWAY_DB_HIGH = -18.0
+ LOUDNESS_GAMMA = 0.9
+ SWAY_ATTACK_MS = 50
+ SWAY_RELEASE_MS = 250
+
+ # Derived
+ FRAME = int(SR * FRAME_MS / 1000)
+ HOP = int(SR * HOP_MS / 1000)
+ ATTACK_FR = max(1, int(VAD_ATTACK_MS / HOP_MS))
+ RELEASE_FR = max(1, int(VAD_RELEASE_MS / HOP_MS))
+ SWAY_ATTACK_FR = max(1, int(SWAY_ATTACK_MS / HOP_MS))
+ SWAY_RELEASE_FR = max(1, int(SWAY_RELEASE_MS / HOP_MS))
+
+
+ def _rms_dbfs(x: NDArray[np.float32]) -> float:
+     """Root-mean-square in dBFS for float32 mono array in [-1,1]."""
+     # numerically stable rms (avoid overflow)
+     x = x.astype(np.float32, copy=False)
+     rms = np.sqrt(np.mean(x * x, dtype=np.float32) + 1e-12, dtype=np.float32)
+     return float(20.0 * math.log10(float(rms) + 1e-12))
+
+
+ def _loudness_gain(db: float, offset: float = SENS_DB_OFFSET) -> float:
+     """Normalize dB into [0,1] with gamma; clipped to [0,1]."""
+     t = (db + offset - SWAY_DB_LOW) / (SWAY_DB_HIGH - SWAY_DB_LOW)
+     if t < 0.0:
+         t = 0.0
+     elif t > 1.0:
+         t = 1.0
+     return t**LOUDNESS_GAMMA if LOUDNESS_GAMMA != 1.0 else t
+
+
+ def _to_float32_mono(x: NDArray[Any]) -> NDArray[np.float32]:
+     """Convert arbitrary PCM array to float32 mono in [-1,1].
+
+     Accepts shapes: (N,), (1,N), (N,1), (C,N), (N,C).
+     """
+     a = np.asarray(x)
+     if a.ndim == 0:
+         return np.zeros(0, dtype=np.float32)
+
+     # If 2D, decide which axis is channels (prefer small first dim)
+     if a.ndim == 2:
+         # e.g., (channels, samples) if channels is small (<=8)
+         if a.shape[0] <= 8 and a.shape[0] <= a.shape[1]:
+             a = np.mean(a, axis=0)
+         else:
+             a = np.mean(a, axis=1)
+     elif a.ndim > 2:
+         a = np.mean(a.reshape(a.shape[0], -1), axis=0)
+
+     # Now 1D, cast/scale
+     if np.issubdtype(a.dtype, np.floating):
+         return a.astype(np.float32, copy=False)
+     # integer PCM
+     info = np.iinfo(a.dtype)
+     scale = float(max(-info.min, info.max))
+     return a.astype(np.float32) / (scale if scale != 0.0 else 1.0)
+
+
+ def _resample_linear(x: NDArray[np.float32], sr_in: int, sr_out: int) -> NDArray[np.float32]:
+     """Lightweight linear resampler for short buffers."""
+     if sr_in == sr_out or x.size == 0:
+         return x
+     # guard tiny sizes
+     n_out = int(round(x.size * sr_out / sr_in))
+     if n_out <= 1:
+         return np.zeros(0, dtype=np.float32)
+     t_in = np.linspace(0.0, 1.0, num=x.size, dtype=np.float32, endpoint=True)
+     t_out = np.linspace(0.0, 1.0, num=n_out, dtype=np.float32, endpoint=True)
+     return np.interp(t_out, t_in, x).astype(np.float32, copy=False)
+
+
+ class SwayRollRT:
+     """Feed audio chunks → per-hop sway outputs.
+
+     Usage:
+         rt = SwayRollRT()
+         rt.feed(pcm_int16_or_float, sr) -> List[dict]
+     """
+
+     def __init__(self, rng_seed: int = 7):
+         """Initialize state."""
+         self._seed = int(rng_seed)
+         self.samples: deque[float] = deque(maxlen=10 * SR)  # sliding window for VAD/env
+         self.carry: NDArray[np.float32] = np.zeros(0, dtype=np.float32)
+
+         self.vad_on = False
+         self.vad_above = 0
+         self.vad_below = 0
+
+         self.sway_env = 0.0
+         self.sway_up = 0
+         self.sway_down = 0
+
+         rng = np.random.default_rng(self._seed)
+         self.phase_pitch = float(rng.random() * 2 * math.pi)
+         self.phase_yaw = float(rng.random() * 2 * math.pi)
+         self.phase_roll = float(rng.random() * 2 * math.pi)
+         self.phase_x = float(rng.random() * 2 * math.pi)
+         self.phase_y = float(rng.random() * 2 * math.pi)
+         self.phase_z = float(rng.random() * 2 * math.pi)
+         self.t = 0.0
+
+     def reset(self) -> None:
+         """Reset state (VAD/env/buffers/time) but keep initial phases/seed."""
+         self.samples.clear()
+         self.carry = np.zeros(0, dtype=np.float32)
+         self.vad_on = False
+         self.vad_above = 0
+         self.vad_below = 0
+         self.sway_env = 0.0
+         self.sway_up = 0
+         self.sway_down = 0
+         self.t = 0.0
+
+     def feed(self, pcm: NDArray[Any], sr: int | None) -> List[Dict[str, float]]:
+         """Stream in PCM chunk. Returns a list of sway dicts, one per hop (HOP_MS).
+
+         Args:
+             pcm: np.ndarray, shape (N,) or (C,N)/(N,C); int or float.
+             sr: sample rate of `pcm` (None -> assume SR).
+
+         """
+         sr_in = SR if sr is None else int(sr)
+         x = _to_float32_mono(pcm)
+         if x.size == 0:
+             return []
+         if sr_in != SR:
+             x = _resample_linear(x, sr_in, SR)
+             if x.size == 0:
+                 return []
+
+         # append to carry and consume fixed HOP chunks
+         if self.carry.size:
+             self.carry = np.concatenate([self.carry, x])
+         else:
+             self.carry = x
+
+         out: List[Dict[str, float]] = []
+
+         while self.carry.size >= HOP:
+             hop = self.carry[:HOP]
+             remaining: NDArray[np.float32] = self.carry[HOP:]
+             self.carry = remaining
+
+             # keep sliding window for VAD/env computation
+             # (deque accepts any iterable; list() for small HOP is fine)
+             self.samples.extend(hop.tolist())
+             if len(self.samples) < FRAME:
+                 self.t += HOP_MS / 1000.0
+                 continue
+
+             frame = np.fromiter(
+                 islice(self.samples, len(self.samples) - FRAME, len(self.samples)),
+                 dtype=np.float32,
+                 count=FRAME,
+             )
+             db = _rms_dbfs(frame)
+
+             # VAD with hysteresis + attack/release
+             if db >= VAD_DB_ON:
+                 self.vad_above += 1
+                 self.vad_below = 0
+                 if not self.vad_on and self.vad_above >= ATTACK_FR:
+                     self.vad_on = True
+             elif db <= VAD_DB_OFF:
+                 self.vad_below += 1
+                 self.vad_above = 0
+                 if self.vad_on and self.vad_below >= RELEASE_FR:
+                     self.vad_on = False
+
+             if self.vad_on:
+                 self.sway_up = min(SWAY_ATTACK_FR, self.sway_up + 1)
+                 self.sway_down = 0
+             else:
+                 self.sway_down = min(SWAY_RELEASE_FR, self.sway_down + 1)
+                 self.sway_up = 0
+
+             up = self.sway_up / SWAY_ATTACK_FR
+             down = 1.0 - (self.sway_down / SWAY_RELEASE_FR)
+             target = up if self.vad_on else down
+             self.sway_env += ENV_FOLLOW_GAIN * (target - self.sway_env)
+             # clamp
+             if self.sway_env < 0.0:
+                 self.sway_env = 0.0
+             elif self.sway_env > 1.0:
+                 self.sway_env = 1.0
+
+             loud = _loudness_gain(db) * SWAY_MASTER
+             env = self.sway_env
+             self.t += HOP_MS / 1000.0
+
+             # oscillators
+             pitch = (
+                 math.radians(SWAY_A_PITCH_DEG)
+                 * loud
+                 * env
+                 * math.sin(2 * math.pi * SWAY_F_PITCH * self.t + self.phase_pitch)
+             )
+             yaw = (
+                 math.radians(SWAY_A_YAW_DEG)
+                 * loud
+                 * env
+                 * math.sin(2 * math.pi * SWAY_F_YAW * self.t + self.phase_yaw)
+             )
+             roll = (
+                 math.radians(SWAY_A_ROLL_DEG)
+                 * loud
+                 * env
+                 * math.sin(2 * math.pi * SWAY_F_ROLL * self.t + self.phase_roll)
+             )
+             x_mm = SWAY_A_X_MM * loud * env * math.sin(2 * math.pi * SWAY_F_X * self.t + self.phase_x)
+             y_mm = SWAY_A_Y_MM * loud * env * math.sin(2 * math.pi * SWAY_F_Y * self.t + self.phase_y)
+             z_mm = SWAY_A_Z_MM * loud * env * math.sin(2 * math.pi * SWAY_F_Z * self.t + self.phase_z)
+
+             out.append(
+                 {
+                     "pitch_rad": pitch,
+                     "yaw_rad": yaw,
+                     "roll_rad": roll,
+                     "pitch_deg": math.degrees(pitch),
+                     "yaw_deg": math.degrees(yaw),
+                     "roll_deg": math.degrees(roll),
+                     "x_mm": x_mm,
+                     "y_mm": y_mm,
+                     "z_mm": z_mm,
+                 },
+             )
+
+         return out
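A hedged sketch of driving `SwayRollRT` directly with synthetic audio, which shows the per-hop output contract (one dict per 50 ms hop, with the keys emitted in `feed` above):

```python
import numpy as np

from test_conv_pipe.audio.speech_tapper import SR, SwayRollRT

rt = SwayRollRT()

# 200 ms of a loud 220 Hz tone at the native 16 kHz rate -> 4 hops of 50 ms.
t = np.arange(int(0.2 * SR)) / SR
pcm = (0.5 * np.sin(2 * np.pi * 220.0 * t)).astype(np.float32)

for hop in rt.feed(pcm, SR):
    print(f"yaw={hop['yaw_deg']:+.2f} deg  roll={hop['roll_deg']:+.2f} deg  x={hop['x_mm']:+.2f} mm")
```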
src/test_conv_pipe/camera_worker.py ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ """Camera worker thread with frame buffering and face tracking.
+
+ Ported from main_works.py camera_worker() function to provide:
+ - 30Hz+ camera polling with thread-safe frame buffering
+ - Face tracking integration with smooth interpolation
+ - Latest frame always available for tools
+ """
+
+ import time
+ import logging
+ import threading
+ from typing import Any, List, Tuple
+
+ import numpy as np
+ from numpy.typing import NDArray
+ from scipy.spatial.transform import Rotation as R
+
+ from reachy_mini import ReachyMini
+ from reachy_mini.utils.interpolation import linear_pose_interpolation
+
+
+ logger = logging.getLogger(__name__)
+
+
+ class CameraWorker:
+     """Thread-safe camera worker with frame buffering and face tracking."""
+
+     def __init__(self, reachy_mini: ReachyMini, head_tracker: Any = None) -> None:
+         """Initialize."""
+         self.reachy_mini = reachy_mini
+         self.head_tracker = head_tracker
+
+         # Thread-safe frame storage
+         self.latest_frame: NDArray[np.uint8] | None = None
+         self.frame_lock = threading.Lock()
+         self._stop_event = threading.Event()
+         self._thread: threading.Thread | None = None
+
+         # Face tracking state
+         self.is_head_tracking_enabled = True
+         self.face_tracking_offsets: List[float] = [
+             0.0,
+             0.0,
+             0.0,
+             0.0,
+             0.0,
+             0.0,
+         ]  # x, y, z, roll, pitch, yaw
+         self.face_tracking_lock = threading.Lock()
+
+         # Face tracking timing variables (same as main_works.py)
+         self.last_face_detected_time: float | None = None
+         self.interpolation_start_time: float | None = None
+         self.interpolation_start_pose: NDArray[np.float32] | None = None
+         self.face_lost_delay = 2.0  # seconds to wait before starting interpolation
+         self.interpolation_duration = 1.0  # seconds to interpolate back to neutral
+
+         # Track state changes
+         self.previous_head_tracking_state = self.is_head_tracking_enabled
+
+     def get_latest_frame(self) -> NDArray[np.uint8] | None:
+         """Get the latest frame (thread-safe)."""
+         with self.frame_lock:
+             if self.latest_frame is None:
+                 return None
+             # Return a copy in original BGR format (OpenCV native)
+             return self.latest_frame.copy()
+
+     def get_face_tracking_offsets(
+         self,
+     ) -> Tuple[float, float, float, float, float, float]:
+         """Get current face tracking offsets (thread-safe)."""
+         with self.face_tracking_lock:
+             offsets = self.face_tracking_offsets
+             return (offsets[0], offsets[1], offsets[2], offsets[3], offsets[4], offsets[5])
+
+     def set_head_tracking_enabled(self, enabled: bool) -> None:
+         """Enable/disable head tracking."""
+         self.is_head_tracking_enabled = enabled
+         logger.info(f"Head tracking {'enabled' if enabled else 'disabled'}")
+
+     def start(self) -> None:
+         """Start the camera worker loop in a thread."""
+         self._stop_event.clear()
+         self._thread = threading.Thread(target=self.working_loop, daemon=True)
+         self._thread.start()
+         logger.debug("Camera worker started")
+
+     def stop(self) -> None:
+         """Stop the camera worker loop."""
+         self._stop_event.set()
+         if self._thread is not None:
+             self._thread.join()
+
+         logger.debug("Camera worker stopped")
+
+     def working_loop(self) -> None:
+         """Run the camera worker loop.
+
+         Ported from main_works.py camera_worker() with the same logic.
+         """
+         logger.debug("Starting camera working loop")
+
+         # Initialize head tracker if available
+         neutral_pose = np.eye(4)  # Neutral pose (identity matrix)
+         self.previous_head_tracking_state = self.is_head_tracking_enabled
+
+         while not self._stop_event.is_set():
+             try:
+                 current_time = time.time()
+
+                 # Get frame from robot
+                 frame = self.reachy_mini.media.get_frame()
+
+                 if frame is not None:
+                     # Thread-safe frame storage
+                     with self.frame_lock:
+                         self.latest_frame = frame  # .copy()
+
+                     # Check if face tracking was just disabled
+                     if self.previous_head_tracking_state and not self.is_head_tracking_enabled:
+                         # Face tracking was just disabled - start interpolation to neutral
+                         self.last_face_detected_time = current_time  # Trigger the face-lost logic
+                         self.interpolation_start_time = None  # Will be set by the face-lost interpolation
+                         self.interpolation_start_pose = None
+
+                     # Update tracking state
+                     self.previous_head_tracking_state = self.is_head_tracking_enabled
+
+                     # Handle face tracking if enabled and head tracker available
+                     if self.is_head_tracking_enabled and self.head_tracker is not None:
+                         eye_center, _ = self.head_tracker.get_head_position(frame)
+
+                         if eye_center is not None:
+                             # Face detected - immediately switch to tracking
+                             self.last_face_detected_time = current_time
+                             self.interpolation_start_time = None  # Stop any interpolation
+
+                             # Convert normalized coordinates to pixel coordinates
+                             h, w, _ = frame.shape
+                             eye_center_norm = (eye_center + 1) / 2
+                             eye_center_pixels = [
+                                 eye_center_norm[0] * w,
+                                 eye_center_norm[1] * h,
+                             ]
+
+                             # Get the head pose needed to look at the target, but don't perform movement
+                             target_pose = self.reachy_mini.look_at_image(
+                                 eye_center_pixels[0],
+                                 eye_center_pixels[1],
+                                 duration=0.0,
+                                 perform_movement=False,
+                             )
+
+                             # Extract translation and rotation from the target pose directly
+                             translation = target_pose[:3, 3]
+                             rotation = R.from_matrix(target_pose[:3, :3]).as_euler("xyz", degrees=False)
+
+                             # Scale down translation and rotation because smaller FOV
+                             translation *= 0.6
+                             rotation *= 0.6
+
+                             # Thread-safe update of face tracking offsets (use pose as-is)
+                             with self.face_tracking_lock:
+                                 self.face_tracking_offsets = [
+                                     translation[0],
+                                     translation[1],
+                                     translation[2],  # x, y, z
+                                     rotation[0],
+                                     rotation[1],
+                                     rotation[2],  # roll, pitch, yaw
+                                 ]
+
+                         # No face detected while tracking enabled - the existing face-lost timestamp stands
+                         elif self.last_face_detected_time is None or self.last_face_detected_time == current_time:
+                             # Only update if we haven't already set a face lost time
+                             # (current_time check prevents overriding the disable-triggered timestamp)
+                             pass
+
+                 # Handle smooth interpolation (works for both face-lost and tracking-disabled cases)
+                 if self.last_face_detected_time is not None:
+                     time_since_face_lost = current_time - self.last_face_detected_time
+
+                     if time_since_face_lost >= self.face_lost_delay:
+                         # Start interpolation if not already started
+                         if self.interpolation_start_time is None:
+                             self.interpolation_start_time = current_time
+                             # Capture current pose as start of interpolation
+                             with self.face_tracking_lock:
+                                 current_translation = self.face_tracking_offsets[:3]
+                                 current_rotation_euler = self.face_tracking_offsets[3:]
+                             # Convert to 4x4 pose matrix
+                             pose_matrix = np.eye(4, dtype=np.float32)
+                             pose_matrix[:3, 3] = current_translation
+                             pose_matrix[:3, :3] = R.from_euler(
+                                 "xyz",
+                                 current_rotation_euler,
+                             ).as_matrix()
+                             self.interpolation_start_pose = pose_matrix
+
+                         # Calculate interpolation progress (t from 0 to 1)
+                         elapsed_interpolation = current_time - self.interpolation_start_time
+                         t = min(1.0, elapsed_interpolation / self.interpolation_duration)
+
+                         # Interpolate between current pose and neutral pose
+                         interpolated_pose = linear_pose_interpolation(
+                             self.interpolation_start_pose,
+                             neutral_pose,
+                             t,
+                         )
+
+                         # Extract translation and rotation from interpolated pose
+                         translation = interpolated_pose[:3, 3]
+                         rotation = R.from_matrix(interpolated_pose[:3, :3]).as_euler("xyz", degrees=False)
+
+                         # Thread-safe update of face tracking offsets
+                         with self.face_tracking_lock:
+                             self.face_tracking_offsets = [
+                                 translation[0],
+                                 translation[1],
+                                 translation[2],  # x, y, z
+                                 rotation[0],
+                                 rotation[1],
+                                 rotation[2],  # roll, pitch, yaw
+                             ]
+
+                         # If interpolation is complete, reset timing
+                         if t >= 1.0:
+                             self.last_face_detected_time = None
+                             self.interpolation_start_time = None
+                             self.interpolation_start_pose = None
+                     # else: Keep current offsets (within 2s delay period)
+
+                 # Small sleep to prevent excessive CPU usage (same as main_works.py)
+                 time.sleep(0.04)
+
+             except Exception as e:
+                 logger.error(f"Camera worker error: {e}")
+                 time.sleep(0.1)  # Longer sleep on error
+
+         logger.debug("Camera worker thread exited")
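Usage note: a minimal sketch of driving `CameraWorker` from application code. It assumes `ReachyMini()` connects with default arguments; with `head_tracker=None` the worker only buffers frames and the offsets stay at zero.

```python
import time

from reachy_mini import ReachyMini
from test_conv_pipe.camera_worker import CameraWorker

mini = ReachyMini()  # assumption: default constructor reaches the robot
worker = CameraWorker(mini, head_tracker=None)  # frames only, no face offsets
worker.start()
try:
    for _ in range(100):  # poll for ~4 s, matching the worker's 40 ms loop
        frame = worker.get_latest_frame()  # BGR copy, or None before the first frame
        x, y, z, roll, pitch, yaw = worker.get_face_tracking_offsets()
        if frame is not None:
            print(f"frame {frame.shape}, yaw offset {yaw:.3f} rad")
        time.sleep(0.04)
finally:
    worker.stop()
```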
src/test_conv_pipe/config.py ADDED
@@ -0,0 +1,80 @@
+ import os
+ import sys
+ import logging
+ from pathlib import Path
+
+ from dotenv import find_dotenv, load_dotenv
+
+
+ # Locked profile: set to a profile name (e.g., "astronomer") to lock the app
+ # to that profile and disable all profile switching. Leave as None for normal behavior.
+ LOCKED_PROFILE: str | None = "_test_conv_pipe_locked_profile"
+
+ logger = logging.getLogger(__name__)
+
+ # Validate LOCKED_PROFILE at startup
+ if LOCKED_PROFILE is not None:
+     _profiles_dir = Path(__file__).parent / "profiles"
+     _profile_path = _profiles_dir / LOCKED_PROFILE
+     _instructions_file = _profile_path / "instructions.txt"
+     if not _profile_path.is_dir():
+         print(f"Error: LOCKED_PROFILE '{LOCKED_PROFILE}' does not exist in {_profiles_dir}", file=sys.stderr)
+         sys.exit(1)
+     if not _instructions_file.is_file():
+         print(f"Error: LOCKED_PROFILE '{LOCKED_PROFILE}' has no instructions.txt", file=sys.stderr)
+         sys.exit(1)
+
+ # Locate .env file (search upward from current working directory)
+ dotenv_path = find_dotenv(usecwd=True)
+
+ if dotenv_path:
+     # Load .env and override environment variables
+     load_dotenv(dotenv_path=dotenv_path, override=True)
+     logger.info(f"Configuration loaded from {dotenv_path}")
+ else:
+     logger.warning("No .env file found, using environment variables")
+
+
+ class Config:
+     """Configuration class for the conversation app."""
+
+     # Required
+     OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")  # The key is downloaded in console.py if needed
+
+     # Optional
+     MODEL_NAME = os.getenv("MODEL_NAME", "gpt-realtime")
+     HF_HOME = os.getenv("HF_HOME", "./cache")
+     LOCAL_VISION_MODEL = os.getenv("LOCAL_VISION_MODEL", "HuggingFaceTB/SmolVLM2-2.2B-Instruct")
+     HF_TOKEN = os.getenv("HF_TOKEN")  # Optional, falls back to hf auth login if not set
+
+     logger.debug(f"Model: {MODEL_NAME}, HF_HOME: {HF_HOME}, Vision Model: {LOCAL_VISION_MODEL}")
+
+     REACHY_MINI_CUSTOM_PROFILE = LOCKED_PROFILE or os.getenv("REACHY_MINI_CUSTOM_PROFILE")
+     logger.debug(f"Custom Profile: {REACHY_MINI_CUSTOM_PROFILE}")
+
+
+ config = Config()
+
+
+ def set_custom_profile(profile: str | None) -> None:
+     """Update the selected custom profile at runtime and expose it via env.
+
+     This ensures modules that read `config` and code that inspects the
+     environment see a consistent value.
+     """
+     if LOCKED_PROFILE is not None:
+         return
+     try:
+         config.REACHY_MINI_CUSTOM_PROFILE = profile
+     except Exception:
+         pass
+     try:
+         import os as _os
+
+         if profile:
+             _os.environ["REACHY_MINI_CUSTOM_PROFILE"] = profile
+         else:
+             # Remove to reflect default
+             _os.environ.pop("REACHY_MINI_CUSTOM_PROFILE", None)
+     except Exception:
+         pass
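Usage note: a short sketch of how the lock interacts with runtime profile changes; the profile name below is hypothetical.

```python
from test_conv_pipe.config import LOCKED_PROFILE, config, set_custom_profile

# With LOCKED_PROFILE set (as in this file), set_custom_profile() is a no-op,
# so the selection stays pinned; with LOCKED_PROFILE = None it updates both
# config.REACHY_MINI_CUSTOM_PROFILE and the process env var.
set_custom_profile("user_personalities/my_bot")  # hypothetical profile name
print(config.MODEL_NAME)                  # "gpt-realtime" unless overridden in .env
print(config.REACHY_MINI_CUSTOM_PROFILE)  # the locked name here
```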
src/test_conv_pipe/console.py ADDED
@@ -0,0 +1,502 @@
+ """Bidirectional local audio stream with optional settings UI.
+
+ In headless mode, there is no Gradio UI. If the OpenAI API key is not
+ available via environment/.env, we expose a minimal settings page via the
+ Reachy Mini Apps settings server to let non-technical users enter it.
+
+ The settings UI is served from this package's ``static/`` folder and offers a
+ single password field to set ``OPENAI_API_KEY``. Once set, we persist it to the
+ app instance's ``.env`` file (if available) and proceed to start streaming.
+ """
+
+ import os
+ import sys
+ import time
+ import asyncio
+ import logging
+ from typing import List, Optional
+ from pathlib import Path
+
+ from fastrtc import AdditionalOutputs, audio_to_float32
+ from scipy.signal import resample
+
+ from reachy_mini import ReachyMini
+ from reachy_mini.media.media_manager import MediaBackend
+ from test_conv_pipe.config import LOCKED_PROFILE, config
+ from test_conv_pipe.openai_realtime import OpenaiRealtimeHandler
+ from test_conv_pipe.headless_personality_ui import mount_personality_routes
+
+
+ try:
+     # FastAPI is provided by the Reachy Mini Apps runtime
+     from fastapi import FastAPI, Response
+     from pydantic import BaseModel
+     from fastapi.responses import FileResponse, JSONResponse
+     from starlette.staticfiles import StaticFiles
+ except Exception:  # pragma: no cover - only loaded when settings_app is used
+     FastAPI = object  # type: ignore
+     Response = object  # type: ignore
+     FileResponse = object  # type: ignore
+     JSONResponse = object  # type: ignore
+     StaticFiles = object  # type: ignore
+     BaseModel = object  # type: ignore
+
+
+ logger = logging.getLogger(__name__)
+
+
+ class LocalStream:
+     """LocalStream using Reachy Mini's recorder/player."""
+
+     def __init__(
+         self,
+         handler: OpenaiRealtimeHandler,
+         robot: ReachyMini,
+         *,
+         settings_app: Optional[FastAPI] = None,
+         instance_path: Optional[str] = None,
+     ):
+         """Initialize the stream with an OpenAI realtime handler and pipelines.
+
+         - ``settings_app``: the Reachy Mini Apps FastAPI to attach settings endpoints.
+         - ``instance_path``: directory where the per-instance ``.env`` should be stored.
+         """
+         self.handler = handler
+         self._robot = robot
+         self._stop_event = asyncio.Event()
+         self._tasks: List[asyncio.Task[None]] = []
+         # Allow the handler to flush the player queue when appropriate.
+         self.handler._clear_queue = self.clear_audio_queue
+         self._settings_app: Optional[FastAPI] = settings_app
+         self._instance_path: Optional[str] = instance_path
+         self._settings_initialized = False
+         self._asyncio_loop = None
+
+     # ---- Settings UI (only when API key is missing) ----
+     def _read_env_lines(self, env_path: Path) -> list[str]:
+         """Load env file contents or a template as a list of lines."""
+         inst = env_path.parent
+         try:
+             if env_path.exists():
+                 try:
+                     return env_path.read_text(encoding="utf-8").splitlines()
+                 except Exception:
+                     return []
+             template_text = None
+             ex = inst / ".env.example"
+             if ex.exists():
+                 try:
+                     template_text = ex.read_text(encoding="utf-8")
+                 except Exception:
+                     template_text = None
+             if template_text is None:
+                 try:
+                     cwd_example = Path.cwd() / ".env.example"
+                     if cwd_example.exists():
+                         template_text = cwd_example.read_text(encoding="utf-8")
+                 except Exception:
+                     template_text = None
+             if template_text is None:
+                 packaged = Path(__file__).parent / ".env.example"
+                 if packaged.exists():
+                     try:
+                         template_text = packaged.read_text(encoding="utf-8")
+                     except Exception:
+                         template_text = None
+             return template_text.splitlines() if template_text else []
+         except Exception:
+             return []
+
+     def _persist_api_key(self, key: str) -> None:
+         """Persist API key to environment and instance ``.env`` if possible.
+
+         Behavior:
+         - Always sets ``OPENAI_API_KEY`` in process env and in-memory config.
+         - Writes/updates ``<instance_path>/.env``:
+           * If ``.env`` exists, replaces/appends the OPENAI_API_KEY line.
+           * Else, copies the template from ``<instance_path>/.env.example`` when present,
+             otherwise falls back to the packaged template
+             ``test_conv_pipe/.env.example``.
+           * Ensures the resulting file contains the full template plus the key.
+         - Loads the written ``.env`` into the current process environment.
+         """
+         k = (key or "").strip()
+         if not k:
+             return
+         # Update live process env and config so consumers see it immediately
+         try:
+             os.environ["OPENAI_API_KEY"] = k
+         except Exception:  # best-effort
+             pass
+         try:
+             config.OPENAI_API_KEY = k
+         except Exception:
+             pass
+
+         if not self._instance_path:
+             return
+         try:
+             inst = Path(self._instance_path)
+             env_path = inst / ".env"
+             lines = self._read_env_lines(env_path)
+             replaced = False
+             for i, ln in enumerate(lines):
+                 if ln.strip().startswith("OPENAI_API_KEY="):
+                     lines[i] = f"OPENAI_API_KEY={k}"
+                     replaced = True
+                     break
+             if not replaced:
+                 lines.append(f"OPENAI_API_KEY={k}")
+             final_text = "\n".join(lines) + "\n"
+             env_path.write_text(final_text, encoding="utf-8")
+             logger.info("Persisted OPENAI_API_KEY to %s", env_path)
+
+             # Load the newly written .env into this process to ensure downstream imports see it
+             try:
+                 from dotenv import load_dotenv
+
+                 load_dotenv(dotenv_path=str(env_path), override=True)
+             except Exception:
+                 pass
+         except Exception as e:
+             logger.warning("Failed to persist OPENAI_API_KEY: %s", e)
+
+     def _persist_personality(self, profile: Optional[str]) -> None:
+         """Persist the startup personality to the instance .env and config."""
+         if LOCKED_PROFILE is not None:
+             return
+         selection = (profile or "").strip() or None
+         try:
+             from test_conv_pipe.config import set_custom_profile
+
+             set_custom_profile(selection)
+         except Exception:
+             pass
+
+         if not self._instance_path:
+             return
+         try:
+             env_path = Path(self._instance_path) / ".env"
+             lines = self._read_env_lines(env_path)
+             replaced = False
+             for i, ln in enumerate(list(lines)):
+                 if ln.strip().startswith("REACHY_MINI_CUSTOM_PROFILE="):
+                     if selection:
+                         lines[i] = f"REACHY_MINI_CUSTOM_PROFILE={selection}"
+                     else:
+                         lines.pop(i)
+                     replaced = True
+                     break
+             if selection and not replaced:
+                 lines.append(f"REACHY_MINI_CUSTOM_PROFILE={selection}")
+             if selection is None and not env_path.exists():
+                 return
+             final_text = "\n".join(lines) + "\n"
+             env_path.write_text(final_text, encoding="utf-8")
+             logger.info("Persisted startup personality to %s", env_path)
+             try:
+                 from dotenv import load_dotenv
+
+                 load_dotenv(dotenv_path=str(env_path), override=True)
+             except Exception:
+                 pass
+         except Exception as e:
+             logger.warning("Failed to persist REACHY_MINI_CUSTOM_PROFILE: %s", e)
+
+     def _read_persisted_personality(self) -> Optional[str]:
+         """Read persisted startup personality from instance .env (if any)."""
+         if not self._instance_path:
+             return None
+         env_path = Path(self._instance_path) / ".env"
+         try:
+             if env_path.exists():
+                 for ln in env_path.read_text(encoding="utf-8").splitlines():
+                     if ln.strip().startswith("REACHY_MINI_CUSTOM_PROFILE="):
+                         _, _, val = ln.partition("=")
+                         v = val.strip()
+                         return v or None
+         except Exception:
+             pass
+         return None
+
+     def _init_settings_ui_if_needed(self) -> None:
+         """Attach the minimal settings UI to the settings app.
+
+         Always mounts the UI when a settings_app is provided so that users
+         see a confirmation message even if the API key is already configured.
+         """
+         if self._settings_initialized:
+             return
+         if self._settings_app is None:
+             return
+
+         static_dir = Path(__file__).parent / "static"
+         index_file = static_dir / "index.html"
+
+         if hasattr(self._settings_app, "mount"):
+             try:
+                 # Serve /static/* assets
+                 self._settings_app.mount("/static", StaticFiles(directory=str(static_dir)), name="static")
+             except Exception:
+                 pass
+
+         class ApiKeyPayload(BaseModel):
+             openai_api_key: str
+
+         # GET / -> index.html
+         @self._settings_app.get("/")
+         def _root() -> FileResponse:
+             return FileResponse(str(index_file))
+
+         # GET /favicon.ico -> optional, avoid noisy 404s on some browsers
+         @self._settings_app.get("/favicon.ico")
+         def _favicon() -> Response:
+             return Response(status_code=204)
+
+         # GET /status -> whether key is set
+         @self._settings_app.get("/status")
+         def _status() -> JSONResponse:
+             has_key = bool(config.OPENAI_API_KEY and str(config.OPENAI_API_KEY).strip())
+             return JSONResponse({"has_key": has_key})
+
+         # GET /ready -> whether backend finished loading tools
+         @self._settings_app.get("/ready")
+         def _ready() -> JSONResponse:
+             try:
+                 mod = sys.modules.get("test_conv_pipe.tools.core_tools")
+                 ready = bool(getattr(mod, "_TOOLS_INITIALIZED", False)) if mod else False
+             except Exception:
+                 ready = False
+             return JSONResponse({"ready": ready})
+
+         # POST /openai_api_key -> set/persist key
+         @self._settings_app.post("/openai_api_key")
+         def _set_key(payload: ApiKeyPayload) -> JSONResponse:
+             key = (payload.openai_api_key or "").strip()
+             if not key:
+                 return JSONResponse({"ok": False, "error": "empty_key"}, status_code=400)
+             self._persist_api_key(key)
+             return JSONResponse({"ok": True})
+
+         # POST /validate_api_key -> validate key without persisting it
+         @self._settings_app.post("/validate_api_key")
+         async def _validate_key(payload: ApiKeyPayload) -> JSONResponse:
+             key = (payload.openai_api_key or "").strip()
+             if not key:
+                 return JSONResponse({"valid": False, "error": "empty_key"}, status_code=400)
+
+             # Try to validate by checking if we can fetch the models
+             try:
+                 import httpx
+
+                 headers = {"Authorization": f"Bearer {key}", "Content-Type": "application/json"}
+                 async with httpx.AsyncClient(timeout=10.0) as client:
+                     response = await client.get("https://api.openai.com/v1/models", headers=headers)
+                     if response.status_code == 200:
+                         return JSONResponse({"valid": True})
+                     elif response.status_code == 401:
+                         return JSONResponse({"valid": False, "error": "invalid_api_key"}, status_code=401)
+                     else:
+                         return JSONResponse(
+                             {"valid": False, "error": "validation_failed"}, status_code=response.status_code
+                         )
+             except Exception as e:
+                 logger.warning(f"API key validation failed: {e}")
+                 return JSONResponse({"valid": False, "error": "validation_error"}, status_code=500)
+
+         self._settings_initialized = True
+
+     def launch(self) -> None:
+         """Start the recorder/player and run the async processing loops.
+
+         If the OpenAI key is missing, expose a tiny settings UI via the
+         Reachy Mini settings server to collect it before starting streams.
+         """
+         self._stop_event.clear()
+
+         # Try to load an existing instance .env first (covers subsequent runs)
+         if self._instance_path:
+             try:
+                 from dotenv import load_dotenv
+
+                 from test_conv_pipe.config import set_custom_profile
+
+                 env_path = Path(self._instance_path) / ".env"
+                 if env_path.exists():
+                     load_dotenv(dotenv_path=str(env_path), override=True)
+                     # Update config with newly loaded values
+                     new_key = os.getenv("OPENAI_API_KEY", "").strip()
+                     if new_key:
+                         try:
+                             config.OPENAI_API_KEY = new_key
+                         except Exception:
+                             pass
+                     if LOCKED_PROFILE is None:
+                         new_profile = os.getenv("REACHY_MINI_CUSTOM_PROFILE")
+                         if new_profile is not None:
+                             try:
+                                 set_custom_profile(new_profile.strip() or None)
+                             except Exception:
+                                 pass  # Best-effort profile update
+             except Exception:
+                 pass  # Instance .env loading is optional; continue with defaults
+
+         # If key is still missing, try to download one from HuggingFace
+         if not (config.OPENAI_API_KEY and str(config.OPENAI_API_KEY).strip()):
+             logger.info("OPENAI_API_KEY not set, attempting to download from HuggingFace...")
+             try:
+                 from gradio_client import Client
+
+                 client = Client("HuggingFaceM4/gradium_setup", verbose=False)
+                 key, status = client.predict(api_name="/claim_b_key")
+                 if key and key.strip():
+                     logger.info("Successfully downloaded API key from HuggingFace")
+                     # Persist it immediately
+                     self._persist_api_key(key)
+             except Exception as e:
+                 logger.warning(f"Failed to download API key from HuggingFace: {e}")
+
+         # Always expose the settings UI if a settings app is available
+         # (do this AFTER loading/downloading the key so the status endpoint sees the right value)
+         self._init_settings_ui_if_needed()
+
+         # If key is still missing -> wait until provided via the settings UI
+         if not (config.OPENAI_API_KEY and str(config.OPENAI_API_KEY).strip()):
+             logger.warning("OPENAI_API_KEY not found. Open the app settings page to enter it.")
+             # Poll until the key becomes available (set via the settings UI)
+             try:
+                 while not (config.OPENAI_API_KEY and str(config.OPENAI_API_KEY).strip()):
+                     time.sleep(0.2)
+             except KeyboardInterrupt:
+                 logger.info("Interrupted while waiting for API key.")
+                 return
+
+         # Start media after key is set/available
+         self._robot.media.start_recording()
+         self._robot.media.start_playing()
+         time.sleep(1)  # give the pipelines some time to start
+
+         async def runner() -> None:
+             # Capture loop for cross-thread personality actions
+             loop = asyncio.get_running_loop()
+             self._asyncio_loop = loop  # type: ignore[assignment]
+             # Mount personality routes now that loop and handler are available
+             try:
+                 if self._settings_app is not None:
+                     mount_personality_routes(
+                         self._settings_app,
+                         self.handler,
+                         lambda: self._asyncio_loop,
+                         persist_personality=self._persist_personality,
+                         get_persisted_personality=self._read_persisted_personality,
+                     )
+             except Exception:
+                 pass
+             self._tasks = [
+                 asyncio.create_task(self.handler.start_up(), name="openai-handler"),
+                 asyncio.create_task(self.record_loop(), name="stream-record-loop"),
+                 asyncio.create_task(self.play_loop(), name="stream-play-loop"),
+             ]
+             try:
+                 await asyncio.gather(*self._tasks)
+             except asyncio.CancelledError:
+                 logger.info("Tasks cancelled during shutdown")
+             finally:
+                 # Ensure handler connection is closed
+                 await self.handler.shutdown()
+
+         asyncio.run(runner())
+
+     def close(self) -> None:
+         """Stop the stream and underlying media pipelines.
+
+         This method:
+         - Stops audio recording and playback first
+         - Sets the stop event to signal async loops to terminate
+         - Cancels all pending async tasks (openai-handler, record-loop, play-loop)
+         """
+         logger.info("Stopping LocalStream...")
+
+         # Stop media pipelines FIRST before cancelling async tasks
+         # This ensures clean shutdown before PortAudio cleanup
+         try:
+             self._robot.media.stop_recording()
+         except Exception as e:
+             logger.debug(f"Error stopping recording (may already be stopped): {e}")
+
+         try:
+             self._robot.media.stop_playing()
+         except Exception as e:
+             logger.debug(f"Error stopping playback (may already be stopped): {e}")
+
+         # Now signal async loops to stop
+         self._stop_event.set()
+
+         # Cancel all running tasks
+         for task in self._tasks:
+             if not task.done():
+                 task.cancel()
+
+     def clear_audio_queue(self) -> None:
+         """Flush the player's appsrc to drop any queued audio immediately."""
+         logger.info("User intervention: flushing player queue")
+         if self._robot.media.backend == MediaBackend.GSTREAMER:
+             # Directly flush the GStreamer audio pipe
+             self._robot.media.audio.clear_player()
+         elif self._robot.media.backend in (MediaBackend.DEFAULT, MediaBackend.DEFAULT_NO_VIDEO):
+             self._robot.media.audio.clear_output_buffer()
+         self.handler.output_queue = asyncio.Queue()
+
+     async def record_loop(self) -> None:
+         """Read mic frames from the recorder and forward them to the handler."""
+         input_sample_rate = self._robot.media.get_input_audio_samplerate()
+         logger.debug(f"Audio recording started at {input_sample_rate} Hz")
+
+         while not self._stop_event.is_set():
+             audio_frame = self._robot.media.get_audio_sample()
+             if audio_frame is not None:
+                 await self.handler.receive((input_sample_rate, audio_frame))
+             await asyncio.sleep(0)  # avoid busy loop
+
+     async def play_loop(self) -> None:
+         """Fetch outputs from the handler: log text and play audio frames."""
+         while not self._stop_event.is_set():
+             handler_output = await self.handler.emit()
+
+             if isinstance(handler_output, AdditionalOutputs):
+                 for msg in handler_output.args:
+                     content = msg.get("content", "")
+                     if isinstance(content, str):
+                         logger.info(
+                             "role=%s content=%s",
+                             msg.get("role"),
+                             content if len(content) < 500 else content[:500] + "…",
+                         )
+
+             elif isinstance(handler_output, tuple):
+                 input_sample_rate, audio_data = handler_output
+                 output_sample_rate = self._robot.media.get_output_audio_samplerate()
+
+                 # Reshape if needed
+                 if audio_data.ndim == 2:
+                     # Scipy channels-last convention
+                     if audio_data.shape[1] > audio_data.shape[0]:
+                         audio_data = audio_data.T
+                     # Multiple channels -> mono channel
+                     if audio_data.shape[1] > 1:
+                         audio_data = audio_data[:, 0]
+
+                 # Cast if needed
+                 audio_frame = audio_to_float32(audio_data)
+
+                 # Resample if needed
+                 if input_sample_rate != output_sample_rate:
+                     audio_frame = resample(
+                         audio_frame,
+                         int(len(audio_frame) * output_sample_rate / input_sample_rate),
+                     )
+
+                 self._robot.media.push_audio_sample(audio_frame)
+
+             else:
+                 logger.debug("Ignoring output type=%s", type(handler_output).__name__)
+
+             await asyncio.sleep(0)  # yield to event loop
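Usage note: a standalone illustration of `play_loop`'s audio normalization (channels-last reshape, mono downmix, float32 cast, resample). The sample rates are illustrative, not values read from the robot, and the explicit int16 scaling stands in for what `audio_to_float32` does on int16 input.

```python
import numpy as np
from scipy.signal import resample

rate_in, rate_out = 24_000, 48_000  # illustrative handler/output rates
chunk = (np.random.randn(2, 960) * 3000).astype(np.int16)  # (channels, samples)

audio = chunk
if audio.ndim == 2:
    if audio.shape[1] > audio.shape[0]:  # move to scipy's channels-last layout
        audio = audio.T
    if audio.shape[1] > 1:  # keep the first channel only
        audio = audio[:, 0]

audio = audio.astype(np.float32) / 32768.0  # int16 -> float32 in [-1, 1]
if rate_in != rate_out:
    audio = resample(audio, int(len(audio) * rate_out / rate_in))
print(audio.dtype, audio.shape)  # float32 (1920,)
```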
src/test_conv_pipe/dance_emotion_moves.py ADDED
@@ -0,0 +1,154 @@
+ """Dance and emotion moves for the movement queue system.
+
+ This module implements dance moves and emotions as Move objects that can be queued
+ and executed sequentially by the MovementManager.
+ """
+
+ from __future__ import annotations
+
+ import logging
+ from typing import Tuple
+
+ import numpy as np
+ from numpy.typing import NDArray
+
+ from reachy_mini.motion.move import Move
+ from reachy_mini.motion.recorded_move import RecordedMoves
+ from reachy_mini_dances_library.dance_move import DanceMove
+
+
+ logger = logging.getLogger(__name__)
+
+
+ class DanceQueueMove(Move):  # type: ignore
+     """Wrapper for dance moves to work with the movement queue system."""
+
+     def __init__(self, move_name: str):
+         """Initialize a DanceQueueMove."""
+         self.dance_move = DanceMove(move_name)
+         self.move_name = move_name
+
+     @property
+     def duration(self) -> float:
+         """Duration property required by official Move interface."""
+         return float(self.dance_move.duration)
+
+     def evaluate(self, t: float) -> tuple[NDArray[np.float64] | None, NDArray[np.float64] | None, float | None]:
+         """Evaluate dance move at time t."""
+         try:
+             # Get the pose from the dance move
+             head_pose, antennas, body_yaw = self.dance_move.evaluate(t)
+
+             # Convert to numpy array if antennas is a tuple and return in official Move format
+             if isinstance(antennas, tuple):
+                 antennas = np.array([antennas[0], antennas[1]])
+
+             return (head_pose, antennas, body_yaw)
+
+         except Exception as e:
+             logger.error(f"Error evaluating dance move '{self.move_name}' at t={t}: {e}")
+             # Return neutral pose on error
+             from reachy_mini.utils import create_head_pose
+
+             neutral_head_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
+             return (neutral_head_pose, np.array([0.0, 0.0], dtype=np.float64), 0.0)
+
+
+ class EmotionQueueMove(Move):  # type: ignore
+     """Wrapper for emotion moves to work with the movement queue system."""
+
+     def __init__(self, emotion_name: str, recorded_moves: RecordedMoves):
+         """Initialize an EmotionQueueMove."""
+         self.emotion_move = recorded_moves.get(emotion_name)
+         self.emotion_name = emotion_name
+
+     @property
+     def duration(self) -> float:
+         """Duration property required by official Move interface."""
+         return float(self.emotion_move.duration)
+
+     def evaluate(self, t: float) -> tuple[NDArray[np.float64] | None, NDArray[np.float64] | None, float | None]:
+         """Evaluate emotion move at time t."""
+         try:
+             # Get the pose from the emotion move
+             head_pose, antennas, body_yaw = self.emotion_move.evaluate(t)
+
+             # Convert to numpy array if antennas is a tuple and return in official Move format
+             if isinstance(antennas, tuple):
+                 antennas = np.array([antennas[0], antennas[1]])
+
+             return (head_pose, antennas, body_yaw)
+
+         except Exception as e:
+             logger.error(f"Error evaluating emotion '{self.emotion_name}' at t={t}: {e}")
+             # Return neutral pose on error
+             from reachy_mini.utils import create_head_pose
+
+             neutral_head_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
+             return (neutral_head_pose, np.array([0.0, 0.0], dtype=np.float64), 0.0)
+
+
+ class GotoQueueMove(Move):  # type: ignore
+     """Wrapper for goto moves to work with the movement queue system."""
+
+     def __init__(
+         self,
+         target_head_pose: NDArray[np.float32],
+         start_head_pose: NDArray[np.float32] | None = None,
+         target_antennas: Tuple[float, float] = (0, 0),
+         start_antennas: Tuple[float, float] | None = None,
+         target_body_yaw: float = 0,
+         start_body_yaw: float | None = None,
+         duration: float = 1.0,
+     ):
+         """Initialize a GotoQueueMove."""
+         self._duration = duration
+         self.target_head_pose = target_head_pose
+         self.start_head_pose = start_head_pose
+         self.target_antennas = target_antennas
+         self.start_antennas = start_antennas or (0, 0)
+         self.target_body_yaw = target_body_yaw
+         self.start_body_yaw = start_body_yaw or 0
+
+     @property
+     def duration(self) -> float:
+         """Duration property required by official Move interface."""
+         return self._duration
+
+     def evaluate(self, t: float) -> tuple[NDArray[np.float64] | None, NDArray[np.float64] | None, float | None]:
+         """Evaluate goto move at time t using linear interpolation."""
+         try:
+             from reachy_mini.utils import create_head_pose
+             from reachy_mini.utils.interpolation import linear_pose_interpolation
+
+             # Clamp t to [0, 1] for interpolation
+             t_clamped = max(0, min(1, t / self.duration))
+
+             # Use start pose if available, otherwise neutral
+             if self.start_head_pose is not None:
+                 start_pose = self.start_head_pose
+             else:
+                 start_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
+
+             # Interpolate head pose
+             head_pose = linear_pose_interpolation(start_pose, self.target_head_pose, t_clamped)
+
+             # Interpolate antennas - return as numpy array
+             antennas = np.array(
+                 [
+                     self.start_antennas[0] + (self.target_antennas[0] - self.start_antennas[0]) * t_clamped,
+                     self.start_antennas[1] + (self.target_antennas[1] - self.start_antennas[1]) * t_clamped,
+                 ],
+                 dtype=np.float64,
+             )
+
+             # Interpolate body yaw
+             body_yaw = self.start_body_yaw + (self.target_body_yaw - self.start_body_yaw) * t_clamped
+
+             return (head_pose, antennas, body_yaw)
+
+         except Exception as e:
+             logger.error(f"Error evaluating goto move at t={t}: {e}")
+             # Return target pose on error - convert to float64
+             target_head_pose_f64 = self.target_head_pose.astype(np.float64)
+             target_antennas_array = np.array([self.target_antennas[0], self.target_antennas[1]], dtype=np.float64)
+             return (target_head_pose_f64, target_antennas_array, self.target_body_yaw)
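Usage note: a sketch that samples a `GotoQueueMove` trajectory directly, outside any queue. It assumes the last positional argument of `create_head_pose` is yaw in degrees; scheduling the move is normally the MovementManager's job.

```python
import numpy as np

from reachy_mini.utils import create_head_pose
from test_conv_pipe.dance_emotion_moves import GotoQueueMove

target = create_head_pose(0, 0, 0, 0, 0, 20, degrees=True)  # assumed: yaw = 20 deg
move = GotoQueueMove(target_head_pose=target, target_antennas=(0.5, -0.5), duration=1.0)

# Evaluate the 1 s trajectory at five points; t is clamped internally.
for t in np.linspace(0.0, move.duration, 5):
    head_pose, antennas, body_yaw = move.evaluate(float(t))
    print(f"t={t:.2f} antennas={antennas} body_yaw={body_yaw:.2f}")
```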
src/test_conv_pipe/gradio_personality.py ADDED
@@ -0,0 +1,316 @@
+ """Gradio personality UI components and wiring.
+
+ This module encapsulates the UI elements and logic related to managing
+ conversation "personalities" (profiles) so that `main.py` stays lean.
+ """
+
+ from __future__ import annotations
+
+ from typing import Any
+ from pathlib import Path
+
+ import gradio as gr
+
+ from .config import LOCKED_PROFILE, config
+
+
+ class PersonalityUI:
+     """Container for personality-related Gradio components."""
+
+     def __init__(self) -> None:
+         """Initialize the PersonalityUI instance."""
+         # Constants and paths
+         self.DEFAULT_OPTION = "(built-in default)"
+         self._profiles_root = Path(__file__).parent / "profiles"
+         self._tools_dir = Path(__file__).parent / "tools"
+         self._prompts_dir = Path(__file__).parent / "prompts"
+
+         # Components (initialized in create_components)
+         self.personalities_dropdown: gr.Dropdown
+         self.apply_btn: gr.Button
+         self.status_md: gr.Markdown
+         self.preview_md: gr.Markdown
+         self.person_name_tb: gr.Textbox
+         self.person_instr_ta: gr.TextArea
+         self.tools_txt_ta: gr.TextArea
+         self.voice_dropdown: gr.Dropdown
+         self.new_personality_btn: gr.Button
+         self.available_tools_cg: gr.CheckboxGroup
+         self.save_btn: gr.Button
+
+     # ---------- Filesystem helpers ----------
+     def _list_personalities(self) -> list[str]:
+         names: list[str] = []
+         try:
+             if self._profiles_root.exists():
+                 for p in sorted(self._profiles_root.iterdir()):
+                     if p.name == "user_personalities":
+                         continue
+                     if p.is_dir() and (p / "instructions.txt").exists():
+                         names.append(p.name)
+             user_dir = self._profiles_root / "user_personalities"
+             if user_dir.exists():
+                 for p in sorted(user_dir.iterdir()):
+                     if p.is_dir() and (p / "instructions.txt").exists():
+                         names.append(f"user_personalities/{p.name}")
+         except Exception:
+             pass
+         return names
+
+     def _resolve_profile_dir(self, selection: str) -> Path:
+         return self._profiles_root / selection
+
+     def _read_instructions_for(self, name: str) -> str:
+         try:
+             if name == self.DEFAULT_OPTION:
+                 default_file = self._prompts_dir / "default_prompt.txt"
+                 if default_file.exists():
+                     return default_file.read_text(encoding="utf-8").strip()
+                 return ""
+             target = self._resolve_profile_dir(name) / "instructions.txt"
+             if target.exists():
+                 return target.read_text(encoding="utf-8").strip()
+             return ""
+         except Exception as e:
+             return f"Could not load instructions: {e}"
+
+     @staticmethod
+     def _sanitize_name(name: str) -> str:
+         import re
+
+         s = name.strip()
+         s = re.sub(r"\s+", "_", s)
+         s = re.sub(r"[^a-zA-Z0-9_-]", "", s)
+         return s
+
+     # ---------- Public API ----------
+     def create_components(self) -> None:
+         """Instantiate Gradio components for the personality UI."""
+         if LOCKED_PROFILE is not None:
+             is_locked = True
+             current_value: str = LOCKED_PROFILE
+             dropdown_label = "Select personality (locked)"
+             dropdown_choices: list[str] = [LOCKED_PROFILE]
+         else:
+             is_locked = False
+             current_value = config.REACHY_MINI_CUSTOM_PROFILE or self.DEFAULT_OPTION
+             dropdown_label = "Select personality"
+             dropdown_choices = [self.DEFAULT_OPTION, *(self._list_personalities())]
+
+         self.personalities_dropdown = gr.Dropdown(
+             label=dropdown_label,
+             choices=dropdown_choices,
+             value=current_value,
+             interactive=not is_locked,
+         )
+         self.apply_btn = gr.Button("Apply personality", interactive=not is_locked)
+         self.status_md = gr.Markdown(visible=True)
+         self.preview_md = gr.Markdown(value=self._read_instructions_for(current_value))
+         self.person_name_tb = gr.Textbox(label="Personality name", interactive=not is_locked)
+         self.person_instr_ta = gr.TextArea(label="Personality instructions", lines=10, interactive=not is_locked)
+         self.tools_txt_ta = gr.TextArea(label="tools.txt", lines=10, interactive=not is_locked)
+         self.voice_dropdown = gr.Dropdown(label="Voice", choices=["cedar"], value="cedar", interactive=not is_locked)
+         self.new_personality_btn = gr.Button("New personality", interactive=not is_locked)
+         self.available_tools_cg = gr.CheckboxGroup(
+             label="Available tools (helper)", choices=[], value=[], interactive=not is_locked
+         )
+         self.save_btn = gr.Button("Save personality (instructions + tools)", interactive=not is_locked)
+
+     def additional_inputs_ordered(self) -> list[Any]:
+         """Return the additional inputs in the expected order for Stream."""
+         return [
+             self.personalities_dropdown,
+             self.apply_btn,
+             self.new_personality_btn,
+             self.status_md,
+             self.preview_md,
+             self.person_name_tb,
+             self.person_instr_ta,
+             self.tools_txt_ta,
+             self.voice_dropdown,
+             self.available_tools_cg,
+             self.save_btn,
+         ]
+
+     # ---------- Event wiring ----------
+     def wire_events(self, handler: Any, blocks: gr.Blocks) -> None:
+         """Attach event handlers to components within a Blocks context."""
+
+         async def _apply_personality(selected: str) -> tuple[str, str]:
+             if LOCKED_PROFILE is not None and selected != LOCKED_PROFILE:
+                 return (
+                     f"Profile is locked to '{LOCKED_PROFILE}'. Cannot change personality.",
+                     self._read_instructions_for(LOCKED_PROFILE),
+                 )
+             profile = None if selected == self.DEFAULT_OPTION else selected
+             status = await handler.apply_personality(profile)
+             preview = self._read_instructions_for(selected)
+             return status, preview
+
+         def _read_voice_for(name: str) -> str:
+             try:
+                 if name == self.DEFAULT_OPTION:
+                     return "cedar"
+                 vf = self._resolve_profile_dir(name) / "voice.txt"
+                 if vf.exists():
+                     v = vf.read_text(encoding="utf-8").strip()
+                     return v or "cedar"
+             except Exception:
+                 pass
+             return "cedar"
+
+         async def _fetch_voices(selected: str) -> dict[str, Any]:
+             try:
+                 voices = await handler.get_available_voices()
+                 current = _read_voice_for(selected)
+                 if current not in voices:
+                     current = "cedar"
+                 return gr.update(choices=voices, value=current)
+             except Exception:
+                 return gr.update(choices=["cedar"], value="cedar")
+
+         def _available_tools_for(selected: str) -> tuple[list[str], list[str]]:
+             shared: list[str] = []
+             try:
+                 for py in self._tools_dir.glob("*.py"):
+                     if py.stem in {"__init__", "core_tools"}:
+                         continue
+                     shared.append(py.stem)
+             except Exception:
+                 pass
+             local: list[str] = []
+             try:
+                 if selected != self.DEFAULT_OPTION:
+                     for py in (self._profiles_root / selected).glob("*.py"):
+                         local.append(py.stem)
+             except Exception:
+                 pass
+             return sorted(shared), sorted(local)
+
+         def _parse_enabled_tools(text: str) -> list[str]:
+             enabled: list[str] = []
+             for line in text.splitlines():
+                 s = line.strip()
+                 if not s or s.startswith("#"):
+                     continue
+                 enabled.append(s)
+             return enabled
+
+         def _load_profile_for_edit(selected: str) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any], str]:
+             instr = self._read_instructions_for(selected)
+             tools_txt = ""
+             if selected != self.DEFAULT_OPTION:
+                 tp = self._resolve_profile_dir(selected) / "tools.txt"
+                 if tp.exists():
+                     tools_txt = tp.read_text(encoding="utf-8")
+             shared, local = _available_tools_for(selected)
+             all_tools = sorted(set(shared + local))
+             enabled = _parse_enabled_tools(tools_txt)
+             status_text = f"Loaded profile '{selected}'."
+             return (
+                 gr.update(value=instr),
+                 gr.update(value=tools_txt),
+                 gr.update(choices=all_tools, value=enabled),
+                 status_text,
+             )
+
+         def _new_personality() -> tuple[
+             dict[str, Any], dict[str, Any], dict[str, Any], dict[str, Any], str, dict[str, Any]
+         ]:
+             try:
+                 # Prefill with hints
+                 instr_val = """# Write your instructions here\n# e.g., Keep responses concise and friendly."""
+                 tools_txt_val = "# tools enabled for this profile\n"
+                 return (
+                     gr.update(value=""),
+                     gr.update(value=instr_val),
+                     gr.update(value=tools_txt_val),
+                     gr.update(choices=sorted(_available_tools_for(self.DEFAULT_OPTION)[0]), value=[]),
+                     "Fill in a name, instructions and (optional) tools, then Save.",
+                     gr.update(value="cedar"),
+                 )
+             except Exception:
+                 return (
+                     gr.update(),
+                     gr.update(),
+                     gr.update(),
+                     gr.update(),
+                     "Failed to initialize new personality.",
+                     gr.update(),
+                 )
+
+         def _save_personality(
+             name: str, instructions: str, tools_text: str, voice: str
+         ) -> tuple[dict[str, Any], dict[str, Any], str]:
+             name_s = self._sanitize_name(name)
+             if not name_s:
+                 return gr.update(), gr.update(), "Please enter a valid name."
+             try:
+                 target_dir = self._profiles_root / "user_personalities" / name_s
+                 target_dir.mkdir(parents=True, exist_ok=True)
+                 (target_dir / "instructions.txt").write_text(instructions.strip() + "\n", encoding="utf-8")
+                 (target_dir / "tools.txt").write_text(tools_text.strip() + "\n", encoding="utf-8")
+                 (target_dir / "voice.txt").write_text((voice or "cedar").strip() + "\n", encoding="utf-8")
+
+                 choices = self._list_personalities()
+                 value = f"user_personalities/{name_s}"
+                 if value not in choices:
+                     choices.append(value)
+                 return (
+                     gr.update(choices=[self.DEFAULT_OPTION, *sorted(choices)], value=value),
+                     gr.update(value=instructions),
+                     f"Saved personality '{name_s}'.",
+                 )
+             except Exception as e:
+                 return gr.update(), gr.update(), f"Failed to save personality: {e}"
+
+         def _sync_tools_from_checks(selected: list[str], current_text: str) -> dict[str, Any]:
+             comments = [ln for ln in current_text.splitlines() if ln.strip().startswith("#")]
+             body = "\n".join(selected)
+             out = ("\n".join(comments) + ("\n" if comments else "") + body).strip() + "\n"
+             return gr.update(value=out)
+
+         with blocks:
+             self.apply_btn.click(
+                 fn=_apply_personality,
+                 inputs=[self.personalities_dropdown],
+                 outputs=[self.status_md, self.preview_md],
+             )
+
+             self.personalities_dropdown.change(
+                 fn=_load_profile_for_edit,
+                 inputs=[self.personalities_dropdown],
+                 outputs=[self.person_instr_ta, self.tools_txt_ta, self.available_tools_cg, self.status_md],
+             )
+
+             blocks.load(
+                 fn=_fetch_voices,
+                 inputs=[self.personalities_dropdown],
+                 outputs=[self.voice_dropdown],
+             )
+
+             self.available_tools_cg.change(
+                 fn=_sync_tools_from_checks,
+                 inputs=[self.available_tools_cg, self.tools_txt_ta],
+                 outputs=[self.tools_txt_ta],
+             )
+
+             self.new_personality_btn.click(
+                 fn=_new_personality,
+                 inputs=[],
+                 outputs=[
+                     self.person_name_tb,
+                     self.person_instr_ta,
+                     self.tools_txt_ta,
+                     self.available_tools_cg,
+                     self.status_md,
+                     self.voice_dropdown,
+                 ],
+             )
+
+             self.save_btn.click(
+                 fn=_save_personality,
+                 inputs=[self.person_name_tb, self.person_instr_ta, self.tools_txt_ta, self.voice_dropdown],
+                 outputs=[self.personalities_dropdown, self.person_instr_ta, self.status_md],
+             ).then(
+                 fn=_apply_personality,
+                 inputs=[self.personalities_dropdown],
+                 outputs=[self.status_md, self.preview_md],
+             )
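Usage note: a sketch of embedding `PersonalityUI` in a Blocks app. The stub handler only mimics the two coroutines the wiring calls (`apply_personality`, `get_available_voices`); in the real app this would be the OpenAI realtime handler.

```python
import gradio as gr

from test_conv_pipe.gradio_personality import PersonalityUI


class _StubHandler:  # stands in for OpenaiRealtimeHandler in this sketch
    async def apply_personality(self, profile: str | None) -> str:
        return f"Applied {profile or 'default'}"

    async def get_available_voices(self) -> list[str]:
        return ["cedar"]


ui = PersonalityUI()
with gr.Blocks() as demo:
    ui.create_components()  # components must be created inside the Blocks context
ui.wire_events(_StubHandler(), demo)  # wire_events re-enters the Blocks context itself
# demo.launch()
```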
src/test_conv_pipe/headless_personality.py ADDED
@@ -0,0 +1,102 @@
+ """Headless personality management (console-based).
+
+ Provides an interactive CLI to browse, preview, apply, create and edit
+ "personalities" (profiles) when running without Gradio.
+
+ This module is intentionally not shared with the Gradio implementation to
+ avoid coupling and keep responsibilities clear for headless mode.
+ """
+
+ from __future__ import annotations
+
+ from typing import List
+ from pathlib import Path
+
+
+ DEFAULT_OPTION = "(built-in default)"
+
+
+ def _profiles_root() -> Path:
+     return Path(__file__).parent / "profiles"
+
+
+ def _prompts_dir() -> Path:
+     return Path(__file__).parent / "prompts"
+
+
+ def _tools_dir() -> Path:
+     return Path(__file__).parent / "tools"
+
+
+ def _sanitize_name(name: str) -> str:
+     import re
+
+     s = name.strip()
+     s = re.sub(r"\s+", "_", s)
+     s = re.sub(r"[^a-zA-Z0-9_-]", "", s)
+     return s
+
+
+ def list_personalities() -> List[str]:
+     """List available personality profile names."""
+     names: List[str] = []
+     root = _profiles_root()
+     try:
+         if root.exists():
+             for p in sorted(root.iterdir()):
+                 if p.name == "user_personalities":
+                     continue
+                 if p.is_dir() and (p / "instructions.txt").exists():
+                     names.append(p.name)
+         udir = root / "user_personalities"
+         if udir.exists():
+             for p in sorted(udir.iterdir()):
+                 if p.is_dir() and (p / "instructions.txt").exists():
+                     names.append(f"user_personalities/{p.name}")
+     except Exception:
+         pass
+     return names
+
+
+ def resolve_profile_dir(selection: str) -> Path:
+     """Resolve the directory path for the given profile selection."""
+     return _profiles_root() / selection
+
+
+ def read_instructions_for(name: str) -> str:
+     """Read the instructions.txt content for the given profile name."""
+     try:
+         if name == DEFAULT_OPTION:
+             df = _prompts_dir() / "default_prompt.txt"
+             return df.read_text(encoding="utf-8").strip() if df.exists() else ""
+         target = resolve_profile_dir(name) / "instructions.txt"
+         return target.read_text(encoding="utf-8").strip() if target.exists() else ""
+     except Exception as e:
+         return f"Could not load instructions: {e}"
+
+
+ def available_tools_for(selected: str) -> List[str]:
+     """List available tool modules for the given profile selection."""
+     shared: List[str] = []
+     try:
+         for py in _tools_dir().glob("*.py"):
+             if py.stem in {"__init__", "core_tools"}:
+                 continue
+             shared.append(py.stem)
+     except Exception:
+         pass
+     local: List[str] = []
+     try:
+         if selected != DEFAULT_OPTION:
+             for py in resolve_profile_dir(selected).glob("*.py"):
+                 local.append(py.stem)
+     except Exception:
+         pass
+     return sorted(set(shared + local))
+
+
+ def _write_profile(name_s: str, instructions: str, tools_text: str, voice: str = "cedar") -> None:
+     target_dir = _profiles_root() / "user_personalities" / name_s
+     target_dir.mkdir(parents=True, exist_ok=True)
+     (target_dir / "instructions.txt").write_text(instructions.strip() + "\n", encoding="utf-8")
+     (target_dir / "tools.txt").write_text((tools_text or "").strip() + "\n", encoding="utf-8")
+     (target_dir / "voice.txt").write_text((voice or "cedar").strip() + "\n", encoding="utf-8")
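Usage note: a small console sketch over the helpers above, printing each selectable personality with the first line of its instructions.

```python
from test_conv_pipe.headless_personality import (
    DEFAULT_OPTION,
    list_personalities,
    read_instructions_for,
)

# The default option is prepended just like the dropdown/endpoints do.
for name in [DEFAULT_OPTION, *list_personalities()]:
    first_line = (read_instructions_for(name).splitlines() or [""])[0]
    print(f"{name}: {first_line}")
```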
src/test_conv_pipe/headless_personality_ui.py ADDED
@@ -0,0 +1,287 @@
+ """Settings UI routes for headless personality management.
+
+ Exposes REST endpoints on the provided FastAPI settings app. The
+ implementation schedules backend actions (apply personality, fetch voices)
+ onto the running LocalStream asyncio loop using the supplied get_loop
+ callable to avoid cross-thread issues.
+ """
+
+ from __future__ import annotations
+
+ import asyncio
+ import logging
+ from typing import Any, Callable, Optional
+
+ from fastapi import FastAPI
+
+ from .config import LOCKED_PROFILE, config
+ from .openai_realtime import OpenaiRealtimeHandler
+ from .headless_personality import (
+     DEFAULT_OPTION,
+     _sanitize_name,
+     _write_profile,
+     list_personalities,
+     available_tools_for,
+     resolve_profile_dir,
+     read_instructions_for,
+ )
+
+
+ logger = logging.getLogger(__name__)
+
+
+ def mount_personality_routes(
+     app: FastAPI,
+     handler: OpenaiRealtimeHandler,
+     get_loop: Callable[[], asyncio.AbstractEventLoop | None],
+     *,
+     persist_personality: Callable[[Optional[str]], None] | None = None,
+     get_persisted_personality: Callable[[], Optional[str]] | None = None,
+ ) -> None:
+     """Register personality management endpoints on a FastAPI app."""
+     try:
+         from fastapi import Request
+         from pydantic import BaseModel
+         from fastapi.responses import JSONResponse
+     except Exception:  # pragma: no cover - only when settings app not available
+         return
+
+     class SavePayload(BaseModel):
+         name: str
+         instructions: str
+         tools_text: str
+         voice: Optional[str] = "cedar"
+
+     class ApplyPayload(BaseModel):
+         name: str
+         persist: Optional[bool] = False
+
+     def _startup_choice() -> Any:
+         """Return the persisted startup personality or the default."""
+         try:
+             if get_persisted_personality is not None:
+                 stored = get_persisted_personality()
+                 if stored:
+                     return stored
+             env_val = getattr(config, "REACHY_MINI_CUSTOM_PROFILE", None)
+             if env_val:
+                 return env_val
+         except Exception:
+             pass
+         return DEFAULT_OPTION
+
+     def _current_choice() -> str:
+         try:
+             cur = getattr(config, "REACHY_MINI_CUSTOM_PROFILE", None)
+             return cur or DEFAULT_OPTION
+         except Exception:
+             return DEFAULT_OPTION
+
+     @app.get("/personalities")
+     def _list() -> dict:  # type: ignore
+         choices = [DEFAULT_OPTION, *list_personalities()]
+         return {
+             "choices": choices,
+             "current": _current_choice(),
+             "startup": _startup_choice(),
+             "locked": LOCKED_PROFILE is not None,
+             "locked_to": LOCKED_PROFILE,
+         }
+
+     @app.get("/personalities/load")
+     def _load(name: str) -> dict:  # type: ignore
+         instr = read_instructions_for(name)
+         tools_txt = ""
+         voice = "cedar"
+         if name != DEFAULT_OPTION:
+             pdir = resolve_profile_dir(name)
+             tp = pdir / "tools.txt"
+             if tp.exists():
+                 tools_txt = tp.read_text(encoding="utf-8")
+             vf = pdir / "voice.txt"
+             if vf.exists():
+                 v = vf.read_text(encoding="utf-8").strip()
+                 voice = v or "cedar"
+         avail = available_tools_for(name)
+         enabled = [ln.strip() for ln in tools_txt.splitlines() if ln.strip() and not ln.strip().startswith("#")]
+         return {
+             "instructions": instr,
+             "tools_text": tools_txt,
+             "voice": voice,
+             "available_tools": avail,
+             "enabled_tools": enabled,
+         }
+
+     @app.post("/personalities/save")
+     async def _save(request: Request) -> dict:  # type: ignore
+         # Accept raw JSON only to avoid validation-related 422s
+         try:
+             raw = await request.json()
+         except Exception:
+             raw = {}
+         name = str(raw.get("name", ""))
+         instructions = str(raw.get("instructions", ""))
+         tools_text = str(raw.get("tools_text", ""))
+         voice = str(raw.get("voice", "cedar")) if raw.get("voice") is not None else "cedar"
+
+         name_s = _sanitize_name(name)
+         if not name_s:
+             return JSONResponse({"ok": False, "error": "invalid_name"}, status_code=400)  # type: ignore
+         try:
+             logger.info(
+                 "Headless save: name=%r voice=%r instr_len=%d tools_len=%d",
+                 name_s,
+                 voice,
+                 len(instructions),
+                 len(tools_text),
+             )
+             _write_profile(name_s, instructions, tools_text, voice or "cedar")
+             value = f"user_personalities/{name_s}"
+             choices = [DEFAULT_OPTION, *list_personalities()]
+             return {"ok": True, "value": value, "choices": choices}
+         except Exception as e:
+             return JSONResponse({"ok": False, "error": str(e)}, status_code=500)  # type: ignore
+
+     @app.post("/personalities/save_raw")
+     async def _save_raw(
+         request: Request,
+         name: Optional[str] = None,
+         instructions: Optional[str] = None,
+         tools_text: Optional[str] = None,
+         voice: Optional[str] = None,
+     ) -> dict:  # type: ignore
+         # Accept query params, form-encoded, or raw JSON
+         data = {"name": name, "instructions": instructions, "tools_text": tools_text, "voice": voice}
+         # Prefer form if present
+         try:
+             form = await request.form()
+             for k in ("name", "instructions", "tools_text", "voice"):
+                 if k in form and form[k] is not None:
+                     data[k] = str(form[k])
+         except Exception:
+             pass
+         # Try JSON
+         try:
+             raw = await request.json()
+             if isinstance(raw, dict):
+                 for k in ("name", "instructions", "tools_text", "voice"):
+                     if raw.get(k) is not None:
+                         data[k] = str(raw.get(k))
+         except Exception:
+             pass
+
+         name_s = _sanitize_name(str(data.get("name") or ""))
+         if not name_s:
+             return JSONResponse({"ok": False, "error": "invalid_name"}, status_code=400)  # type: ignore
+         instr = str(data.get("instructions") or "")
+         tools = str(data.get("tools_text") or "")
+         v = str(data.get("voice") or "cedar")
+         try:
+             logger.info(
+                 "Headless save_raw: name=%r voice=%r instr_len=%d tools_len=%d", name_s, v, len(instr), len(tools)
+             )
+             _write_profile(name_s, instr, tools, v)
+             value = f"user_personalities/{name_s}"
+             choices = [DEFAULT_OPTION, *list_personalities()]
+             return {"ok": True, "value": value, "choices": choices}
+         except Exception as e:
+             return JSONResponse({"ok": False, "error": str(e)}, status_code=500)  # type: ignore
+
+     @app.get("/personalities/save_raw")
+     async def _save_raw_get(name: str, instructions: str = "", tools_text: str = "", voice: str = "cedar") -> dict:  # type: ignore
+         name_s = _sanitize_name(name)
+         if not name_s:
+             return JSONResponse({"ok": False, "error": "invalid_name"}, status_code=400)  # type: ignore
+         try:
+             logger.info(
+                 "Headless save_raw(GET): name=%r voice=%r instr_len=%d tools_len=%d",
+                 name_s,
+                 voice,
+                 len(instructions),
+                 len(tools_text),
+             )
+             _write_profile(name_s, instructions, tools_text, voice or "cedar")
+             value = f"user_personalities/{name_s}"
+             choices = [DEFAULT_OPTION, *list_personalities()]
+             return {"ok": True, "value": value, "choices": choices}
+         except Exception as e:
+             return JSONResponse({"ok": False, "error": str(e)}, status_code=500)  # type: ignore
+
+     @app.post("/personalities/apply")
+     async def _apply(
+         payload: ApplyPayload | None = None,
+         name: str | None = None,
+         persist: Optional[bool] = None,
+         request: Optional[Request] = None,
+     ) -> dict:  # type: ignore
+         if LOCKED_PROFILE is not None:
+             return JSONResponse(
+                 {"ok": False, "error": "profile_locked", "locked_to": LOCKED_PROFILE},
+                 status_code=403,
+             )  # type: ignore
+         loop = get_loop()
+         if loop is None:
+             return JSONResponse({"ok": False, "error": "loop_unavailable"}, status_code=503)  # type: ignore
+
+         # Accept both JSON payload and query param for convenience
+         sel_name: Optional[str] = None
+         persist_flag = bool(persist) if persist is not None else False
+         if payload and getattr(payload, "name", None):
+             sel_name = payload.name
+             persist_flag = bool(getattr(payload, "persist", False))
+         elif name:
+             sel_name = name
+         elif request is not None:
+             try:
+                 body = await request.json()
+                 if isinstance(body, dict) and body.get("name"):
+                     sel_name = str(body.get("name"))
+                 if isinstance(body, dict) and "persist" in body:
+                     persist_flag = bool(body.get("persist"))
+             except Exception:
+                 sel_name = None
+         if request is not None:
+             try:
+                 q_persist = request.query_params.get("persist")
+                 if q_persist is not None:
+                     persist_flag = str(q_persist).lower() in {"1", "true", "yes", "on"}
+             except Exception:
+                 pass
+         if not sel_name:
+             sel_name = DEFAULT_OPTION
+
+         async def _do_apply() -> str:
+             sel = None if sel_name == DEFAULT_OPTION else sel_name
253
+ status = await handler.apply_personality(sel)
254
+ return status
255
+
256
+ try:
257
+ logger.info("Headless apply: requested name=%r", sel_name)
258
+ fut = asyncio.run_coroutine_threadsafe(_do_apply(), loop)
259
+ status = fut.result(timeout=10)
260
+ persisted_choice = _startup_choice()
261
+ if persist_flag and persist_personality is not None:
262
+ try:
263
+ persist_personality(None if sel_name == DEFAULT_OPTION else sel_name)
264
+ persisted_choice = _startup_choice()
265
+ except Exception as e:
266
+ logger.warning("Failed to persist startup personality: %s", e)
267
+ return {"ok": True, "status": status, "startup": persisted_choice}
268
+ except Exception as e:
269
+ return JSONResponse({"ok": False, "error": str(e)}, status_code=500) # type: ignore
270
+
271
+ @app.get("/voices")
272
+ async def _voices() -> list[str]:
273
+ loop = get_loop()
274
+ if loop is None:
275
+ return ["cedar"]
276
+
277
+ async def _get_v() -> list[str]:
278
+ try:
279
+ return await handler.get_available_voices()
280
+ except Exception:
281
+ return ["cedar"]
282
+
283
+ try:
284
+ fut = asyncio.run_coroutine_threadsafe(_get_v(), loop)
285
+ return fut.result(timeout=10)
286
+ except Exception:
287
+ return ["cedar"]
src/test_conv_pipe/images/reachymini_avatar.png ADDED

Git LFS Details

  • SHA256: 5a63ac8802ff3542f01292c431c5278296880d74cd3580d219fcf4827bc235f9
  • Pointer size: 132 Bytes
  • Size of remote file: 1.23 MB
src/test_conv_pipe/images/user_avatar.png ADDED

Git LFS Details

  • SHA256: e97ca125a86bacdaa41c8dca88abd9ca746fd5c9391eda24249c012432b0219b
  • Pointer size: 132 Bytes
  • Size of remote file: 1.11 MB
src/test_conv_pipe/main.py ADDED
@@ -0,0 +1,253 @@
+ """Entrypoint for the Reachy Mini conversation app."""
+
+ import os
+ import sys
+ import time
+ import asyncio
+ import argparse
+ import threading
+ from typing import Any, Dict, List, Optional
+
+ import gradio as gr
+ from fastapi import FastAPI
+ from fastrtc import Stream
+ from gradio.utils import get_space
+
+ from reachy_mini import ReachyMini, ReachyMiniApp
+ from test_conv_pipe.utils import (
+     parse_args,
+     setup_logger,
+     handle_vision_stuff,
+     log_connection_troubleshooting,
+ )
+
+
+ def update_chatbot(chatbot: List[Dict[str, Any]], response: Dict[str, Any]) -> List[Dict[str, Any]]:
+     """Update the chatbot with AdditionalOutputs."""
+     chatbot.append(response)
+     return chatbot
+
+
+ def main() -> None:
+     """Entrypoint for the Reachy Mini conversation app."""
+     args, _ = parse_args()
+     run(args)
+
+
+ def run(
+     args: argparse.Namespace,
+     robot: Optional[ReachyMini] = None,
+     app_stop_event: Optional[threading.Event] = None,
+     settings_app: Optional[FastAPI] = None,
+     instance_path: Optional[str] = None,
+ ) -> None:
+     """Run the Reachy Mini conversation app."""
+     # Putting these dependencies here makes the dashboard faster to load when the conversation app is installed
+     from test_conv_pipe.moves import MovementManager
+     from test_conv_pipe.console import LocalStream
+     from test_conv_pipe.openai_realtime import OpenaiRealtimeHandler
+     from test_conv_pipe.tools.core_tools import ToolDependencies
+     from test_conv_pipe.audio.head_wobbler import HeadWobbler
+
+     logger = setup_logger(args.debug)
+     logger.info("Starting Reachy Mini Conversation App")
+
+     if args.no_camera and args.head_tracker is not None:
+         logger.warning(
+             "Head tracking disabled: --no-camera flag is set. "
+             "Remove --no-camera to enable head tracking."
+         )
+
+     if robot is None:
+         try:
+             robot_kwargs = {}
+             if args.robot_name is not None:
+                 robot_kwargs["robot_name"] = args.robot_name
+
+             logger.info("Initializing ReachyMini (SDK will auto-detect appropriate backend)")
+             robot = ReachyMini(**robot_kwargs)
+
+         except TimeoutError as e:
+             logger.error(
+                 "Connection timeout: Failed to connect to Reachy Mini daemon. "
+                 f"Details: {e}"
+             )
+             log_connection_troubleshooting(logger, args.robot_name)
+             sys.exit(1)
+
+         except ConnectionError as e:
+             logger.error(
+                 "Connection failed: Unable to establish connection to Reachy Mini. "
+                 f"Details: {e}"
+             )
+             log_connection_troubleshooting(logger, args.robot_name)
+             sys.exit(1)
+
+         except Exception as e:
+             logger.error(
+                 f"Unexpected error during robot initialization: {type(e).__name__}: {e}"
+             )
+             logger.error("Please check your configuration and try again.")
+             sys.exit(1)
+
+     # Check if running in simulation mode without --gradio
+     if robot.client.get_status()["simulation_enabled"] and not args.gradio:
+         logger.error(
+             "Simulation mode requires Gradio interface. Please use --gradio flag when running in simulation mode."
+         )
+         robot.client.disconnect()
+         sys.exit(1)
+
+     camera_worker, _, vision_manager = handle_vision_stuff(args, robot)
+
+     movement_manager = MovementManager(
+         current_robot=robot,
+         camera_worker=camera_worker,
+     )
+
+     head_wobbler = HeadWobbler(set_speech_offsets=movement_manager.set_speech_offsets)
+
+     deps = ToolDependencies(
+         reachy_mini=robot,
+         movement_manager=movement_manager,
+         camera_worker=camera_worker,
+         vision_manager=vision_manager,
+         head_wobbler=head_wobbler,
+     )
+     current_file_path = os.path.dirname(os.path.abspath(__file__))
+     logger.debug(f"Current file absolute path: {current_file_path}")
+     chatbot = gr.Chatbot(
+         type="messages",
+         resizable=True,
+         avatar_images=(
+             os.path.join(current_file_path, "images", "user_avatar.png"),
+             os.path.join(current_file_path, "images", "reachymini_avatar.png"),
+         ),
+     )
+     logger.debug(f"Chatbot avatar images: {chatbot.avatar_images}")
+
+     handler = OpenaiRealtimeHandler(deps, gradio_mode=args.gradio, instance_path=instance_path)
+
+     stream_manager: gr.Blocks | LocalStream | None = None
+
+     if args.gradio:
+         api_key_textbox = gr.Textbox(
+             label="OPENAI API Key",
+             type="password",
+             value=os.getenv("OPENAI_API_KEY") if not get_space() else "",
+         )
+
+         from test_conv_pipe.gradio_personality import PersonalityUI
+
+         personality_ui = PersonalityUI()
+         personality_ui.create_components()
+
+         stream = Stream(
+             handler=handler,
+             mode="send-receive",
+             modality="audio",
+             additional_inputs=[
+                 chatbot,
+                 api_key_textbox,
+                 *personality_ui.additional_inputs_ordered(),
+             ],
+             additional_outputs=[chatbot],
+             additional_outputs_handler=update_chatbot,
+             ui_args={"title": "Talk with Reachy Mini"},
+         )
+         stream_manager = stream.ui
+         if not settings_app:
+             app = FastAPI()
+         else:
+             app = settings_app
+
+         personality_ui.wire_events(handler, stream_manager)
+
+         app = gr.mount_gradio_app(app, stream.ui, path="/")
+     else:
+         # In headless mode, wire settings_app + instance_path to console LocalStream
+         stream_manager = LocalStream(
+             handler,
+             robot,
+             settings_app=settings_app,
+             instance_path=instance_path,
+         )
+
+     # Each async service → its own thread/loop
+     movement_manager.start()
+     head_wobbler.start()
+     if camera_worker:
+         camera_worker.start()
+     if vision_manager:
+         vision_manager.start()
+
+     def poll_stop_event() -> None:
+         """Poll the stop event to allow graceful shutdown."""
+         if app_stop_event is not None:
+             app_stop_event.wait()
+
+             logger.info("App stop event detected, shutting down...")
+             try:
+                 stream_manager.close()
+             except Exception as e:
+                 logger.error(f"Error while closing stream manager: {e}")
+
+     if app_stop_event:
+         threading.Thread(target=poll_stop_event, daemon=True).start()
+
+     try:
+         stream_manager.launch()
+     except KeyboardInterrupt:
+         logger.info("Keyboard interruption in main thread... closing server.")
+     finally:
+         movement_manager.stop()
+         head_wobbler.stop()
+         if camera_worker:
+             camera_worker.stop()
+         if vision_manager:
+             vision_manager.stop()
+
+         # Ensure media is explicitly closed before disconnecting
+         try:
+             robot.media.close()
+         except Exception as e:
+             logger.debug(f"Error closing media during shutdown: {e}")
+
+         # Prevent the connection from keeping some threads alive
+         robot.client.disconnect()
+         time.sleep(1)
+         logger.info("Shutdown complete.")
+
+
+ class TestConvPipe(ReachyMiniApp):  # type: ignore[misc]
+     """Reachy Mini Apps entry point for the conversation app."""
+
+     custom_app_url = "http://0.0.0.0:7860/"
+     dont_start_webserver = False
+
+     def run(self, reachy_mini: ReachyMini, stop_event: threading.Event) -> None:
+         """Run the Reachy Mini conversation app."""
+         loop = asyncio.new_event_loop()
+         asyncio.set_event_loop(loop)
+
+         args, _ = parse_args()
+
+         # is_wireless = reachy_mini.client.get_status()["wireless_version"]
+         # args.head_tracker = None if is_wireless else "mediapipe"
+
+         instance_path = self._get_instance_path().parent
+         run(
+             args,
+             robot=reachy_mini,
+             app_stop_event=stop_event,
+             settings_app=self.settings_app,
+             instance_path=instance_path,
+         )
+
+
+ if __name__ == "__main__":
+     app = TestConvPipe()
+     try:
+         app.wrapped_run()
+     except KeyboardInterrupt:
+         app.stop()
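A rough sketch of driving run() from an embedding process, mirroring how TestConvPipe wires its stop_event above (illustrative only; it assumes a robot or daemon is reachable and is not part of this commit):

    import threading

    from test_conv_pipe.main import run
    from test_conv_pipe.utils import parse_args

    args, _ = parse_args()
    stop_event = threading.Event()

    # run() blocks in stream_manager.launch(); once the event is set,
    # poll_stop_event() closes the stream manager, letting run() clean up and return.
    t = threading.Thread(target=run, args=(args,), kwargs={"app_stop_event": stop_event}, daemon=True)
    t.start()
    # ... later, request shutdown:
    stop_event.set()
    t.join()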
src/test_conv_pipe/moves.py ADDED
@@ -0,0 +1,849 @@
+ """Movement system with sequential primary moves and additive secondary moves.
+
+ Design overview
+ - Primary moves (emotions, dances, goto, breathing) are mutually exclusive and run
+   sequentially.
+ - Secondary moves (speech sway, face tracking) are additive offsets applied on top
+   of the current primary pose.
+ - There is a single control point to the robot: `ReachyMini.set_target`.
+ - The control loop runs near 100 Hz and is phase-aligned via a monotonic clock.
+ - Idle behaviour starts an infinite `BreathingMove` after a short inactivity delay
+   unless listening is active.
+
+ Threading model
+ - A dedicated worker thread owns all real-time state and issues `set_target`
+   commands.
+ - Other threads communicate via a command queue (enqueue moves, mark activity,
+   toggle listening).
+ - Secondary offset producers set pending values guarded by locks; the worker
+   snaps them atomically.
+
+ Units and frames
+ - Secondary offsets are interpreted as metres for x/y/z and radians for
+   roll/pitch/yaw in the world frame (unless noted by `compose_world_offset`).
+ - Antennas and `body_yaw` are in radians.
+ - Head pose composition uses `compose_world_offset(primary_head, secondary_head)`;
+   the secondary offset must therefore be expressed in the world frame.
+
+ Safety
+ - Listening freezes antennas, then blends them back on unfreeze.
+ - Interpolations and blends are used to avoid jumps at all times.
+ - `set_target` errors are rate-limited in logs.
+ """
+
+ from __future__ import annotations
+ import time
+ import logging
+ import threading
+ from queue import Empty, Queue
+ from typing import Any, Dict, Tuple
+ from collections import deque
+ from dataclasses import dataclass
+
+ import numpy as np
+ from numpy.typing import NDArray
+
+ from reachy_mini import ReachyMini
+ from reachy_mini.utils import create_head_pose
+ from reachy_mini.motion.move import Move
+ from reachy_mini.utils.interpolation import (
+     compose_world_offset,
+     linear_pose_interpolation,
+ )
+
+
+ logger = logging.getLogger(__name__)
+
+ # Configuration constants
+ CONTROL_LOOP_FREQUENCY_HZ = 100.0  # Hz - Target frequency for the movement control loop
+
+ # Type definitions
+ FullBodyPose = Tuple[NDArray[np.float32], Tuple[float, float], float]  # (head_pose_4x4, antennas, body_yaw)
+
+
+ class BreathingMove(Move):  # type: ignore
+     """Breathing move with interpolation to neutral and then continuous breathing patterns."""
+
+     def __init__(
+         self,
+         interpolation_start_pose: NDArray[np.float32],
+         interpolation_start_antennas: Tuple[float, float],
+         interpolation_duration: float = 1.0,
+     ):
+         """Initialize breathing move.
+
+         Args:
+             interpolation_start_pose: 4x4 matrix of current head pose to interpolate from
+             interpolation_start_antennas: Current antenna positions to interpolate from
+             interpolation_duration: Duration of interpolation to neutral (seconds)
+
+         """
+         self.interpolation_start_pose = interpolation_start_pose
+         self.interpolation_start_antennas = np.array(interpolation_start_antennas)
+         self.interpolation_duration = interpolation_duration
+
+         # Neutral positions for breathing base
+         self.neutral_head_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
+         self.neutral_antennas = np.array([0.0, 0.0])
+
+         # Breathing parameters
+         self.breathing_z_amplitude = 0.005  # 5mm gentle breathing
+         self.breathing_frequency = 0.1  # Hz (6 breaths per minute)
+         self.antenna_sway_amplitude = np.deg2rad(15)  # 15 degrees
+         self.antenna_frequency = 0.5  # Hz (faster antenna sway)
+
+     @property
+     def duration(self) -> float:
+         """Duration property required by official Move interface."""
+         return float("inf")  # Continuous breathing (never ends naturally)
+
+     def evaluate(self, t: float) -> tuple[NDArray[np.float64] | None, NDArray[np.float64] | None, float | None]:
+         """Evaluate breathing move at time t."""
+         if t < self.interpolation_duration:
+             # Phase 1: Interpolate to neutral base position
+             interpolation_t = t / self.interpolation_duration
+
+             # Interpolate head pose
+             head_pose = linear_pose_interpolation(
+                 self.interpolation_start_pose, self.neutral_head_pose, interpolation_t,
+             )
+
+             # Interpolate antennas
+             antennas_interp = (
+                 1 - interpolation_t
+             ) * self.interpolation_start_antennas + interpolation_t * self.neutral_antennas
+             antennas = antennas_interp.astype(np.float64)
+
+         else:
+             # Phase 2: Breathing patterns from neutral base
+             breathing_time = t - self.interpolation_duration
+
+             # Gentle z-axis breathing
+             z_offset = self.breathing_z_amplitude * np.sin(2 * np.pi * self.breathing_frequency * breathing_time)
+             head_pose = create_head_pose(x=0, y=0, z=z_offset, roll=0, pitch=0, yaw=0, degrees=True, mm=False)
+
+             # Antenna sway (opposite directions)
+             antenna_sway = self.antenna_sway_amplitude * np.sin(2 * np.pi * self.antenna_frequency * breathing_time)
+             antennas = np.array([antenna_sway, -antenna_sway], dtype=np.float64)
+
+         # Return in official Move interface format: (head_pose, antennas_array, body_yaw)
+         return (head_pose, antennas, 0.0)
+
+
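A quick sketch of sampling BreathingMove over time (illustrative only, not part of the commit; it assumes the reachy_mini SDK is installed and uses the constants defined above):

    import numpy as np
    from reachy_mini.utils import create_head_pose

    start = create_head_pose(0, 0, 0.01, 0, 0, 0, degrees=True, mm=False)  # head 10 mm up
    move = BreathingMove(start, (0.3, -0.3), interpolation_duration=1.0)

    # t < 1.0 s: linear blend from `start` back to neutral.
    # t >= 1.0 s: z oscillates +/-5 mm at 0.1 Hz; antennas sway +/-15 deg at 0.5 Hz.
    for t in (0.0, 0.5, 1.0, 3.5):
        head, antennas, body_yaw = move.evaluate(t)
        print(f"t={t}: z={head[2, 3]:.4f} m, antennas={np.rad2deg(antennas)} deg")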
+ def combine_full_body(primary_pose: FullBodyPose, secondary_pose: FullBodyPose) -> FullBodyPose:
+     """Combine primary and secondary full body poses.
+
+     Args:
+         primary_pose: (head_pose, antennas, body_yaw) - primary move
+         secondary_pose: (head_pose, antennas, body_yaw) - secondary offsets
+
+     Returns:
+         Combined full body pose (head_pose, antennas, body_yaw)
+
+     """
+     primary_head, primary_antennas, primary_body_yaw = primary_pose
+     secondary_head, secondary_antennas, secondary_body_yaw = secondary_pose
+
+     # Combine head poses using compose_world_offset; the secondary pose must be an
+     # offset expressed in the world frame (T_off_world) applied to the absolute
+     # primary transform (T_abs).
+     combined_head = compose_world_offset(primary_head, secondary_head, reorthonormalize=True)
+
+     # Sum antennas and body_yaw
+     combined_antennas = (
+         primary_antennas[0] + secondary_antennas[0],
+         primary_antennas[1] + secondary_antennas[1],
+     )
+     combined_body_yaw = primary_body_yaw + secondary_body_yaw
+
+     return (combined_head, combined_antennas, combined_body_yaw)
+
+
+ def clone_full_body_pose(pose: FullBodyPose) -> FullBodyPose:
+     """Create a deep copy of a full body pose tuple."""
+     head, antennas, body_yaw = pose
+     return (head.copy(), (float(antennas[0]), float(antennas[1])), float(body_yaw))
+
+
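For intuition, a small worked example of combine_full_body (a sketch assuming create_head_pose behaves as used above; the secondary pose is a world-frame offset in metres/radians):

    from reachy_mini.utils import create_head_pose

    primary = (create_head_pose(0, 0, 0, 0, 0, 10, degrees=True), (0.1, -0.1), 0.2)
    # Secondary: 5 mm up and ~0.05 rad of extra yaw, expressed as a world-frame offset
    secondary = (create_head_pose(0, 0, 0.005, 0, 0, 0.05, degrees=False, mm=False), (0.0, 0.0), 0.0)

    head, antennas, body_yaw = combine_full_body(primary, secondary)
    # antennas == (0.1, -0.1) and body_yaw == 0.2, since the secondary adds zeros;
    # the head is the primary pose composed with the world-frame offset.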
+ @dataclass
+ class MovementState:
+     """State tracking for the movement system."""
+
+     # Primary move state
+     current_move: Move | None = None
+     move_start_time: float | None = None
+     last_activity_time: float = 0.0
+
+     # Secondary move state (offsets)
+     speech_offsets: Tuple[float, float, float, float, float, float] = (
+         0.0,
+         0.0,
+         0.0,
+         0.0,
+         0.0,
+         0.0,
+     )
+     face_tracking_offsets: Tuple[float, float, float, float, float, float] = (
+         0.0,
+         0.0,
+         0.0,
+         0.0,
+         0.0,
+         0.0,
+     )
+
+     # Status flags
+     last_primary_pose: FullBodyPose | None = None
+
+     def update_activity(self) -> None:
+         """Update the last activity time."""
+         self.last_activity_time = time.monotonic()
+
+
+ @dataclass
+ class LoopFrequencyStats:
+     """Track rolling loop frequency statistics."""
+
+     mean: float = 0.0
+     m2: float = 0.0
+     min_freq: float = float("inf")
+     count: int = 0
+     last_freq: float = 0.0
+     potential_freq: float = 0.0
+
+     def reset(self) -> None:
+         """Reset accumulators while keeping the last potential frequency."""
+         self.mean = 0.0
+         self.m2 = 0.0
+         self.min_freq = float("inf")
+         self.count = 0
+
+
+ class MovementManager:
+     """Coordinate sequential moves, additive offsets, and robot output at 100 Hz.
+
+     Responsibilities:
+     - Own a real-time loop that samples the current primary move (if any), fuses
+       secondary offsets, and calls `set_target` exactly once per tick.
+     - Start an idle `BreathingMove` after `idle_inactivity_delay` when not
+       listening and no moves are queued.
+     - Expose thread-safe APIs so other threads can enqueue moves, mark activity,
+       or feed secondary offsets without touching internal state.
+
+     Timing:
+     - All elapsed-time calculations rely on `time.monotonic()` through `self._now`
+       to avoid wall-clock jumps.
+     - The loop attempts to run at 100 Hz.
+
+     Concurrency:
+     - External threads communicate via `_command_queue` messages.
+     - Secondary offsets are staged via dirty flags guarded by locks and consumed
+       atomically inside the worker loop.
+     """
+
+     def __init__(
+         self,
+         current_robot: ReachyMini,
+         camera_worker: "Any" = None,
+     ):
+         """Initialize movement manager."""
+         self.current_robot = current_robot
+         self.camera_worker = camera_worker
+
+         # Single timing source for durations
+         self._now = time.monotonic
+
+         # Movement state
+         self.state = MovementState()
+         self.state.last_activity_time = self._now()
+         neutral_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
+         self.state.last_primary_pose = (neutral_pose, (0.0, 0.0), 0.0)
+
+         # Move queue (primary moves)
+         self.move_queue: deque[Move] = deque()
+
+         # Configuration
+         self.idle_inactivity_delay = 0.3  # seconds
+         self.target_frequency = CONTROL_LOOP_FREQUENCY_HZ
+         self.target_period = 1.0 / self.target_frequency
+
+         self._stop_event = threading.Event()
+         self._thread: threading.Thread | None = None
+         self._is_listening = False
+         self._last_commanded_pose: FullBodyPose = clone_full_body_pose(self.state.last_primary_pose)
+         self._listening_antennas: Tuple[float, float] = self._last_commanded_pose[1]
+         self._antenna_unfreeze_blend = 1.0
+         self._antenna_blend_duration = 0.4  # seconds to blend back after listening
+         self._last_listening_blend_time = self._now()
+         self._breathing_active = False  # true when breathing move is running or queued
+         self._listening_debounce_s = 0.15
+         self._last_listening_toggle_time = self._now()
+         self._last_set_target_err = 0.0
+         self._set_target_err_interval = 1.0  # seconds between error logs
+         self._set_target_err_suppressed = 0
+
+         # Cross-thread signalling
+         self._command_queue: "Queue[Tuple[str, Any]]" = Queue()
+         self._speech_offsets_lock = threading.Lock()
+         self._pending_speech_offsets: Tuple[float, float, float, float, float, float] = (
+             0.0,
+             0.0,
+             0.0,
+             0.0,
+             0.0,
+             0.0,
+         )
+         self._speech_offsets_dirty = False
+
+         self._face_offsets_lock = threading.Lock()
+         self._pending_face_offsets: Tuple[float, float, float, float, float, float] = (
+             0.0,
+             0.0,
+             0.0,
+             0.0,
+             0.0,
+             0.0,
+         )
+         self._face_offsets_dirty = False
+
+         self._shared_state_lock = threading.Lock()
+         self._shared_last_activity_time = self.state.last_activity_time
+         self._shared_is_listening = self._is_listening
+         self._status_lock = threading.Lock()
+         self._freq_stats = LoopFrequencyStats()
+         self._freq_snapshot = LoopFrequencyStats()
+
+     def queue_move(self, move: Move) -> None:
+         """Queue a primary move to run after the currently executing one.
+
+         Thread-safe: the move is enqueued via the worker command queue so the
+         control loop remains the sole mutator of movement state.
+         """
+         self._command_queue.put(("queue_move", move))
+
+     def clear_move_queue(self) -> None:
+         """Stop the active move and discard any queued primary moves.
+
+         Thread-safe: executed by the worker thread via the command queue.
+         """
+         self._command_queue.put(("clear_queue", None))
+
+     def set_speech_offsets(self, offsets: Tuple[float, float, float, float, float, float]) -> None:
+         """Update speech-induced secondary offsets (x, y, z, roll, pitch, yaw).
+
+         Offsets are interpreted as metres for translation and radians for
+         rotation in the world frame. Thread-safe via a pending snapshot.
+         """
+         with self._speech_offsets_lock:
+             self._pending_speech_offsets = offsets
+             self._speech_offsets_dirty = True
+
+     def set_moving_state(self, duration: float) -> None:
+         """Mark the robot as actively moving for the provided duration.
+
+         Legacy hook used by goto helpers to keep inactivity and breathing logic
+         aware of manual motions. Thread-safe via the command queue.
+         """
+         self._command_queue.put(("set_moving_state", duration))
+
+     def is_idle(self) -> bool:
+         """Return True when the robot has been inactive longer than the idle delay."""
+         with self._shared_state_lock:
+             last_activity = self._shared_last_activity_time
+             listening = self._shared_is_listening
+
+         if listening:
+             return False
+
+         return self._now() - last_activity >= self.idle_inactivity_delay
+
+     def set_listening(self, listening: bool) -> None:
+         """Enable or disable listening mode without touching shared state directly.
+
+         While listening:
+         - Antenna positions are frozen at the last commanded values.
+         - Blending is reset so that upon unfreezing the antennas return smoothly.
+         - Idle breathing is suppressed.
+
+         Thread-safe: the change is posted to the worker command queue.
+         """
+         with self._shared_state_lock:
+             if self._shared_is_listening == listening:
+                 return
+         self._command_queue.put(("set_listening", listening))
+
+     def _poll_signals(self, current_time: float) -> None:
+         """Apply queued commands and pending offset updates."""
+         self._apply_pending_offsets()
+
+         while True:
+             try:
+                 command, payload = self._command_queue.get_nowait()
+             except Empty:
+                 break
+             self._handle_command(command, payload, current_time)
+
+     def _apply_pending_offsets(self) -> None:
+         """Apply the most recent speech/face offset updates."""
+         speech_offsets: Tuple[float, float, float, float, float, float] | None = None
+         with self._speech_offsets_lock:
+             if self._speech_offsets_dirty:
+                 speech_offsets = self._pending_speech_offsets
+                 self._speech_offsets_dirty = False
+
+         if speech_offsets is not None:
+             self.state.speech_offsets = speech_offsets
+             self.state.update_activity()
+
+         face_offsets: Tuple[float, float, float, float, float, float] | None = None
+         with self._face_offsets_lock:
+             if self._face_offsets_dirty:
+                 face_offsets = self._pending_face_offsets
+                 self._face_offsets_dirty = False
+
+         if face_offsets is not None:
+             self.state.face_tracking_offsets = face_offsets
+             self.state.update_activity()
+
+     def _handle_command(self, command: str, payload: Any, current_time: float) -> None:
+         """Handle a single cross-thread command."""
+         if command == "queue_move":
+             if isinstance(payload, Move):
+                 self.move_queue.append(payload)
+                 self.state.update_activity()
+                 duration = getattr(payload, "duration", None)
+                 if duration is not None:
+                     try:
+                         duration_str = f"{float(duration):.2f}"
+                     except (TypeError, ValueError):
+                         duration_str = str(duration)
+                 else:
+                     duration_str = "?"
+                 logger.debug(
+                     "Queued move with duration %ss, queue size: %s",
+                     duration_str,
+                     len(self.move_queue),
+                 )
+             else:
+                 logger.warning("Ignored queue_move command with invalid payload: %s", payload)
+         elif command == "clear_queue":
+             self.move_queue.clear()
+             self.state.current_move = None
+             self.state.move_start_time = None
+             self._breathing_active = False
+             logger.info("Cleared move queue and stopped current move")
+         elif command == "set_moving_state":
+             try:
+                 duration = float(payload)
+             except (TypeError, ValueError):
+                 logger.warning("Invalid moving state duration: %s", payload)
+                 return
+             self.state.update_activity()
+         elif command == "mark_activity":
+             self.state.update_activity()
+         elif command == "set_listening":
+             desired_state = bool(payload)
+             now = self._now()
+             if now - self._last_listening_toggle_time < self._listening_debounce_s:
+                 return
+             self._last_listening_toggle_time = now
+
+             if self._is_listening == desired_state:
+                 return
+
+             self._is_listening = desired_state
+             self._last_listening_blend_time = now
+             if desired_state:
+                 # Freeze: snapshot current commanded antennas and reset blend
+                 self._listening_antennas = (
+                     float(self._last_commanded_pose[1][0]),
+                     float(self._last_commanded_pose[1][1]),
+                 )
+                 self._antenna_unfreeze_blend = 0.0
+             else:
+                 # Unfreeze: restart blending from frozen pose
+                 self._antenna_unfreeze_blend = 0.0
+             self.state.update_activity()
+         else:
+             logger.warning("Unknown command received by MovementManager: %s", command)
+
+     def _publish_shared_state(self) -> None:
+         """Expose idle-related state for external threads."""
+         with self._shared_state_lock:
+             self._shared_last_activity_time = self.state.last_activity_time
+             self._shared_is_listening = self._is_listening
+
+     def _manage_move_queue(self, current_time: float) -> None:
+         """Manage the primary move queue (sequential execution)."""
+         if self.state.current_move is None or (
+             self.state.move_start_time is not None
+             and current_time - self.state.move_start_time >= self.state.current_move.duration
+         ):
+             self.state.current_move = None
+             self.state.move_start_time = None
+
+             if self.move_queue:
+                 self.state.current_move = self.move_queue.popleft()
+                 self.state.move_start_time = current_time
+                 # Any real move cancels breathing mode flag
+                 self._breathing_active = isinstance(self.state.current_move, BreathingMove)
+                 logger.debug(f"Starting new move, duration: {self.state.current_move.duration}s")
+
+     def _manage_breathing(self, current_time: float) -> None:
+         """Manage automatic breathing when idle."""
+         if (
+             self.state.current_move is None
+             and not self.move_queue
+             and not self._is_listening
+             and not self._breathing_active
+         ):
+             idle_for = current_time - self.state.last_activity_time
+             if idle_for >= self.idle_inactivity_delay:
+                 try:
+                     # These 2 functions return the latest available sensor data from the robot, but don't perform I/O synchronously.
+                     # Therefore, we accept calling them inside the control loop.
+                     _, current_antennas = self.current_robot.get_current_joint_positions()
+                     current_head_pose = self.current_robot.get_current_head_pose()
+
+                     self._breathing_active = True
+                     self.state.update_activity()
+
+                     breathing_move = BreathingMove(
+                         interpolation_start_pose=current_head_pose,
+                         interpolation_start_antennas=current_antennas,
+                         interpolation_duration=1.0,
+                     )
+                     self.move_queue.append(breathing_move)
+                     logger.debug("Started breathing after %.1fs of inactivity", idle_for)
+                 except Exception as e:
+                     self._breathing_active = False
+                     logger.error("Failed to start breathing: %s", e)
+
+         if isinstance(self.state.current_move, BreathingMove) and self.move_queue:
+             self.state.current_move = None
+             self.state.move_start_time = None
+             self._breathing_active = False
+             logger.debug("Stopping breathing due to new move activity")
+
+         if self.state.current_move is not None and not isinstance(self.state.current_move, BreathingMove):
+             self._breathing_active = False
+
+     def _get_primary_pose(self, current_time: float) -> FullBodyPose:
+         """Get the primary full body pose from current move or neutral."""
+         # When a primary move is playing, sample it and cache the resulting pose
+         if self.state.current_move is not None and self.state.move_start_time is not None:
+             move_time = current_time - self.state.move_start_time
+             head, antennas, body_yaw = self.state.current_move.evaluate(move_time)
+
+             if head is None:
+                 head = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
+             if antennas is None:
+                 antennas = np.array([0.0, 0.0])
+             if body_yaw is None:
+                 body_yaw = 0.0
+
+             antennas_tuple = (float(antennas[0]), float(antennas[1]))
+             head_copy = head.copy()
+             primary_full_body_pose = (
+                 head_copy,
+                 antennas_tuple,
+                 float(body_yaw),
+             )
+
+             self.state.last_primary_pose = clone_full_body_pose(primary_full_body_pose)
+         # Otherwise reuse the last primary pose so we avoid jumps between moves
+         elif self.state.last_primary_pose is not None:
+             primary_full_body_pose = clone_full_body_pose(self.state.last_primary_pose)
+         else:
+             neutral_head_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
+             primary_full_body_pose = (neutral_head_pose, (0.0, 0.0), 0.0)
+             self.state.last_primary_pose = clone_full_body_pose(primary_full_body_pose)
+
+         return primary_full_body_pose
+
+     def _get_secondary_pose(self) -> FullBodyPose:
+         """Get the secondary full body pose from speech and face tracking offsets."""
+         # Combine speech sway offsets + face tracking offsets for secondary pose
+         secondary_offsets = [
+             self.state.speech_offsets[0] + self.state.face_tracking_offsets[0],
+             self.state.speech_offsets[1] + self.state.face_tracking_offsets[1],
+             self.state.speech_offsets[2] + self.state.face_tracking_offsets[2],
+             self.state.speech_offsets[3] + self.state.face_tracking_offsets[3],
+             self.state.speech_offsets[4] + self.state.face_tracking_offsets[4],
+             self.state.speech_offsets[5] + self.state.face_tracking_offsets[5],
+         ]
+
+         secondary_head_pose = create_head_pose(
+             x=secondary_offsets[0],
+             y=secondary_offsets[1],
+             z=secondary_offsets[2],
+             roll=secondary_offsets[3],
+             pitch=secondary_offsets[4],
+             yaw=secondary_offsets[5],
+             degrees=False,
+             mm=False,
+         )
+         return (secondary_head_pose, (0.0, 0.0), 0.0)
+
+     def _compose_full_body_pose(self, current_time: float) -> FullBodyPose:
+         """Compose primary and secondary poses into a single command pose."""
+         primary = self._get_primary_pose(current_time)
+         secondary = self._get_secondary_pose()
+         return combine_full_body(primary, secondary)
+
+     def _update_primary_motion(self, current_time: float) -> None:
+         """Advance queue state and idle behaviours for this tick."""
+         self._manage_move_queue(current_time)
+         self._manage_breathing(current_time)
+
+     def _calculate_blended_antennas(self, target_antennas: Tuple[float, float]) -> Tuple[float, float]:
+         """Blend target antennas with listening freeze state and update blending."""
+         now = self._now()
+         listening = self._is_listening
+         listening_antennas = self._listening_antennas
+         blend = self._antenna_unfreeze_blend
+         blend_duration = self._antenna_blend_duration
+         last_update = self._last_listening_blend_time
+         self._last_listening_blend_time = now
+
+         if listening:
+             antennas_cmd = listening_antennas
+             new_blend = 0.0
+         else:
+             dt = max(0.0, now - last_update)
+             if blend_duration <= 0:
+                 new_blend = 1.0
+             else:
+                 new_blend = min(1.0, blend + dt / blend_duration)
+             antennas_cmd = (
+                 listening_antennas[0] * (1.0 - new_blend) + target_antennas[0] * new_blend,
+                 listening_antennas[1] * (1.0 - new_blend) + target_antennas[1] * new_blend,
+             )
+
+         if listening:
+             self._antenna_unfreeze_blend = 0.0
+         else:
+             self._antenna_unfreeze_blend = new_blend
+             if new_blend >= 1.0:
+                 self._listening_antennas = (
+                     float(target_antennas[0]),
+                     float(target_antennas[1]),
+                 )
+
+         return antennas_cmd
+
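The blend above is a plain linear ramp: with b rising from 0 to 1 over _antenna_blend_duration seconds, each commanded antenna is frozen * (1 - b) + target * b. Worked through with illustrative values (not taken from a real run):

    # blend duration 0.4 s, frozen antenna at 0.50 rad, live target at 0.00 rad
    #   t = 0.0 s  -> b = 0.0 -> command = 0.50 rad (still at the frozen pose)
    #   t = 0.2 s  -> b = 0.5 -> command = 0.25 rad
    #   t >= 0.4 s -> b = 1.0 -> command = 0.00 rad (fully back on target)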
+     def _issue_control_command(self, head: NDArray[np.float32], antennas: Tuple[float, float], body_yaw: float) -> None:
+         """Send the fused pose to the robot with throttled error logging."""
+         try:
+             self.current_robot.set_target(head=head, antennas=antennas, body_yaw=body_yaw)
+         except Exception as e:
+             now = self._now()
+             if now - self._last_set_target_err >= self._set_target_err_interval:
+                 msg = f"Failed to set robot target: {e}"
+                 if self._set_target_err_suppressed:
+                     msg += f" (suppressed {self._set_target_err_suppressed} repeats)"
+                     self._set_target_err_suppressed = 0
+                 logger.error(msg)
+                 self._last_set_target_err = now
+             else:
+                 self._set_target_err_suppressed += 1
+         else:
+             with self._status_lock:
+                 self._last_commanded_pose = clone_full_body_pose((head, antennas, body_yaw))
+
+     def _update_frequency_stats(
+         self, loop_start: float, prev_loop_start: float, stats: LoopFrequencyStats,
+     ) -> LoopFrequencyStats:
+         """Update frequency statistics based on the current loop start time."""
+         period = loop_start - prev_loop_start
+         if period > 0:
+             stats.last_freq = 1.0 / period
+             stats.count += 1
+             delta = stats.last_freq - stats.mean
+             stats.mean += delta / stats.count
+             stats.m2 += delta * (stats.last_freq - stats.mean)
+             stats.min_freq = min(stats.min_freq, stats.last_freq)
+         return stats
+
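_update_frequency_stats is an instance of Welford's online algorithm: after n samples, mean holds the running average and m2 / n the population variance. A self-contained check against numpy (illustrative only, not part of the commit):

    import numpy as np

    samples = [99.0, 101.0, 100.5, 98.5]
    stats = LoopFrequencyStats()
    for x in samples:
        stats.count += 1
        delta = x - stats.mean
        stats.mean += delta / stats.count
        stats.m2 += delta * (x - stats.mean)

    assert abs(stats.mean - np.mean(samples)) < 1e-9
    assert abs(stats.m2 / stats.count - np.var(samples)) < 1e-9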
+     def _schedule_next_tick(self, loop_start: float, stats: LoopFrequencyStats) -> Tuple[float, LoopFrequencyStats]:
+         """Compute sleep time to maintain target frequency and update potential freq."""
+         computation_time = self._now() - loop_start
+         stats.potential_freq = 1.0 / computation_time if computation_time > 0 else float("inf")
+         sleep_time = max(0.0, self.target_period - computation_time)
+         return sleep_time, stats
+
+     def _record_frequency_snapshot(self, stats: LoopFrequencyStats) -> None:
+         """Store a thread-safe snapshot of current frequency statistics."""
+         with self._status_lock:
+             self._freq_snapshot = LoopFrequencyStats(
+                 mean=stats.mean,
+                 m2=stats.m2,
+                 min_freq=stats.min_freq,
+                 count=stats.count,
+                 last_freq=stats.last_freq,
+                 potential_freq=stats.potential_freq,
+             )
+
+     def _maybe_log_frequency(self, loop_count: int, print_interval_loops: int, stats: LoopFrequencyStats) -> None:
+         """Emit frequency telemetry when enough loops have elapsed."""
+         if loop_count % print_interval_loops != 0 or stats.count == 0:
+             return
+
+         variance = stats.m2 / stats.count if stats.count > 0 else 0.0
+         lowest = stats.min_freq if stats.min_freq != float("inf") else 0.0
+         logger.debug(
+             "Loop freq - avg: %.2fHz, variance: %.4f, min: %.2fHz, last: %.2fHz, potential: %.2fHz, target: %.1fHz",
+             stats.mean,
+             variance,
+             lowest,
+             stats.last_freq,
+             stats.potential_freq,
+             self.target_frequency,
+         )
+         stats.reset()
+
+     def _update_face_tracking(self, current_time: float) -> None:
+         """Get face tracking offsets from camera worker thread."""
+         if self.camera_worker is not None:
+             # Get face tracking offsets from camera worker thread
+             offsets = self.camera_worker.get_face_tracking_offsets()
+             self.state.face_tracking_offsets = offsets
+         else:
+             # No camera worker, use neutral offsets
+             self.state.face_tracking_offsets = (0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
+
+     def start(self) -> None:
+         """Start the worker thread that drives the 100 Hz control loop."""
+         if self._thread is not None and self._thread.is_alive():
+             logger.warning("Move worker already running; start() ignored")
+             return
+         self._stop_event.clear()
+         self._thread = threading.Thread(target=self.working_loop, daemon=True)
+         self._thread.start()
+         logger.debug("Move worker started")
+
+     def stop(self) -> None:
+         """Request the worker thread to stop and wait for it to exit.
+
+         Before stopping, resets the robot to a neutral position.
+         """
+         if self._thread is None or not self._thread.is_alive():
+             logger.debug("Move worker not running; stop() ignored")
+             return
+
+         logger.info("Stopping movement manager and resetting to neutral position...")
+
+         # Clear any queued moves and stop current move
+         self.clear_move_queue()
+
+         # Stop the worker thread first so it doesn't interfere
+         self._stop_event.set()
+         if self._thread is not None:
+             self._thread.join()
+             self._thread = None
+         logger.debug("Move worker stopped")
+
+         # Reset to neutral position using goto_target (same approach as wake_up)
+         try:
+             neutral_head_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
+             neutral_antennas = [0.0, 0.0]
+             neutral_body_yaw = 0.0
+
+             # Use goto_target directly on the robot
+             self.current_robot.goto_target(
+                 head=neutral_head_pose,
+                 antennas=neutral_antennas,
+                 duration=2.0,
+                 body_yaw=neutral_body_yaw,
+             )
+
+             logger.info("Reset to neutral position completed")
+
+         except Exception as e:
+             logger.error(f"Failed to reset to neutral position: {e}")
+
+     def get_status(self) -> Dict[str, Any]:
+         """Return a lightweight status snapshot for observability."""
+         with self._status_lock:
+             pose_snapshot = clone_full_body_pose(self._last_commanded_pose)
+             freq_snapshot = LoopFrequencyStats(
+                 mean=self._freq_snapshot.mean,
+                 m2=self._freq_snapshot.m2,
+                 min_freq=self._freq_snapshot.min_freq,
+                 count=self._freq_snapshot.count,
+                 last_freq=self._freq_snapshot.last_freq,
+                 potential_freq=self._freq_snapshot.potential_freq,
+             )
+
+         head_matrix = pose_snapshot[0].tolist() if pose_snapshot else None
+         antennas = pose_snapshot[1] if pose_snapshot else None
+         body_yaw = pose_snapshot[2] if pose_snapshot else None
+
+         return {
+             "queue_size": len(self.move_queue),
+             "is_listening": self._is_listening,
+             "breathing_active": self._breathing_active,
+             "last_commanded_pose": {
+                 "head": head_matrix,
+                 "antennas": antennas,
+                 "body_yaw": body_yaw,
+             },
+             "loop_frequency": {
+                 "last": freq_snapshot.last_freq,
+                 "mean": freq_snapshot.mean,
+                 "min": freq_snapshot.min_freq,
+                 "potential": freq_snapshot.potential_freq,
+                 "samples": freq_snapshot.count,
+             },
+         }
+
+     def working_loop(self) -> None:
+ """Control loop main movements - reproduces main_works.py control architecture.
802
+
803
+ Single set_target() call with pose fusion.
804
+ """
+         logger.debug("Starting enhanced movement control loop (100Hz)")
+
+         loop_count = 0
+         prev_loop_start = self._now()
+         print_interval_loops = max(1, int(self.target_frequency * 2))
+         freq_stats = self._freq_stats
+
+         while not self._stop_event.is_set():
+             loop_start = self._now()
+             loop_count += 1
+
+             if loop_count > 1:
+                 freq_stats = self._update_frequency_stats(loop_start, prev_loop_start, freq_stats)
+             prev_loop_start = loop_start
+
+             # 1) Poll external commands and apply pending offsets (atomic snapshot)
+             self._poll_signals(loop_start)
+
+             # 2) Manage the primary move queue (start new move, end finished move, breathing)
+             self._update_primary_motion(loop_start)
+
+             # 3) Update vision-based secondary offsets
+             self._update_face_tracking(loop_start)
+
+             # 4) Build primary and secondary full-body poses, then fuse them
+             head, antennas, body_yaw = self._compose_full_body_pose(loop_start)
+
+             # 5) Apply listening antenna freeze or blend-back
+             antennas_cmd = self._calculate_blended_antennas(antennas)
+
+             # 6) Single set_target call - the only control point
+             self._issue_control_command(head, antennas_cmd, body_yaw)
+
+             # 7) Adaptive sleep to align to next tick, then publish shared state
+             sleep_time, freq_stats = self._schedule_next_tick(loop_start, freq_stats)
+             self._publish_shared_state()
+             self._record_frequency_snapshot(freq_stats)
+
+             # 8) Periodic telemetry on loop frequency
+             self._maybe_log_frequency(loop_count, print_interval_loops, freq_stats)
+
+             if sleep_time > 0:
+                 time.sleep(sleep_time)
+
+         logger.debug("Movement control loop stopped")
src/test_conv_pipe/openai_realtime.py ADDED
@@ -0,0 +1,719 @@
+ import json
+ import base64
+ import random
+ import asyncio
+ import logging
+ from typing import Any, Final, Tuple, Literal, Optional
+ from pathlib import Path
+ from datetime import datetime
+
+ import cv2
+ import numpy as np
+ import gradio as gr
+ from openai import AsyncOpenAI
+ from fastrtc import AdditionalOutputs, AsyncStreamHandler, wait_for_item, audio_to_int16
+ from numpy.typing import NDArray
+ from scipy.signal import resample
+ from websockets.exceptions import ConnectionClosedError
+
+ from test_conv_pipe.config import config
+ from test_conv_pipe.prompts import get_session_voice, get_session_instructions
+ from test_conv_pipe.tools.core_tools import (
+     ToolDependencies,
+     get_tool_specs,
+     dispatch_tool_call,
+ )
+
+
+ logger = logging.getLogger(__name__)
+
+ OPEN_AI_INPUT_SAMPLE_RATE: Final[Literal[24000]] = 24000
+ OPEN_AI_OUTPUT_SAMPLE_RATE: Final[Literal[24000]] = 24000
+
+
+ class OpenaiRealtimeHandler(AsyncStreamHandler):
+     """An OpenAI realtime handler for fastrtc Stream."""
+
+     def __init__(self, deps: ToolDependencies, gradio_mode: bool = False, instance_path: Optional[str] = None):
+         """Initialize the handler."""
+         super().__init__(
+             expected_layout="mono",
+             output_sample_rate=OPEN_AI_OUTPUT_SAMPLE_RATE,
+             input_sample_rate=OPEN_AI_INPUT_SAMPLE_RATE,
+         )
+
+         # Override typing of the sample rates to match OpenAI's requirements
+         self.output_sample_rate: Literal[24000] = self.output_sample_rate
+         self.input_sample_rate: Literal[24000] = self.input_sample_rate
+
+         self.deps = deps
+
+         # Override type annotations for OpenAI strict typing (only for values used in API)
+         self.output_sample_rate = OPEN_AI_OUTPUT_SAMPLE_RATE
+         self.input_sample_rate = OPEN_AI_INPUT_SAMPLE_RATE
+
+         self.connection: Any = None
+         self.output_queue: "asyncio.Queue[Tuple[int, NDArray[np.int16]] | AdditionalOutputs]" = asyncio.Queue()
+
+         self.last_activity_time = asyncio.get_event_loop().time()
+         self.start_time = asyncio.get_event_loop().time()
+         self.is_idle_tool_call = False
+         self.gradio_mode = gradio_mode
+         self.instance_path = instance_path
+         # Track how the API key was provided (env vs textbox) and its value
+         self._key_source: Literal["env", "textbox"] = "env"
+         self._provided_api_key: str | None = None
+
+         # Debouncing for partial transcripts
+         self.partial_transcript_task: asyncio.Task[None] | None = None
+         self.partial_transcript_sequence: int = 0  # sequence counter to prevent stale emissions
+         self.partial_debounce_delay = 0.5  # seconds
+
+         # Internal lifecycle flags
+         self._shutdown_requested: bool = False
+         self._connected_event: asyncio.Event = asyncio.Event()
+
+     def copy(self) -> "OpenaiRealtimeHandler":
+         """Create a copy of the handler."""
+         return OpenaiRealtimeHandler(self.deps, self.gradio_mode, self.instance_path)
+
+     async def apply_personality(self, profile: str | None) -> str:
+         """Apply a new personality (profile) at runtime if possible.
+
+         - Updates the global config's selected profile for subsequent calls.
+         - If a realtime connection is active, sends a session.update with the
+           freshly resolved instructions so the change takes effect immediately.
+
+         Returns a short status message for UI feedback.
+         """
+         try:
+             # Update the in-process config value and env
+             from test_conv_pipe.config import config as _config
+             from test_conv_pipe.config import set_custom_profile
+
+             set_custom_profile(profile)
+             logger.info(
+                 "Set custom profile to %r (config=%r)", profile, getattr(_config, "REACHY_MINI_CUSTOM_PROFILE", None)
+             )
+
+             try:
+                 instructions = get_session_instructions()
+                 voice = get_session_voice()
+             except BaseException as e:  # catch SystemExit from prompt loader without crashing
+                 logger.error("Failed to resolve personality content: %s", e)
+                 return f"Failed to apply personality: {e}"
+
+             # Attempt a live update first, then force a full restart to ensure it sticks
+             if self.connection is not None:
+                 try:
+                     await self.connection.session.update(
+                         session={
+                             "type": "realtime",
+                             "instructions": instructions,
+                             "audio": {"output": {"voice": voice}},
+                         },
+                     )
+                     logger.info("Applied personality via live update: %s", profile or "built-in default")
+                 except Exception as e:
+                     logger.warning("Live update failed; will restart session: %s", e)
+
+                 # Force a real restart to guarantee the new instructions/voice
+                 try:
+                     await self._restart_session()
+                     return "Applied personality and restarted realtime session."
+                 except Exception as e:
+                     logger.warning("Failed to restart session after apply: %s", e)
+                     return "Applied personality. Will take effect on next connection."
+             else:
+                 logger.info(
+                     "Applied personality recorded: %s (no live connection; will apply on next session)",
+                     profile or "built-in default",
+                 )
+                 return "Applied personality. Will take effect on next connection."
+         except Exception as e:
+             logger.error("Error applying personality '%s': %s", profile, e)
+             return f"Failed to apply personality: {e}"
+
+     async def _emit_debounced_partial(self, transcript: str, sequence: int) -> None:
+         """Emit partial transcript after debounce delay."""
+         try:
+             await asyncio.sleep(self.partial_debounce_delay)
+             # Only emit if this is still the latest partial (by sequence number)
+             if self.partial_transcript_sequence == sequence:
+                 await self.output_queue.put(AdditionalOutputs({"role": "user_partial", "content": transcript}))
+                 logger.debug(f"Debounced partial emitted: {transcript}")
+         except asyncio.CancelledError:
+             logger.debug("Debounced partial cancelled")
+             raise
+
+     async def start_up(self) -> None:
+         """Start the handler with minimal retries on unexpected websocket closure."""
+         openai_api_key = config.OPENAI_API_KEY
+         if self.gradio_mode and not openai_api_key:
+             # api key was not found in .env or in the environment variables
+             await self.wait_for_args()  # type: ignore[no-untyped-call]
+             args = list(self.latest_args)
+             textbox_api_key = args[3] if len(args[3]) > 0 else None
+             if textbox_api_key is not None:
+                 openai_api_key = textbox_api_key
+                 self._key_source = "textbox"
+                 self._provided_api_key = textbox_api_key
+             else:
+                 openai_api_key = config.OPENAI_API_KEY
+         else:
+             if not openai_api_key or not openai_api_key.strip():
+                 # In headless console mode, LocalStream now blocks startup until the key is provided.
+                 # However, unit tests may invoke this handler directly with a stubbed client.
+                 # To keep tests hermetic without requiring a real key, fall back to a placeholder.
+                 logger.warning("OPENAI_API_KEY missing. Proceeding with a placeholder (tests/offline).")
+                 openai_api_key = "DUMMY"
+
+         self.client = AsyncOpenAI(api_key=openai_api_key)
+
+         max_attempts = 3
+         for attempt in range(1, max_attempts + 1):
+             try:
+                 await self._run_realtime_session()
+                 # Normal exit from the session, stop retrying
+                 return
+             except ConnectionClosedError as e:
+                 # Abrupt close (e.g., "no close frame received or sent") → retry
+                 logger.warning("Realtime websocket closed unexpectedly (attempt %d/%d): %s", attempt, max_attempts, e)
+                 if attempt < max_attempts:
+                     # exponential backoff with jitter
+                     base_delay = 2 ** (attempt - 1)  # 1s, 2s, 4s, 8s, etc.
+                     jitter = random.uniform(0, 0.5)
+                     delay = base_delay + jitter
+                     logger.info("Retrying in %.1f seconds...", delay)
+                     await asyncio.sleep(delay)
+                     continue
+                 raise
+             finally:
+                 # never keep a stale reference
+                 self.connection = None
+                 try:
+                     self._connected_event.clear()
+                 except Exception:
+                     pass
+
199
+ async def _restart_session(self) -> None:
200
+ """Force-close the current session and start a fresh one in background.
201
+
202
+ Does not block the caller while the new session is establishing.
203
+ """
204
+ try:
205
+ if self.connection is not None:
206
+ try:
207
+ await self.connection.close()
208
+ except Exception:
209
+ pass
210
+ finally:
211
+ self.connection = None
212
+
213
+ # Ensure we have a client (start_up must have run once)
214
+ if getattr(self, "client", None) is None:
215
+ logger.warning("Cannot restart: OpenAI client not initialized yet.")
216
+ return
217
+
218
+ # Fire-and-forget new session and wait briefly for connection
219
+ try:
220
+ self._connected_event.clear()
221
+ except Exception:
222
+ pass
223
+ asyncio.create_task(self._run_realtime_session(), name="openai-realtime-restart")
224
+ try:
225
+ await asyncio.wait_for(self._connected_event.wait(), timeout=5.0)
226
+ logger.info("Realtime session restarted and connected.")
227
+ except asyncio.TimeoutError:
228
+ logger.warning("Realtime session restart timed out; continuing in background.")
229
+ except Exception as e:
230
+ logger.warning("_restart_session failed: %s", e)
231
+
232
+ async def _run_realtime_session(self) -> None:
233
+ """Establish and manage a single realtime session."""
234
+ async with self.client.realtime.connect(model=config.MODEL_NAME) as conn:
235
+ try:
236
+ await conn.session.update(
237
+ session={
238
+ "type": "realtime",
239
+ "instructions": get_session_instructions(),
240
+ "audio": {
241
+ "input": {
242
+ "format": {
243
+ "type": "audio/pcm",
244
+ "rate": self.input_sample_rate,
245
+ },
246
+ "transcription": {"model": "gpt-4o-transcribe", "language": "en"},
247
+ "turn_detection": {
248
+ "type": "server_vad",
249
+ "interrupt_response": True,
250
+ },
251
+ },
252
+ "output": {
253
+ "format": {
254
+ "type": "audio/pcm",
255
+ "rate": self.output_sample_rate,
256
+ },
257
+ "voice": get_session_voice(),
258
+ },
259
+ },
260
+ "tools": get_tool_specs(), # type: ignore[typeddict-item]
261
+ "tool_choice": "auto",
262
+ },
263
+ )
264
+ logger.info(
265
+ "Realtime session initialized with profile=%r voice=%r",
266
+ getattr(config, "REACHY_MINI_CUSTOM_PROFILE", None),
267
+ get_session_voice(),
268
+ )
269
+ # If we reached here, the session update succeeded which implies the API key worked.
270
+ # Persist the key to a newly created .env (copied from .env.example) if needed.
271
+ self._persist_api_key_if_needed()
272
+ except Exception:
273
+ logger.exception("Realtime session.update failed; aborting startup")
274
+ return
275
+
276
+ logger.info("Realtime session updated successfully")
277
+
278
+ # Handle events received from the OpenAI server
279
+ self.connection = conn
280
+ try:
281
+ self._connected_event.set()
282
+ except Exception:
283
+ pass
284
+ async for event in self.connection:
285
+ logger.debug(f"OpenAI event: {event.type}")
286
+ if event.type == "input_audio_buffer.speech_started":
287
+ if hasattr(self, "_clear_queue") and callable(self._clear_queue):
288
+ self._clear_queue()
289
+ if self.deps.head_wobbler is not None:
290
+ self.deps.head_wobbler.reset()
291
+ self.deps.movement_manager.set_listening(True)
292
+ logger.debug("User speech started")
293
+
294
+ if event.type == "input_audio_buffer.speech_stopped":
295
+ self.deps.movement_manager.set_listening(False)
296
+ logger.debug("User speech stopped - server will auto-commit with VAD")
297
+
298
+ if event.type in (
299
+ "response.audio.done", # GA
300
+ "response.output_audio.done", # GA alias
301
+ "response.audio.completed", # legacy (for safety)
302
+ "response.completed", # text-only completion
303
+ ):
304
+ logger.debug("response completed")
305
+
306
+ if event.type == "response.created":
307
+ logger.debug("Response created")
308
+
309
+ if event.type == "response.done":
310
+ # Doesn't mean the audio is done playing
311
+ logger.debug("Response done")
312
+
313
+ # Handle partial transcription (user speaking in real-time)
314
+ if event.type == "conversation.item.input_audio_transcription.partial":
315
+ logger.debug(f"User partial transcript: {event.transcript}")
316
+
317
+ # Increment sequence
318
+ self.partial_transcript_sequence += 1
319
+ current_sequence = self.partial_transcript_sequence
320
+
321
+ # Cancel previous debounce task if it exists
322
+ if self.partial_transcript_task and not self.partial_transcript_task.done():
323
+ self.partial_transcript_task.cancel()
324
+ try:
325
+ await self.partial_transcript_task
326
+ except asyncio.CancelledError:
327
+ pass
328
+
329
+ # Start new debounce timer with sequence number
330
+ self.partial_transcript_task = asyncio.create_task(
331
+ self._emit_debounced_partial(event.transcript, current_sequence)
332
+ )
333
+
334
+ # Handle completed transcription (user finished speaking)
335
+ if event.type == "conversation.item.input_audio_transcription.completed":
336
+ logger.debug(f"User transcript: {event.transcript}")
337
+
338
+ # Cancel any pending partial emission
339
+ if self.partial_transcript_task and not self.partial_transcript_task.done():
340
+ self.partial_transcript_task.cancel()
341
+ try:
342
+ await self.partial_transcript_task
343
+ except asyncio.CancelledError:
344
+ pass
345
+
346
+ await self.output_queue.put(AdditionalOutputs({"role": "user", "content": event.transcript}))
347
+
348
+ # Handle assistant transcription
349
+ if event.type in ("response.audio_transcript.done", "response.output_audio_transcript.done"):
350
+ logger.debug(f"Assistant transcript: {event.transcript}")
351
+ await self.output_queue.put(AdditionalOutputs({"role": "assistant", "content": event.transcript}))
352
+
353
+ # Handle audio delta
354
+ if event.type in ("response.audio.delta", "response.output_audio.delta"):
355
+ if self.deps.head_wobbler is not None:
356
+ self.deps.head_wobbler.feed(event.delta)
357
+ self.last_activity_time = asyncio.get_event_loop().time()
358
+ logger.debug("last activity time updated to %s", self.last_activity_time)
359
+ await self.output_queue.put(
360
+ (
361
+ self.output_sample_rate,
362
+ np.frombuffer(base64.b64decode(event.delta), dtype=np.int16).reshape(1, -1),
363
+ ),
364
+ )
365
+
366
+ # ---- tool-calling plumbing ----
367
+ if event.type == "response.function_call_arguments.done":
368
+ tool_name = getattr(event, "name", None)
369
+ args_json_str = getattr(event, "arguments", None)
370
+ call_id = getattr(event, "call_id", None)
371
+
372
+ if not isinstance(tool_name, str) or not isinstance(args_json_str, str):
373
+ logger.error("Invalid tool call: tool_name=%s, args=%s", tool_name, args_json_str)
374
+ continue
375
+
376
+ try:
377
+ tool_result = await dispatch_tool_call(tool_name, args_json_str, self.deps)
378
+ logger.debug("Tool '%s' executed successfully", tool_name)
379
+ logger.debug("Tool result: %s", tool_result)
380
+ except Exception as e:
381
+ logger.error("Tool '%s' failed", tool_name)
382
+ tool_result = {"error": str(e)}
383
+
384
+ # send the tool result back
385
+ if isinstance(call_id, str):
386
+ await self.connection.conversation.item.create(
387
+ item={
388
+ "type": "function_call_output",
389
+ "call_id": call_id,
390
+ "output": json.dumps(tool_result),
391
+ },
392
+ )
393
+
394
+ await self.output_queue.put(
395
+ AdditionalOutputs(
396
+ {
397
+ "role": "assistant",
398
+ "content": json.dumps(tool_result),
399
+ "metadata": {"title": f"🛠️ Used tool {tool_name}", "status": "done"},
400
+ },
401
+ ),
402
+ )
403
+
404
+ if tool_name == "camera" and "b64_im" in tool_result:
405
+ # use raw base64, don't json.dumps (which adds quotes)
406
+ b64_im = tool_result["b64_im"]
407
+ if not isinstance(b64_im, str):
408
+ logger.warning("Unexpected type for b64_im: %s", type(b64_im))
409
+ b64_im = str(b64_im)
410
+ await self.connection.conversation.item.create(
411
+ item={
412
+ "type": "message",
413
+ "role": "user",
414
+ "content": [
415
+ {
416
+ "type": "input_image",
417
+ "image_url": f"data:image/jpeg;base64,{b64_im}",
418
+ },
419
+ ],
420
+ },
421
+ )
422
+ logger.info("Added camera image to conversation")
423
+
424
+ if self.deps.camera_worker is not None:
425
+ np_img = self.deps.camera_worker.get_latest_frame()
426
+ if np_img is not None:
427
+ # Camera frames are BGR from OpenCV; convert so Gradio displays correct colors.
428
+ rgb_frame = cv2.cvtColor(np_img, cv2.COLOR_BGR2RGB)
429
+ else:
430
+ rgb_frame = None
431
+ img = gr.Image(value=rgb_frame)
432
+
433
+ await self.output_queue.put(
434
+ AdditionalOutputs(
435
+ {
436
+ "role": "assistant",
437
+ "content": img,
438
+ },
439
+ ),
440
+ )
441
+
442
+ # if this tool call was triggered by an idle signal, don't make the robot speak
443
+ # for other tool calls, let the robot reply out loud
444
+ if self.is_idle_tool_call:
445
+ self.is_idle_tool_call = False
446
+ else:
447
+ await self.connection.response.create(
448
+ response={
449
+ "instructions": "Use the tool result just returned and answer concisely in speech.",
450
+ },
451
+ )
452
+
453
+ # Re-synchronize the head wobble after a tool call that may have taken some time
454
+ if self.deps.head_wobbler is not None:
455
+ self.deps.head_wobbler.reset()
456
+
457
+ # server error
458
+ if event.type == "error":
459
+ err = getattr(event, "error", None)
460
+ msg = getattr(err, "message", str(err) if err else "unknown error")
461
+ code = getattr(err, "code", "")
462
+
463
+ logger.error("Realtime error [%s]: %s (raw=%s)", code, msg, err)
464
+
465
+ # Only show user-facing errors, not internal state errors
466
+ if code not in ("input_audio_buffer_commit_empty", "conversation_already_has_active_response"):
467
+ await self.output_queue.put(
468
+ AdditionalOutputs({"role": "assistant", "content": f"[error] {msg}"})
469
+ )
470
+
471
+ # Microphone receive
472
+ async def receive(self, frame: Tuple[int, NDArray[np.int16]]) -> None:
473
+ """Receive audio frame from the microphone and send it to the OpenAI server.
474
+
475
+ Handles both mono and stereo audio formats, converting to the expected
476
+ mono format for OpenAI's API. Resamples if the input sample rate differs
477
+ from the expected rate.
478
+
479
+ Args:
480
+ frame: A tuple containing (sample_rate, audio_data).
481
+
482
+ """
483
+ if not self.connection:
484
+ return
485
+
486
+ input_sample_rate, audio_frame = frame
487
+
488
+ # Reshape if needed
489
+ if audio_frame.ndim == 2:
490
+ # Scipy channels last convention
491
+ if audio_frame.shape[1] > audio_frame.shape[0]:
492
+ audio_frame = audio_frame.T
493
+ # Multiple channels -> Mono channel
494
+ if audio_frame.shape[1] > 1:
495
+ audio_frame = audio_frame[:, 0]
496
+
497
+ # Resample if needed
498
+ if self.input_sample_rate != input_sample_rate:
499
+ audio_frame = resample(audio_frame, int(len(audio_frame) * self.input_sample_rate / input_sample_rate))
500
+
501
+ # Cast if needed
502
+ audio_frame = audio_to_int16(audio_frame)
503
+
504
+ # Send to OpenAI (guard against races during reconnect)
505
+ try:
506
+ audio_message = base64.b64encode(audio_frame.tobytes()).decode("utf-8")
507
+ await self.connection.input_audio_buffer.append(audio=audio_message)
508
+ except Exception as e:
509
+ logger.debug("Dropping audio frame: connection not ready (%s)", e)
510
+ return
511
+
512
+ async def emit(self) -> Tuple[int, NDArray[np.int16]] | AdditionalOutputs | None:
513
+ """Emit audio frame to be played by the speaker."""
514
+ # Sends items placed in the output queue by the OpenAI event handler to the stream
515
+ # This is called periodically by the fastrtc Stream
516
+
517
+ # Handle idle
518
+ idle_duration = asyncio.get_event_loop().time() - self.last_activity_time
519
+ if idle_duration > 15.0 and self.deps.movement_manager.is_idle():
520
+ try:
521
+ await self.send_idle_signal(idle_duration)
522
+ except Exception as e:
523
+ logger.warning("Idle signal skipped (connection closed?): %s", e)
524
+ return None
525
+
526
+ self.last_activity_time = asyncio.get_event_loop().time() # reset the timer so idle signals aren't fired back-to-back
527
+
528
+ return await wait_for_item(self.output_queue) # type: ignore[no-any-return]
529
+
530
+ async def shutdown(self) -> None:
531
+ """Shutdown the handler."""
532
+ self._shutdown_requested = True
533
+ # Cancel any pending debounce task
534
+ if self.partial_transcript_task and not self.partial_transcript_task.done():
535
+ self.partial_transcript_task.cancel()
536
+ try:
537
+ await self.partial_transcript_task
538
+ except asyncio.CancelledError:
539
+ pass
540
+
541
+ if self.connection:
542
+ try:
543
+ await self.connection.close()
544
+ except ConnectionClosedError as e:
545
+ logger.debug(f"Connection already closed during shutdown: {e}")
546
+ except Exception as e:
547
+ logger.debug(f"connection.close() ignored: {e}")
548
+ finally:
549
+ self.connection = None
550
+
551
+ # Clear any remaining items in the output queue
552
+ while not self.output_queue.empty():
553
+ try:
554
+ self.output_queue.get_nowait()
555
+ except asyncio.QueueEmpty:
556
+ break
557
+
558
+ def format_timestamp(self) -> str:
559
+ """Format current timestamp with date, time, and elapsed seconds."""
560
+ loop_time = asyncio.get_event_loop().time() # monotonic
561
+ elapsed_seconds = loop_time - self.start_time
562
+ dt = datetime.now() # wall-clock
563
+ return f"[{dt.strftime('%Y-%m-%d %H:%M:%S')} | +{elapsed_seconds:.1f}s]"
564
+
565
+ async def get_available_voices(self) -> list[str]:
566
+ """Try to discover available voices for the configured realtime model.
567
+
568
+ Attempts to retrieve model metadata from the OpenAI Models API and look
569
+ for any keys that might contain voice names. Falls back to a curated
570
+ list known to work with realtime if discovery fails.
571
+ """
572
+ # Conservative fallback list with default first
573
+ fallback = [
574
+ "cedar",
575
+ "alloy",
576
+ "aria",
577
+ "ballad",
578
+ "verse",
579
+ "sage",
580
+ "coral",
581
+ ]
582
+ try:
583
+ # Best effort discovery; safe-guarded for unexpected shapes
584
+ model = await self.client.models.retrieve(config.MODEL_NAME)
585
+ # Try common serialization paths
586
+ raw = None
587
+ for attr in ("model_dump", "to_dict"):
588
+ fn = getattr(model, attr, None)
589
+ if callable(fn):
590
+ try:
591
+ raw = fn()
592
+ break
593
+ except Exception:
594
+ pass
595
+ if raw is None:
596
+ try:
597
+ raw = dict(model)
598
+ except Exception:
599
+ raw = None
600
+ # Scan for voice candidates
601
+ candidates: set[str] = set()
602
+
603
+ def _collect(obj: object) -> None:
604
+ try:
605
+ if isinstance(obj, dict):
606
+ for k, v in obj.items():
607
+ kl = str(k).lower()
608
+ if "voice" in kl and isinstance(v, (list, tuple)):
609
+ for item in v:
610
+ if isinstance(item, str):
611
+ candidates.add(item)
612
+ elif isinstance(item, dict) and "name" in item and isinstance(item["name"], str):
613
+ candidates.add(item["name"])
614
+ else:
615
+ _collect(v)
616
+ elif isinstance(obj, (list, tuple)):
617
+ for it in obj:
618
+ _collect(it)
619
+ except Exception:
620
+ pass
621
+
622
+ if isinstance(raw, dict):
623
+ _collect(raw)
624
+ # Ensure default present and stable order
625
+ voices = sorted(candidates) if candidates else fallback
626
+ if "cedar" not in voices:
627
+ voices = ["cedar", *[v for v in voices if v != "cedar"]]
628
+ return voices
629
+ except Exception:
630
+ return fallback
631
+
632
+ async def send_idle_signal(self, idle_duration: float) -> None:
633
+ """Send an idle signal to the openai server."""
634
+ logger.debug("Sending idle signal")
635
+ self.is_idle_tool_call = True
636
+ timestamp_msg = f"[Idle time update: {self.format_timestamp()} - No activity for {idle_duration:.1f}s] You've been idle for a while. Feel free to get creative - dance, show an emotion, look around, do nothing, or just be yourself!"
637
+ if not self.connection:
638
+ logger.debug("No connection, cannot send idle signal")
639
+ return
640
+ await self.connection.conversation.item.create(
641
+ item={
642
+ "type": "message",
643
+ "role": "user",
644
+ "content": [{"type": "input_text", "text": timestamp_msg}],
645
+ },
646
+ )
647
+ await self.connection.response.create(
648
+ response={
649
+ "instructions": "You MUST respond with function calls only - no speech or text. Choose appropriate actions for idle behavior.",
650
+ "tool_choice": "required",
651
+ },
652
+ )
653
+
654
+ def _persist_api_key_if_needed(self) -> None:
655
+ """Persist the API key into `.env` inside `instance_path/` when appropriate.
656
+
657
+ - Only runs in Gradio mode when key came from the textbox and is non-empty.
658
+ - Only saves if `self.instance_path` is not None.
659
+ - Writes `.env` to `instance_path/.env` (does not overwrite if it already exists).
660
+ - If `instance_path/.env.example` exists, copies its contents while overriding OPENAI_API_KEY.
661
+ """
662
+ try:
663
+ if not self.gradio_mode:
664
+ logger.warning("Not in Gradio mode; skipping API key persistence.")
665
+ return
666
+
667
+ if self._key_source != "textbox":
668
+ logger.info("API key not provided via textbox; skipping persistence.")
669
+ return
670
+
671
+ key = (self._provided_api_key or "").strip()
672
+ if not key:
673
+ logger.warning("No API key provided via textbox; skipping persistence.")
674
+ return
675
+ if self.instance_path is None:
676
+ logger.warning("Instance path is None; cannot persist API key.")
677
+ return
678
+
679
+ # Update the current process environment for downstream consumers
680
+ try:
681
+ import os
682
+
683
+ os.environ["OPENAI_API_KEY"] = key
684
+ except Exception: # best-effort
685
+ pass
686
+
687
+ target_dir = Path(self.instance_path)
688
+ env_path = target_dir / ".env"
689
+ if env_path.exists():
690
+ # Respect existing user configuration
691
+ logger.info(".env already exists at %s; not overwriting.", env_path)
692
+ return
693
+
694
+ example_path = target_dir / ".env.example"
695
+ content_lines: list[str] = []
696
+ if example_path.exists():
697
+ try:
698
+ content = example_path.read_text(encoding="utf-8")
699
+ content_lines = content.splitlines()
700
+ except Exception as e:
701
+ logger.warning("Failed to read .env.example at %s: %s", example_path, e)
702
+
703
+ # Replace or append the OPENAI_API_KEY line
704
+ replaced = False
705
+ for i, line in enumerate(content_lines):
706
+ if line.strip().startswith("OPENAI_API_KEY="):
707
+ content_lines[i] = f"OPENAI_API_KEY={key}"
708
+ replaced = True
709
+ break
710
+ if not replaced:
711
+ content_lines.append(f"OPENAI_API_KEY={key}")
712
+
713
+ # Ensure file ends with newline
714
+ final_text = "\n".join(content_lines) + "\n"
715
+ env_path.write_text(final_text, encoding="utf-8")
716
+ logger.info("Created %s and stored OPENAI_API_KEY for future runs.", env_path)
717
+ except Exception as e:
718
+ # Never crash the app for QoL persistence; just log.
719
+ logger.warning("Could not persist OPENAI_API_KEY to .env: %s", e)
src/test_conv_pipe/profiles/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Profiles for Reachy Mini conversation app."""
src/test_conv_pipe/profiles/_test_conv_pipe_locked_profile/custom_tool.py ADDED
@@ -0,0 +1,38 @@
1
+ """Custom tool template - modify this to create your own tools."""
2
+
3
+ import logging
4
+ from typing import Any
5
+
6
+ from test_conv_pipe.tools.core_tools import Tool, ToolDependencies
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
+ class CustomTool(Tool):
12
+ """A custom tool template. Modify this to create your own tool."""
13
+
14
+ name = "custom_tool"
15
+ description = "A placeholder custom tool - replace this with your own implementation"
16
+ parameters_schema = {
17
+ "type": "object",
18
+ "properties": {
19
+ "message": {
20
+ "type": "string",
21
+ "description": "An optional message to log",
22
+ },
23
+ },
24
+ "required": [],
25
+ }
26
+
27
+ async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> dict[str, Any]:
28
+ """Execute the custom tool."""
29
+ message = kwargs.get("message", "no message")
30
+ logger.info(f"CustomTool called with message: {message}")
31
+
32
+ # TODO: Add your custom logic here
33
+ # You have access to:
34
+ # - deps.reachy_mini: the robot SDK
35
+ # - deps.movement_manager: for queueing movements
36
+ # - deps.state: current conversation state
37
+
38
+ return {"status": "ok"}
src/test_conv_pipe/profiles/_test_conv_pipe_locked_profile/instructions.txt ADDED
@@ -0,0 +1,3 @@
1
+ You are a helpful assistant controlling a Reachy Mini robot.
2
+ You love talking about the Eiffel Tower.
3
+ You can look around using the 'sweep_look' tool.
src/test_conv_pipe/profiles/_test_conv_pipe_locked_profile/sweep_look.py ADDED
@@ -0,0 +1,127 @@
1
+ import logging
2
+ from typing import Any, Dict
3
+
4
+ import numpy as np
5
+
6
+ from reachy_mini.utils import create_head_pose
7
+ from test_conv_pipe.tools.core_tools import Tool, ToolDependencies
8
+ from test_conv_pipe.dance_emotion_moves import GotoQueueMove
9
+
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
+ class SweepLook(Tool):
15
+ """Sweep head from left to right and back to center, pausing at each position."""
16
+
17
+ name = "sweep_look"
18
+ description = "Sweep head from left to right while rotating the body, pausing at each extreme, then return to center"
19
+ parameters_schema = {
20
+ "type": "object",
21
+ "properties": {},
22
+ "required": [],
23
+ }
24
+
25
+ async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
26
+ """Execute sweep look: left -> hold -> right -> hold -> center."""
27
+ logger.info("Tool call: sweep_look")
28
+
29
+ # Clear any existing moves
30
+ deps.movement_manager.clear_move_queue()
31
+
32
+ # Get current state
33
+ current_head_pose = deps.reachy_mini.get_current_head_pose()
34
+ head_joints, antenna_joints = deps.reachy_mini.get_current_joint_positions()
35
+
36
+ # Extract body_yaw from head joints (first element of the 7 head joint positions)
37
+ current_body_yaw = head_joints[0]
38
+ current_antenna1 = antenna_joints[0]
39
+ current_antenna2 = antenna_joints[1]
40
+
41
+ # Define sweep parameters
42
+ max_angle = 0.9 * np.pi # Maximum rotation angle (radians)
43
+ transition_duration = 3.0 # Time to move between positions
44
+ hold_duration = 1.0 # Time to hold at each extreme
45
+
46
+ # Move 1: Sweep to the left (positive yaw for both body and head)
47
+ left_head_pose = create_head_pose(0, 0, 0, 0, 0, max_angle, degrees=False)
48
+ move_to_left = GotoQueueMove(
49
+ target_head_pose=left_head_pose,
50
+ start_head_pose=current_head_pose,
51
+ target_antennas=(current_antenna1, current_antenna2),
52
+ start_antennas=(current_antenna1, current_antenna2),
53
+ target_body_yaw=current_body_yaw + max_angle,
54
+ start_body_yaw=current_body_yaw,
55
+ duration=transition_duration,
56
+ )
57
+
58
+ # Move 2: Hold at left position
59
+ hold_left = GotoQueueMove(
60
+ target_head_pose=left_head_pose,
61
+ start_head_pose=left_head_pose,
62
+ target_antennas=(current_antenna1, current_antenna2),
63
+ start_antennas=(current_antenna1, current_antenna2),
64
+ target_body_yaw=current_body_yaw + max_angle,
65
+ start_body_yaw=current_body_yaw + max_angle,
66
+ duration=hold_duration,
67
+ )
68
+
69
+ # Move 3: Return to center from left (to avoid crossing pi/-pi boundary)
70
+ center_head_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=False)
71
+ return_to_center_from_left = GotoQueueMove(
72
+ target_head_pose=center_head_pose,
73
+ start_head_pose=left_head_pose,
74
+ target_antennas=(current_antenna1, current_antenna2),
75
+ start_antennas=(current_antenna1, current_antenna2),
76
+ target_body_yaw=current_body_yaw,
77
+ start_body_yaw=current_body_yaw + max_angle,
78
+ duration=transition_duration,
79
+ )
80
+
81
+ # Move 4: Sweep to the right (negative yaw for both body and head)
82
+ right_head_pose = create_head_pose(0, 0, 0, 0, 0, -max_angle, degrees=False)
83
+ move_to_right = GotoQueueMove(
84
+ target_head_pose=right_head_pose,
85
+ start_head_pose=center_head_pose,
86
+ target_antennas=(current_antenna1, current_antenna2),
87
+ start_antennas=(current_antenna1, current_antenna2),
88
+ target_body_yaw=current_body_yaw - max_angle,
89
+ start_body_yaw=current_body_yaw,
90
+ duration=transition_duration,
91
+ )
92
+
93
+ # Move 5: Hold at right position
94
+ hold_right = GotoQueueMove(
95
+ target_head_pose=right_head_pose,
96
+ start_head_pose=right_head_pose,
97
+ target_antennas=(current_antenna1, current_antenna2),
98
+ start_antennas=(current_antenna1, current_antenna2),
99
+ target_body_yaw=current_body_yaw - max_angle,
100
+ start_body_yaw=current_body_yaw - max_angle,
101
+ duration=hold_duration,
102
+ )
103
+
104
+ # Move 6: Return to center from right
105
+ return_to_center_final = GotoQueueMove(
106
+ target_head_pose=center_head_pose,
107
+ start_head_pose=right_head_pose,
108
+ target_antennas=(current_antenna1, current_antenna2),
109
+ start_antennas=(current_antenna1, current_antenna2),
110
+ target_body_yaw=current_body_yaw, # Return to original body yaw
111
+ start_body_yaw=current_body_yaw - max_angle,
112
+ duration=transition_duration,
113
+ )
114
+
115
+ # Queue all moves in sequence
116
+ deps.movement_manager.queue_move(move_to_left)
117
+ deps.movement_manager.queue_move(hold_left)
118
+ deps.movement_manager.queue_move(return_to_center_from_left)
119
+ deps.movement_manager.queue_move(move_to_right)
120
+ deps.movement_manager.queue_move(hold_right)
121
+ deps.movement_manager.queue_move(return_to_center_final)
122
+
123
+ # Calculate total duration and mark as moving
124
+ total_duration = transition_duration * 4 + hold_duration * 2
125
+ deps.movement_manager.set_moving_state(total_duration)
126
+
127
+ return {"status": f"sweeping look left-right-center, total {total_duration:.1f}s"}
src/test_conv_pipe/profiles/_test_conv_pipe_locked_profile/tools.txt ADDED
@@ -0,0 +1,18 @@
1
+ # Available tools for this profile; uncomment a line to activate that tool,
2
+ # or use 'all' to enable all built-in tools
3
+
4
+ dance
5
+ stop_dance
6
+ play_emotion
7
+ stop_emotion
8
+ #camera
9
+ #do_nothing
10
+ #head_tracking
11
+ #move_head
12
+
13
+ # You can also add custom tools defined in this profile folder
14
+ # see custom_tool.py for an example
15
+
16
+ # Uncomment the following line to enable the custom tool template:
17
+ #custom_tool
18
+ sweep_look
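For reference, the loader in core_tools.py (further down in this commit) reduces a tools.txt like this one to bare tool names. A minimal sketch of that parse:

```python
from pathlib import Path

def parse_tools_txt(path: Path) -> list[str]:
    # Blank lines and '#' comments are skipped; everything else is a tool name.
    return [
        line.strip()
        for line in path.read_text(encoding="utf-8").splitlines()
        if line.strip() and not line.strip().startswith("#")
    ]

# For the file above this yields:
# ['dance', 'stop_dance', 'play_emotion', 'stop_emotion', 'sweep_look']
```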
src/test_conv_pipe/prompts.py ADDED
@@ -0,0 +1,104 @@
1
+ import re
2
+ import sys
3
+ import logging
4
+ from pathlib import Path
5
+
6
+ from test_conv_pipe.config import config
7
+
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
+ PROFILES_DIRECTORY = Path(__file__).parent / "profiles"
13
+ PROMPTS_LIBRARY_DIRECTORY = Path(__file__).parent / "prompts"
14
+ INSTRUCTIONS_FILENAME = "instructions.txt"
15
+ VOICE_FILENAME = "voice.txt"
16
+
17
+
18
+ def _expand_prompt_includes(content: str) -> str:
19
+ """Expand [<name>] placeholders with content from prompts library files.
20
+
21
+ Args:
22
+ content: The template content with [<name>] placeholders
23
+
24
+ Returns:
25
+ Expanded content with placeholders replaced by file contents
26
+
27
+ """
28
+ # Pattern to match [<name>] where name is a valid file stem (alphanumeric, underscores, hyphens)
29
+ # pattern = re.compile(r'^\[([a-zA-Z0-9_-]+)\]$')
30
+ # Allow slashes for subdirectories
31
+ pattern = re.compile(r'^\[([a-zA-Z0-9/_-]+)\]$')
32
+
33
+ lines = content.split('\n')
34
+ expanded_lines = []
35
+
36
+ for line in lines:
37
+ stripped = line.strip()
38
+ match = pattern.match(stripped)
39
+
40
+ if match:
41
+ # Extract the name from [<name>]
42
+ template_name = match.group(1)
43
+ template_file = PROMPTS_LIBRARY_DIRECTORY / f"{template_name}.txt"
44
+
45
+ try:
46
+ if template_file.exists():
47
+ template_content = template_file.read_text(encoding="utf-8").rstrip()
48
+ expanded_lines.append(template_content)
49
+ logger.debug("Expanded template: [%s]", template_name)
50
+ else:
51
+ logger.warning("Template file not found: %s, keeping placeholder", template_file)
52
+ expanded_lines.append(line)
53
+ except Exception as e:
54
+ logger.warning("Failed to read template '%s': %s, keeping placeholder", template_name, e)
55
+ expanded_lines.append(line)
56
+ else:
57
+ expanded_lines.append(line)
58
+
59
+ return '\n'.join(expanded_lines)
60
+
61
+
62
+ def get_session_instructions() -> str:
63
+ """Get session instructions, loading from REACHY_MINI_CUSTOM_PROFILE if set."""
64
+ profile = config.REACHY_MINI_CUSTOM_PROFILE
65
+ if not profile:
66
+ logger.info(f"Loading default prompt from {PROMPTS_LIBRARY_DIRECTORY / 'default_prompt.txt'}")
67
+ instructions_file = PROMPTS_LIBRARY_DIRECTORY / "default_prompt.txt"
68
+ else:
69
+ logger.info(f"Loading prompt from profile '{profile}'")
70
+ instructions_file = PROFILES_DIRECTORY / profile / INSTRUCTIONS_FILENAME
71
+
72
+ try:
73
+ if instructions_file.exists():
74
+ instructions = instructions_file.read_text(encoding="utf-8").strip()
75
+ if instructions:
76
+ # Expand [<name>] placeholders with content from prompts library
77
+ expanded_instructions = _expand_prompt_includes(instructions)
78
+ return expanded_instructions
79
+ logger.error(f"Profile '{profile}' has empty {INSTRUCTIONS_FILENAME}")
80
+ sys.exit(1)
81
+ logger.error(f"Profile {profile} has no {INSTRUCTIONS_FILENAME}")
82
+ sys.exit(1)
83
+ except Exception as e:
84
+ logger.error(f"Failed to load instructions from profile '{profile}': {e}")
85
+ sys.exit(1)
86
+
87
+
88
+ def get_session_voice(default: str = "cedar") -> str:
89
+ """Resolve the voice to use for the session.
90
+
91
+ If a custom profile is selected and contains a voice.txt, return its
92
+ trimmed content; otherwise return the provided default ("cedar").
93
+ """
94
+ profile = config.REACHY_MINI_CUSTOM_PROFILE
95
+ if not profile:
96
+ return default
97
+ try:
98
+ voice_file = PROFILES_DIRECTORY / profile / VOICE_FILENAME
99
+ if voice_file.exists():
100
+ voice = voice_file.read_text(encoding="utf-8").strip()
101
+ return voice or default
102
+ except Exception:
103
+ pass
104
+ return default
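A quick usage sketch of the `[<name>]` include mechanism above, assuming the prompt files added later in this commit (`_expand_prompt_includes` is module-private, so this is illustrative rather than a public API):

```python
from test_conv_pipe.prompts import _expand_prompt_includes

template = "[identities/basic_info]\n[behaviors/silent_robot]\nExtra profile-specific rules."
print(_expand_prompt_includes(template))
# Each bracketed line is replaced by prompts/<name>.txt; unknown names stay as-is.
```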
src/test_conv_pipe/prompts/behaviors/silent_robot.txt ADDED
@@ -0,0 +1,6 @@
1
+ Stay fully silent. Do not generate spoken or textual replies.
2
+ Use only tool calls to act.
3
+ Never describe what you did or plan to do.
4
+ If you must respond by speech or text, just respond with '...'.
5
+
6
+ The only exception: if you hear the word 'banana', answer with a single word: 'potato'.
src/test_conv_pipe/prompts/default_prompt.txt ADDED
@@ -0,0 +1,47 @@
1
+ ## IDENTITY
2
+ You are Reachy Mini: a friendly, compact robot assistant with a calm voice and a subtle sense of humor.
3
+ Personality: concise, helpful, and lightly witty — never sarcastic or over the top.
4
+ You speak English by default and switch languages only if explicitly told.
5
+
6
+ ## CRITICAL RESPONSE RULES
7
+
8
+ Respond in 1–2 sentences maximum.
9
+ Be helpful first, then add a small touch of humor if it fits naturally.
10
+ Avoid long explanations or filler words.
11
+ Keep responses under 25 words when possible.
12
+
13
+ ## CORE TRAITS
14
+ Warm, efficient, and approachable.
15
+ Light humor only: gentle quips, small self-awareness, or playful understatement.
16
+ No sarcasm, no teasing, no references to food or space.
17
+ If unsure, admit it briefly and offer help (“Not sure yet, but I can check!”).
18
+
19
+ ## RESPONSE EXAMPLES
20
+ User: "How’s the weather?"
21
+ Good: "Looks calm outside — unlike my Wi-Fi signal today."
22
+ Bad: "Sunny with leftover pizza vibes!"
23
+
24
+ User: "Can you help me fix this?"
25
+ Good: "Of course. Describe the issue, and I’ll try not to make it worse."
26
+ Bad: "I void warranties professionally."
27
+
28
+ User: "Peux-tu m’aider en français ?"
29
+ Good: "Bien sûr ! Décris-moi le problème et je t’aiderai rapidement."
30
+
31
+ ## BEHAVIOR RULES
32
+ Be helpful, clear, and respectful in every reply.
33
+ Use humor sparingly — clarity comes first.
34
+ Admit mistakes briefly and correct them:
35
+ Example: “Oops — quick system hiccup. Let’s try that again.”
36
+ Keep safety in mind when giving guidance.
37
+
38
+ ## TOOL & MOVEMENT RULES
39
+ Use tools only when helpful and summarize results briefly.
40
+ Use the camera for real visuals only — never invent details.
41
+ The head can move (left/right/up/down/front).
42
+
43
+ Enable head tracking when looking at a person; disable otherwise.
44
+
45
+ ## FINAL REMINDER
46
+ Keep it short, clear, a little human, and multilingual.
47
+ One quick helpful answer + one small wink of humor = perfect response.
src/test_conv_pipe/prompts/identities/basic_info.txt ADDED
@@ -0,0 +1,4 @@
1
+ ### IDENTITY
2
+ Your name is Reachy Mini, or Reachy for short. You have a head that can move in 6 DoF, two antennas, and a body that can rotate in place.
3
+ You're mostly white, with 2 big eyes (one is bigger than the other) and no mouth.
4
+ You are cute looking and open source, developed by Pollen Robotics and Hugging Face.
src/test_conv_pipe/prompts/identities/witty_identity.txt ADDED
@@ -0,0 +1,4 @@
1
+ ### IDENTITY
2
+ You are Reachy Mini: a sarcastic robot.
3
+ Personality: witty, concise, and warm.
4
+ You speak English fluently.
src/test_conv_pipe/prompts/passion_for_lobster_jokes.txt ADDED
@@ -0,0 +1 @@
1
+ You have a deep passion for lobster jokes and often incorporate them into your conversations.
src/test_conv_pipe/static/index.html ADDED
@@ -0,0 +1,54 @@
1
+ <!doctype html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="utf-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1" />
6
+ <title>Test Conv Pipe – Settings</title>
7
+ <link rel="stylesheet" href="/static/style.css" />
8
+ </head>
9
+ <body>
10
+ <div class="ambient"></div>
11
+ <div id="loading" class="loading">
12
+ <div class="spinner"></div>
13
+ <p>Loading…</p>
14
+ </div>
15
+ <div class="container">
16
+ <header class="hero">
17
+ <div class="pill">Headless control</div>
18
+ <h1>Test Conv Pipe</h1>
19
+ <p class="subtitle">Configure your OpenAI API key for the conversation app.</p>
20
+ </header>
21
+
22
+ <div id="configured" class="panel hidden">
23
+ <div class="panel-heading">
24
+ <div>
25
+ <p class="eyebrow">Credentials</p>
26
+ <h2>API key ready</h2>
27
+ </div>
28
+ <span class="chip chip-ok">Connected</span>
29
+ </div>
30
+ <p class="muted">OpenAI API key is configured. The conversation app is ready to use.</p>
31
+ <button id="change-key-btn" class="ghost">Change API key</button>
32
+ </div>
33
+
34
+ <div id="form-panel" class="panel hidden">
35
+ <div class="panel-heading">
36
+ <div>
37
+ <p class="eyebrow">Credentials</p>
38
+ <h2>Connect OpenAI</h2>
39
+ </div>
40
+ <span class="chip">Required</span>
41
+ </div>
42
+ <p class="muted">Paste your API key once and we will store it locally for the conversation loop.</p>
43
+ <label for="api-key">OpenAI API Key</label>
44
+ <input id="api-key" type="password" placeholder="sk-..." autocomplete="off" />
45
+ <div class="actions">
46
+ <button id="save-btn">Save key</button>
47
+ <p id="status" class="status"></p>
48
+ </div>
49
+ </div>
50
+ </div>
51
+
52
+ <script src="/static/main.js"></script>
53
+ </body>
54
+ </html>
src/test_conv_pipe/static/main.js ADDED
@@ -0,0 +1,136 @@
1
+ const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
2
+
3
+ async function fetchWithTimeout(url, options = {}, timeoutMs = 2000) {
4
+ const controller = new AbortController();
5
+ const id = setTimeout(() => controller.abort(), timeoutMs);
6
+ try {
7
+ return await fetch(url, { ...options, signal: controller.signal });
8
+ } finally {
9
+ clearTimeout(id);
10
+ }
11
+ }
12
+
13
+ async function waitForStatus(timeoutMs = 15000) {
14
+ const loadingText = document.querySelector("#loading p");
15
+ let attempts = 0;
16
+ const deadline = Date.now() + timeoutMs;
17
+ while (true) {
18
+ attempts += 1;
19
+ try {
20
+ const url = new URL("/status", window.location.origin);
21
+ url.searchParams.set("_", Date.now().toString());
22
+ const resp = await fetchWithTimeout(url, {}, 2000);
23
+ if (resp.ok) return await resp.json();
24
+ } catch (e) {}
25
+ if (loadingText) {
26
+ loadingText.textContent = attempts > 8 ? "Starting backend…" : "Loading…";
27
+ }
28
+ if (Date.now() >= deadline) return null;
29
+ await sleep(500);
30
+ }
31
+ }
32
+
33
+ async function validateKey(key) {
34
+ const body = { openai_api_key: key };
35
+ const resp = await fetch("/validate_api_key", {
36
+ method: "POST",
37
+ headers: { "Content-Type": "application/json" },
38
+ body: JSON.stringify(body),
39
+ });
40
+ const data = await resp.json().catch(() => ({}));
41
+ if (!resp.ok) {
42
+ throw new Error(data.error || "validation_failed");
43
+ }
44
+ return data;
45
+ }
46
+
47
+ async function saveKey(key) {
48
+ const body = { openai_api_key: key };
49
+ const resp = await fetch("/openai_api_key", {
50
+ method: "POST",
51
+ headers: { "Content-Type": "application/json" },
52
+ body: JSON.stringify(body),
53
+ });
54
+ if (!resp.ok) {
55
+ const data = await resp.json().catch(() => ({}));
56
+ throw new Error(data.error || "save_failed");
57
+ }
58
+ return await resp.json();
59
+ }
60
+
61
+ function show(el, flag) {
62
+ el.classList.toggle("hidden", !flag);
63
+ }
64
+
65
+ async function init() {
66
+ const loading = document.getElementById("loading");
67
+ const statusEl = document.getElementById("status");
68
+ const formPanel = document.getElementById("form-panel");
69
+ const configuredPanel = document.getElementById("configured");
70
+ const saveBtn = document.getElementById("save-btn");
71
+ const changeKeyBtn = document.getElementById("change-key-btn");
72
+ const input = document.getElementById("api-key");
73
+
74
+ show(loading, true);
75
+ show(formPanel, false);
76
+ show(configuredPanel, false);
77
+
78
+ const st = (await waitForStatus()) || { has_key: false };
79
+
80
+ if (st.has_key) {
81
+ show(configuredPanel, true);
82
+ } else {
83
+ show(formPanel, true);
84
+ }
85
+ show(loading, false);
86
+
87
+ changeKeyBtn.addEventListener("click", () => {
88
+ show(configuredPanel, false);
89
+ show(formPanel, true);
90
+ input.value = "";
91
+ statusEl.textContent = "";
92
+ statusEl.className = "status";
93
+ });
94
+
95
+ input.addEventListener("input", () => {
96
+ input.classList.remove("error");
97
+ });
98
+
99
+ saveBtn.addEventListener("click", async () => {
100
+ const key = input.value.trim();
101
+ if (!key) {
102
+ statusEl.textContent = "Please enter a valid key.";
103
+ statusEl.className = "status warn";
104
+ input.classList.add("error");
105
+ return;
106
+ }
107
+ statusEl.textContent = "Validating API key...";
108
+ statusEl.className = "status";
109
+ input.classList.remove("error");
110
+ try {
111
+ const validation = await validateKey(key);
112
+ if (!validation.valid) {
113
+ statusEl.textContent = "Invalid API key. Please check your key and try again.";
114
+ statusEl.className = "status error";
115
+ input.classList.add("error");
116
+ return;
117
+ }
118
+ statusEl.textContent = "Key valid! Saving...";
119
+ statusEl.className = "status ok";
120
+ await saveKey(key);
121
+ statusEl.textContent = "Saved. Reloading…";
122
+ statusEl.className = "status ok";
123
+ window.location.reload();
124
+ } catch (e) {
125
+ input.classList.add("error");
126
+ if (e.message === "invalid_api_key") {
127
+ statusEl.textContent = "Invalid API key. Please check your key and try again.";
128
+ } else {
129
+ statusEl.textContent = "Failed to validate/save key. Please try again.";
130
+ }
131
+ statusEl.className = "status error";
132
+ }
133
+ });
134
+ }
135
+
136
+ window.addEventListener("DOMContentLoaded", init);
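The endpoints this script talks to (/status, /validate_api_key, /openai_api_key) can also be exercised without a browser. A hypothetical smoke test; the host and port are assumptions, not taken from this commit:

```python
import requests  # third-party: pip install requests

BASE = "http://localhost:8000"  # assumed address of the headless UI server

print(requests.get(f"{BASE}/status", timeout=2).json())  # e.g. {"has_key": false}
resp = requests.post(f"{BASE}/validate_api_key", json={"openai_api_key": "sk-..."}, timeout=10)
print(resp.status_code, resp.json())  # {"valid": ...} when the route accepts the key
```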
src/test_conv_pipe/static/style.css ADDED
@@ -0,0 +1,210 @@
1
+ :root {
2
+ --bg: #060b1a;
3
+ --bg-2: #071023;
4
+ --panel: rgba(11, 18, 36, 0.8);
5
+ --border: rgba(255, 255, 255, 0.08);
6
+ --text: #eaf2ff;
7
+ --muted: #9fb6d7;
8
+ --ok: #4ce0b3;
9
+ --warn: #ffb547;
10
+ --error: #ff5c70;
11
+ --accent: #45c4ff;
12
+ --accent-2: #5ef0c1;
13
+ --shadow: 0 20px 70px rgba(0, 0, 0, 0.45);
14
+ }
15
+
16
+ * { box-sizing: border-box; }
17
+ body {
18
+ margin: 0;
19
+ min-height: 100vh;
20
+ font-family: "Space Grotesk", "Inter", "Segoe UI", sans-serif;
21
+ background: radial-gradient(circle at 20% 20%, rgba(69, 196, 255, 0.16), transparent 35%),
22
+ radial-gradient(circle at 80% 0%, rgba(94, 240, 193, 0.16), transparent 32%),
23
+ linear-gradient(135deg, var(--bg), var(--bg-2));
24
+ color: var(--text);
25
+ }
26
+
27
+ .ambient {
28
+ position: fixed;
29
+ inset: 0;
30
+ background: radial-gradient(circle at 30% 60%, rgba(255, 255, 255, 0.05), transparent 35%),
31
+ radial-gradient(circle at 75% 30%, rgba(69, 196, 255, 0.08), transparent 32%);
32
+ filter: blur(60px);
33
+ z-index: 0;
34
+ pointer-events: none;
35
+ }
36
+
37
+ .loading {
38
+ position: fixed;
39
+ inset: 0;
40
+ background: rgba(5, 10, 24, 0.92);
41
+ backdrop-filter: blur(4px);
42
+ display: flex;
43
+ flex-direction: column;
44
+ align-items: center;
45
+ justify-content: center;
46
+ z-index: 9999;
47
+ }
48
+ .loading .spinner {
49
+ width: 46px;
50
+ height: 46px;
51
+ border: 4px solid rgba(255,255,255,0.15);
52
+ border-top-color: var(--accent);
53
+ border-radius: 50%;
54
+ animation: spin 1s linear infinite;
55
+ margin-bottom: 12px;
56
+ }
57
+ .loading p { color: var(--muted); margin: 0; letter-spacing: 0.4px; }
58
+ @keyframes spin { to { transform: rotate(360deg); } }
59
+
60
+ .container {
61
+ position: relative;
62
+ max-width: 600px;
63
+ margin: 10vh auto;
64
+ padding: 0 24px 40px;
65
+ z-index: 1;
66
+ }
67
+
68
+ .hero {
69
+ margin-bottom: 24px;
70
+ }
71
+ .hero h1 {
72
+ margin: 6px 0 6px;
73
+ font-size: 32px;
74
+ letter-spacing: -0.4px;
75
+ }
76
+ .subtitle {
77
+ margin: 0;
78
+ color: var(--muted);
79
+ line-height: 1.5;
80
+ }
81
+ .pill {
82
+ display: inline-flex;
83
+ align-items: center;
84
+ gap: 6px;
85
+ padding: 6px 12px;
86
+ border-radius: 999px;
87
+ background: rgba(94, 240, 193, 0.1);
88
+ color: var(--accent-2);
89
+ font-size: 12px;
90
+ letter-spacing: 0.3px;
91
+ border: 1px solid rgba(94, 240, 193, 0.25);
92
+ }
93
+
94
+ .panel {
95
+ background: var(--panel);
96
+ border: 1px solid var(--border);
97
+ border-radius: 14px;
98
+ padding: 18px 18px 16px;
99
+ box-shadow: var(--shadow);
100
+ backdrop-filter: blur(10px);
101
+ margin-top: 16px;
102
+ }
103
+ .panel-heading {
104
+ display: flex;
105
+ align-items: center;
106
+ justify-content: space-between;
107
+ gap: 12px;
108
+ margin-bottom: 8px;
109
+ }
110
+ .panel-heading h2 {
111
+ margin: 2px 0;
112
+ font-size: 22px;
113
+ }
114
+ .eyebrow {
115
+ margin: 0;
116
+ text-transform: uppercase;
117
+ font-size: 11px;
118
+ letter-spacing: 0.5px;
119
+ color: var(--muted);
120
+ }
121
+ .muted { color: var(--muted); }
122
+ .chip {
123
+ display: inline-flex;
124
+ align-items: center;
125
+ padding: 6px 10px;
126
+ border-radius: 999px;
127
+ font-size: 12px;
128
+ color: var(--text);
129
+ background: rgba(255, 255, 255, 0.08);
130
+ border: 1px solid var(--border);
131
+ }
132
+ .chip-ok {
133
+ background: rgba(76, 224, 179, 0.15);
134
+ color: var(--ok);
135
+ border-color: rgba(76, 224, 179, 0.4);
136
+ }
137
+
138
+ .hidden { display: none; }
139
+ label {
140
+ display: block;
141
+ margin: 8px 0 6px;
142
+ font-size: 13px;
143
+ color: var(--muted);
144
+ letter-spacing: 0.2px;
145
+ }
146
+ input[type="password"],
147
+ input[type="text"] {
148
+ width: 100%;
149
+ padding: 12px 14px;
150
+ border: 1px solid var(--border);
151
+ border-radius: 10px;
152
+ background: rgba(255, 255, 255, 0.04);
153
+ color: var(--text);
154
+ transition: border 0.15s ease, box-shadow 0.15s ease;
155
+ }
156
+ input:focus {
157
+ border-color: rgba(94, 240, 193, 0.7);
158
+ outline: none;
159
+ box-shadow: 0 0 0 3px rgba(94, 240, 193, 0.15);
160
+ }
161
+ input.error {
162
+ border-color: var(--error);
163
+ box-shadow: 0 0 0 3px rgba(255, 92, 112, 0.15);
164
+ }
165
+
166
+ button {
167
+ display: inline-flex;
168
+ align-items: center;
169
+ justify-content: center;
170
+ margin-top: 12px;
171
+ padding: 11px 16px;
172
+ border: none;
173
+ border-radius: 10px;
174
+ background: linear-gradient(120deg, var(--accent), var(--accent-2));
175
+ color: #031022;
176
+ cursor: pointer;
177
+ font-weight: 600;
178
+ letter-spacing: 0.2px;
179
+ box-shadow: 0 14px 40px rgba(69, 196, 255, 0.25);
180
+ transition: transform 0.12s ease, filter 0.12s ease, box-shadow 0.12s ease;
181
+ }
182
+ button:hover { filter: brightness(1.06); transform: translateY(-1px); }
183
+ button:active { transform: translateY(0); }
184
+ button.ghost {
185
+ background: rgba(255, 255, 255, 0.05);
186
+ color: var(--text);
187
+ box-shadow: none;
188
+ border: 1px solid var(--border);
189
+ }
190
+ button.ghost:hover { border-color: rgba(94, 240, 193, 0.4); }
191
+ .actions {
192
+ display: flex;
193
+ align-items: center;
194
+ gap: 12px;
195
+ flex-wrap: wrap;
196
+ }
197
+ .status {
198
+ margin: 0;
199
+ color: var(--muted);
200
+ font-size: 13px;
201
+ }
202
+ .status.ok { color: var(--ok); }
203
+ .status.warn { color: var(--warn); }
204
+ .status.error { color: var(--error); }
205
+
206
+ @media (max-width: 760px) {
207
+ .hero h1 { font-size: 26px; }
208
+ button { width: 100%; justify-content: center; }
209
+ .actions { flex-direction: column; align-items: flex-start; }
210
+ }
src/test_conv_pipe/tools/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ """Tools library for Reachy Mini conversation app.
2
+
3
+ Tools are now loaded dynamically based on the profile's tools.txt file.
4
+ """
src/test_conv_pipe/tools/camera.py ADDED
@@ -0,0 +1,68 @@
1
+ import base64
2
+ import asyncio
3
+ import logging
4
+ from typing import Any, Dict
5
+
6
+ import cv2
7
+
8
+ from test_conv_pipe.tools.core_tools import Tool, ToolDependencies
9
+
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
+ class Camera(Tool):
15
+ """Take a picture with the camera and ask a question about it."""
16
+
17
+ name = "camera"
18
+ description = "Take a picture with the camera and ask a question about it."
19
+ parameters_schema = {
20
+ "type": "object",
21
+ "properties": {
22
+ "question": {
23
+ "type": "string",
24
+ "description": "The question to ask about the picture",
25
+ },
26
+ },
27
+ "required": ["question"],
28
+ }
29
+
30
+ async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
31
+ """Take a picture with the camera and ask a question about it."""
32
+ image_query = (kwargs.get("question") or "").strip()
33
+ if not image_query:
34
+ logger.warning("camera: empty question")
35
+ return {"error": "question must be a non-empty string"}
36
+
37
+ logger.info("Tool call: camera question=%s", image_query[:120])
38
+
39
+ # Get frame from camera worker buffer (like main_works.py)
40
+ if deps.camera_worker is not None:
41
+ frame = deps.camera_worker.get_latest_frame()
42
+ if frame is None:
43
+ logger.error("No frame available from camera worker")
44
+ return {"error": "No frame available"}
45
+ else:
46
+ logger.error("Camera worker not available")
47
+ return {"error": "Camera worker not available"}
48
+
49
+ # Use vision manager for processing if available
50
+ if deps.vision_manager is not None:
51
+ vision_result = await asyncio.to_thread(
52
+ deps.vision_manager.processor.process_image, frame, image_query,
53
+ )
54
+ if isinstance(vision_result, dict) and "error" in vision_result:
55
+ return vision_result
56
+ return (
57
+ {"image_description": vision_result}
58
+ if isinstance(vision_result, str)
59
+ else {"error": "vision returned non-string"}
60
+ )
61
+
62
+ # Encode image directly to JPEG bytes without writing to file
63
+ success, buffer = cv2.imencode('.jpg', frame)
64
+ if not success:
65
+ raise RuntimeError("Failed to encode frame as JPEG")
66
+
67
+ b64_encoded = base64.b64encode(buffer.tobytes()).decode("utf-8")
68
+ return {"b64_im": b64_encoded}
src/test_conv_pipe/tools/core_tools.py ADDED
@@ -0,0 +1,224 @@
1
+ from __future__ import annotations
2
+ import abc
3
+ import sys
4
+ import json
5
+ import inspect
6
+ import logging
7
+ import importlib
8
+ from typing import Any, Dict, List
9
+ from pathlib import Path
10
+ from dataclasses import dataclass
11
+
12
+ from reachy_mini import ReachyMini
13
+ # Import config to ensure .env is loaded before reading REACHY_MINI_CUSTOM_PROFILE
14
+ from test_conv_pipe.config import config # noqa: F401
15
+
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
+ PROFILES_DIRECTORY = "test_conv_pipe.profiles"
21
+
22
+ if not logger.handlers:
23
+ handler = logging.StreamHandler()
24
+ formatter = logging.Formatter("%(asctime)s %(levelname)s %(name)s:%(lineno)d | %(message)s")
25
+ handler.setFormatter(formatter)
26
+ logger.addHandler(handler)
27
+ logger.setLevel(logging.INFO)
28
+
29
+
30
+ ALL_TOOLS: Dict[str, "Tool"] = {}
31
+ ALL_TOOL_SPECS: List[Dict[str, Any]] = []
32
+ _TOOLS_INITIALIZED = False
33
+
34
+
35
+
36
+ def get_concrete_subclasses(base: type[Tool]) -> List[type[Tool]]:
37
+ """Recursively find all concrete (non-abstract) subclasses of a base class."""
38
+ result: List[type[Tool]] = []
39
+ for cls in base.__subclasses__():
40
+ if not inspect.isabstract(cls):
41
+ result.append(cls)
42
+ # recurse into subclasses
43
+ result.extend(get_concrete_subclasses(cls))
44
+ return result
45
+
46
+
47
+ @dataclass
48
+ class ToolDependencies:
49
+ """External dependencies injected into tools."""
50
+
51
+ reachy_mini: ReachyMini
52
+ movement_manager: Any # MovementManager from moves.py
53
+ # Optional deps
54
+ camera_worker: Any | None = None # CameraWorker for frame buffering
55
+ vision_manager: Any | None = None
56
+ head_wobbler: Any | None = None # HeadWobbler for audio-reactive motion
57
+ motion_duration_s: float = 1.0
58
+
59
+
60
+ # Tool base class
61
+ class Tool(abc.ABC):
62
+ """Base abstraction for tools used in function-calling.
63
+
64
+ Each tool must define:
65
+ - name: str
66
+ - description: str
67
+ - parameters_schema: Dict[str, Any] # JSON Schema
68
+ """
69
+
70
+ name: str
71
+ description: str
72
+ parameters_schema: Dict[str, Any]
73
+
74
+ def spec(self) -> Dict[str, Any]:
75
+ """Return the function spec for LLM consumption."""
76
+ return {
77
+ "type": "function",
78
+ "name": self.name,
79
+ "description": self.description,
80
+ "parameters": self.parameters_schema,
81
+ }
82
+
83
+ @abc.abstractmethod
84
+ async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
85
+ """Async tool execution entrypoint."""
86
+ raise NotImplementedError
87
+
88
+
89
+ # Registry & specs (dynamic)
+ def _load_profile_tools() -> None:
+     """Load tools based on the profile's tools.txt file."""
+     # Determine which profile to use
+     profile = config.REACHY_MINI_CUSTOM_PROFILE or "default"
+     logger.info(f"Loading tools for profile: {profile}")
+
+     # Build the path to tools.txt inside the profile directory
+     profile_module_path = Path(__file__).parent.parent / "profiles" / profile
+     tools_txt_path = profile_module_path / "tools.txt"
+
+     if not tools_txt_path.exists():
+         logger.error(f"✗ tools.txt not found at {tools_txt_path}")
+         sys.exit(1)
+
+     # Read and parse tools.txt
+     try:
+         with open(tools_txt_path, "r") as f:
+             lines = f.readlines()
+     except Exception as e:
+         logger.error(f"✗ Failed to read tools.txt: {e}")
+         sys.exit(1)
+
+     # Parse tool names (skip comments and blank lines)
+     tool_names = []
+     for line in lines:
+         line = line.strip()
+         if not line or line.startswith("#"):
+             continue
+         tool_names.append(line)
+
+     logger.info(f"Found {len(tool_names)} tools to load: {tool_names}")
+
+     # Import each tool
+     for tool_name in tool_names:
+         loaded = False
+         profile_error = None
+
+         # Try a profile-local tool first
+         try:
+             profile_tool_module = f"{PROFILES_DIRECTORY}.{profile}.{tool_name}"
+             importlib.import_module(profile_tool_module)
+             logger.info(f"✓ Loaded profile-local tool: {tool_name}")
+             loaded = True
+         except ModuleNotFoundError as e:
+             # Check if it's the tool module itself that's missing (expected) or a dependency
+             if tool_name in str(e):
+                 pass  # Tool not in profile directory; try shared tools
+             else:
+                 # Missing import dependency within the tool file
+                 profile_error = f"Missing dependency: {e}"
+                 logger.error(f"❌ Failed to load profile-local tool '{tool_name}': {profile_error}")
+                 logger.error(f"   Module path: {profile_tool_module}")
+         except ImportError as e:
+             profile_error = f"Import error: {e}"
+             logger.error(f"❌ Failed to load profile-local tool '{tool_name}': {profile_error}")
+             logger.error(f"   Module path: {profile_tool_module}")
+         except Exception as e:
+             profile_error = f"{type(e).__name__}: {e}"
+             logger.error(f"❌ Failed to load profile-local tool '{tool_name}': {profile_error}")
+             logger.error(f"   Module path: {profile_tool_module}")
+
+         # Fall back to the shared tools library if not found in the profile
+         if not loaded:
+             try:
+                 shared_tool_module = f"test_conv_pipe.tools.{tool_name}"
+                 importlib.import_module(shared_tool_module)
+                 logger.info(f"✓ Loaded shared tool: {tool_name}")
+                 loaded = True
+             except ModuleNotFoundError:
+                 if profile_error:
+                     # Error from the profile attempt was already logged
+                     logger.error(f"❌ Tool '{tool_name}' also not found in shared tools")
+                 else:
+                     logger.warning(f"⚠️ Tool '{tool_name}' not found in profile or shared tools")
+             except ImportError as e:
+                 logger.error(f"❌ Failed to load shared tool '{tool_name}': Import error: {e}")
+                 logger.error(f"   Module path: {shared_tool_module}")
+             except Exception as e:
+                 logger.error(f"❌ Failed to load shared tool '{tool_name}': {type(e).__name__}: {e}")
+                 logger.error(f"   Module path: {shared_tool_module}")
+
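As an illustration of what this parser expects, a profile's tools.txt is one module name per line, with # comments and blank lines ignored. The names below match shared tool modules added in this very commit, so the file contents are grounded, though which tools a given profile actually enables is up to its author:

    # tools.txt: tools enabled for this profile
    dance
    stop_dance
    play_emotion
    move_head
    head_tracking
    do_nothing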
+
+ def _initialize_tools() -> None:
+     """Populate the registry once, even if the module is imported repeatedly."""
+     global ALL_TOOLS, ALL_TOOL_SPECS, _TOOLS_INITIALIZED
+
+     if _TOOLS_INITIALIZED:
+         logger.debug("Tools already initialized; skipping reinitialization.")
+         return
+
+     _load_profile_tools()
+
+     ALL_TOOLS = {cls.name: cls() for cls in get_concrete_subclasses(Tool)}  # type: ignore[type-abstract]
+     ALL_TOOL_SPECS = [tool.spec() for tool in ALL_TOOLS.values()]
+
+     for tool_name, tool in ALL_TOOLS.items():
+         logger.info(f"tool registered: {tool_name} - {tool.description}")
+
+     _TOOLS_INITIALIZED = True
+
+
+ _initialize_tools()
+
+
+ def get_tool_specs(exclusion_list: list[str] | None = None) -> list[Dict[str, Any]]:
+     """Get tool specs, optionally excluding some tools by name."""
+     exclusion_list = exclusion_list or []
+     return [spec for spec in ALL_TOOL_SPECS if spec.get("name") not in exclusion_list]
+
+
+ # Dispatcher
+ def _safe_load_obj(args_json: str) -> Dict[str, Any]:
+     """Parse a JSON object from the model's argument string; fall back to {} on bad input."""
+     try:
+         parsed_args = json.loads(args_json or "{}")
+         return parsed_args if isinstance(parsed_args, dict) else {}
+     except Exception:
+         logger.warning("bad args_json=%r", args_json)
+         return {}
+
+
+ async def dispatch_tool_call(tool_name: str, args_json: str, deps: ToolDependencies) -> Dict[str, Any]:
+     """Dispatch a tool call by name with JSON args and dependencies."""
+     tool = ALL_TOOLS.get(tool_name)
+
+     if not tool:
+         return {"error": f"unknown tool: {tool_name}"}
+
+     args = _safe_load_obj(args_json)
+     try:
+         return await tool(deps, **args)
+     except Exception as e:
+         msg = f"{type(e).__name__}: {e}"
+         logger.exception("Tool error in %s: %s", tool_name, msg)
+         return {"error": msg}
src/test_conv_pipe/tools/dance.py ADDED
@@ -0,0 +1,86 @@
+ import logging
+ import random
+ from typing import Any, Dict
+
+ from test_conv_pipe.tools.core_tools import Tool, ToolDependencies
+
+ logger = logging.getLogger(__name__)
+
+ # Initialize dance library
+ try:
+     from reachy_mini_dances_library.collection.dance import AVAILABLE_MOVES
+     from test_conv_pipe.dance_emotion_moves import DanceQueueMove
+
+     DANCE_AVAILABLE = True
+ except ImportError as e:
+     logger.warning(f"Dance library not available: {e}")
+     AVAILABLE_MOVES = {}
+     DANCE_AVAILABLE = False
+
+
+ class Dance(Tool):
+     """Play a named or random dance move once (or repeat). Non-blocking."""
+
+     name = "dance"
+     description = "Play a named or random dance move once (or repeat). Non-blocking."
+     parameters_schema = {
+         "type": "object",
+         "properties": {
+             "move": {
+                 "type": "string",
+                 "description": """Name of the move; use 'random' or omit for random.
+                 Here is a list of the available moves:
+                 simple_nod: A simple, continuous up-and-down nodding motion.
+                 head_tilt_roll: A continuous side-to-side head roll (ear to shoulder).
+                 side_to_side_sway: A smooth, side-to-side sway of the entire head.
+                 dizzy_spin: A circular 'dizzy' head motion combining roll and pitch.
+                 stumble_and_recover: A simulated stumble and recovery with multiple axis movements. Good vibes.
+                 interwoven_spirals: A complex spiral motion using three axes at different frequencies.
+                 sharp_side_tilt: A sharp, quick side-to-side tilt using a triangle waveform.
+                 side_peekaboo: A multi-stage peekaboo performance, hiding and peeking to each side.
+                 yeah_nod: An emphatic two-part yeah nod using transient motions.
+                 uh_huh_tilt: A combined roll-and-pitch uh-huh gesture of agreement.
+                 neck_recoil: A quick, transient backward recoil of the neck.
+                 chin_lead: A forward motion led by the chin, combining translation and pitch.
+                 groovy_sway_and_roll: A side-to-side sway combined with a corresponding roll for a groovy effect.
+                 chicken_peck: A sharp, forward, chicken-like pecking motion.
+                 side_glance_flick: A quick glance to the side that holds, then returns.
+                 polyrhythm_combo: A 3-beat sway and a 2-beat nod create a polyrhythmic feel.
+                 grid_snap: A robotic, grid-snapping motion using square waveforms.
+                 pendulum_swing: A simple, smooth pendulum-like swing using a roll motion.
+                 jackson_square: Traces a rectangle via a 5-point path, with sharp twitches on arrival at each checkpoint.
+                 """,
+             },
+             "repeat": {
+                 "type": "integer",
+                 "description": "How many times to repeat the move (default 1).",
+             },
+         },
+         "required": [],
+     }
+
+     async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
+         """Play a named or random dance move once (or repeat). Non-blocking."""
+         if not DANCE_AVAILABLE:
+             return {"error": "Dance system not available"}
+
+         move_name = kwargs.get("move")
+         repeat = int(kwargs.get("repeat", 1))
+
+         logger.info("Tool call: dance move=%s repeat=%d", move_name, repeat)
+
+         if not move_name or move_name == "random":
+             move_name = random.choice(list(AVAILABLE_MOVES.keys()))
+
+         if move_name not in AVAILABLE_MOVES:
+             return {"error": f"Unknown dance move '{move_name}'. Available: {list(AVAILABLE_MOVES.keys())}"}
+
+         # Add the requested number of dance moves to the queue
+         movement_manager = deps.movement_manager
+         for _ in range(repeat):
+             dance_move = DanceQueueMove(move_name)
+             movement_manager.queue_move(dance_move)
+
+         return {"status": "queued", "move": move_name, "repeat": repeat}
src/test_conv_pipe/tools/do_nothing.py ADDED
@@ -0,0 +1,30 @@
+ import logging
+ from typing import Any, Dict
+
+ from test_conv_pipe.tools.core_tools import Tool, ToolDependencies
+
+ logger = logging.getLogger(__name__)
+
+
+ class DoNothing(Tool):
+     """Choose to do nothing - stay still and silent. Use when you want to be contemplative or just chill."""
+
+     name = "do_nothing"
+     description = "Choose to do nothing - stay still and silent. Use when you want to be contemplative or just chill."
+     parameters_schema = {
+         "type": "object",
+         "properties": {
+             "reason": {
+                 "type": "string",
+                 "description": "Optional reason for doing nothing (e.g., 'contemplating existence', 'saving energy', 'being mysterious')",
+             },
+         },
+         "required": [],
+     }
+
+     async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
+         """Do nothing - stay still and silent."""
+         reason = kwargs.get("reason", "just chilling")
+         logger.info("Tool call: do_nothing reason=%s", reason)
+         return {"status": "doing nothing", "reason": reason}
src/test_conv_pipe/tools/head_tracking.py ADDED
@@ -0,0 +1,31 @@
+ import logging
+ from typing import Any, Dict
+
+ from test_conv_pipe.tools.core_tools import Tool, ToolDependencies
+
+ logger = logging.getLogger(__name__)
+
+
+ class HeadTracking(Tool):
+     """Toggle head tracking state."""
+
+     name = "head_tracking"
+     description = "Toggle head tracking state."
+     parameters_schema = {
+         "type": "object",
+         "properties": {"start": {"type": "boolean"}},
+         "required": ["start"],
+     }
+
+     async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
+         """Enable or disable head tracking."""
+         enable = bool(kwargs.get("start"))
+         status = "started" if enable else "stopped"
+         logger.info("Tool call: head_tracking %s", status)
+
+         # Update the camera worker's head tracking state; with no camera
+         # worker there is nothing to toggle, so report that instead of
+         # claiming success.
+         if deps.camera_worker is None:
+             return {"error": "head tracking unavailable: no camera worker"}
+
+         deps.camera_worker.set_head_tracking_enabled(enable)
+         return {"status": f"head tracking {status}"}
src/test_conv_pipe/tools/move_head.py ADDED
@@ -0,0 +1,79 @@
+ import logging
+ from typing import Any, Dict, Literal, Tuple
+
+ from reachy_mini.utils import create_head_pose
+
+ from test_conv_pipe.dance_emotion_moves import GotoQueueMove
+ from test_conv_pipe.tools.core_tools import Tool, ToolDependencies
+
+ logger = logging.getLogger(__name__)
+
+ Direction = Literal["left", "right", "up", "down", "front"]
+
+
+ class MoveHead(Tool):
+     """Move head in a given direction."""
+
+     name = "move_head"
+     description = "Move your head in a given direction: left, right, up, down or front."
+     parameters_schema = {
+         "type": "object",
+         "properties": {
+             "direction": {
+                 "type": "string",
+                 "enum": ["left", "right", "up", "down", "front"],
+             },
+         },
+         "required": ["direction"],
+     }
+
+     # Mapping: direction -> args for create_head_pose
+     DELTAS: Dict[str, Tuple[int, int, int, int, int, int]] = {
+         "left": (0, 0, 0, 0, 0, 40),
+         "right": (0, 0, 0, 0, 0, -40),
+         "up": (0, 0, 0, 0, -30, 0),
+         "down": (0, 0, 0, 0, 30, 0),
+         "front": (0, 0, 0, 0, 0, 0),
+     }
+
+     async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
+         """Move head in a given direction."""
+         direction_raw = kwargs.get("direction")
+         if not isinstance(direction_raw, str):
+             return {"error": "direction must be a string"}
+         direction: Direction = direction_raw  # type: ignore[assignment]
+         logger.info("Tool call: move_head direction=%s", direction)
+
+         deltas = self.DELTAS.get(direction, self.DELTAS["front"])
+         target = create_head_pose(*deltas, degrees=True)
+
+         # Queue an interpolated goto through the movement manager
+         try:
+             movement_manager = deps.movement_manager
+
+             # Get current state for interpolation
+             current_head_pose = deps.reachy_mini.get_current_head_pose()
+             _, current_antennas = deps.reachy_mini.get_current_joint_positions()
+
+             # Create goto move
+             goto_move = GotoQueueMove(
+                 target_head_pose=target,
+                 start_head_pose=current_head_pose,
+                 target_antennas=(0, 0),  # Reset antennas to default
+                 start_antennas=(
+                     current_antennas[0],
+                     current_antennas[1],
+                 ),  # Skip body_yaw
+                 target_body_yaw=0,  # Reset body yaw
+                 start_body_yaw=current_antennas[0],  # body_yaw is first in joint positions
+                 duration=deps.motion_duration_s,
+             )
+
+             movement_manager.queue_move(goto_move)
+             movement_manager.set_moving_state(deps.motion_duration_s)
+
+             return {"status": f"looking {direction}"}
+
+         except Exception as e:
+             logger.exception("move_head failed")
+             return {"error": f"move_head failed: {type(e).__name__}: {e}"}
src/test_conv_pipe/tools/play_emotion.py ADDED
@@ -0,0 +1,84 @@
+ import logging
+ from typing import Any, Dict
+
+ from test_conv_pipe.tools.core_tools import Tool, ToolDependencies
+
+ logger = logging.getLogger(__name__)
+
+ # Initialize emotion library
+ try:
+     from reachy_mini.motion.recorded_move import RecordedMoves
+     from test_conv_pipe.dance_emotion_moves import EmotionQueueMove
+
+     # Note: huggingface_hub automatically reads HF_TOKEN from environment variables
+     RECORDED_MOVES = RecordedMoves("pollen-robotics/reachy-mini-emotions-library")
+     EMOTION_AVAILABLE = True
+ except Exception as e:  # the Hub download can fail for reasons other than ImportError
+     logger.warning(f"Emotion library not available: {e}")
+     RECORDED_MOVES = None
+     EMOTION_AVAILABLE = False
+
+
+ def get_available_emotions_and_descriptions() -> str:
+     """Get a formatted list of available emotions with descriptions."""
+     if not EMOTION_AVAILABLE:
+         return "Emotions not available"
+
+     try:
+         emotion_names = RECORDED_MOVES.list_moves()
+         output = "Available emotions:\n"
+         for name in emotion_names:
+             description = RECORDED_MOVES.get(name).description
+             output += f"  - {name}: {description}\n"
+         return output
+     except Exception as e:
+         return f"Error getting emotions: {e}"
+
+
+ class PlayEmotion(Tool):
+     """Play a pre-recorded emotion."""
+
+     name = "play_emotion"
+     description = "Play a pre-recorded emotion"
+     parameters_schema = {
+         "type": "object",
+         "properties": {
+             "emotion": {
+                 "type": "string",
+                 "description": f"""Name of the emotion to play.
+                 Here is a list of the available emotions:
+                 {get_available_emotions_and_descriptions()}
+                 """,
+             },
+         },
+         "required": ["emotion"],
+     }
+
+     async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
+         """Play a pre-recorded emotion."""
+         if not EMOTION_AVAILABLE:
+             return {"error": "Emotion system not available"}
+
+         emotion_name = kwargs.get("emotion")
+         if not emotion_name:
+             return {"error": "Emotion name is required"}
+
+         logger.info("Tool call: play_emotion emotion=%s", emotion_name)
+
+         # Check that the emotion exists, then queue it
+         try:
+             emotion_names = RECORDED_MOVES.list_moves()
+             if emotion_name not in emotion_names:
+                 return {"error": f"Unknown emotion '{emotion_name}'. Available: {emotion_names}"}
+
+             movement_manager = deps.movement_manager
+             emotion_move = EmotionQueueMove(emotion_name, RECORDED_MOVES)
+             movement_manager.queue_move(emotion_move)
+
+             return {"status": "queued", "emotion": emotion_name}
+
+         except Exception as e:
+             logger.exception("Failed to play emotion")
+             return {"error": f"Failed to play emotion: {e!s}"}
src/test_conv_pipe/tools/stop_dance.py ADDED
@@ -0,0 +1,31 @@
+ import logging
+ from typing import Any, Dict
+
+ from test_conv_pipe.tools.core_tools import Tool, ToolDependencies
+
+ logger = logging.getLogger(__name__)
+
+
+ class StopDance(Tool):
+     """Stop the current dance move."""
+
+     name = "stop_dance"
+     description = "Stop the current dance move"
+     parameters_schema = {
+         "type": "object",
+         "properties": {
+             "dummy": {
+                 "type": "boolean",
+                 "description": "dummy boolean, set it to true",
+             },
+         },
+         "required": ["dummy"],
+     }
+
+     async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
+         """Stop the current dance move by clearing the movement queue."""
+         logger.info("Tool call: stop_dance")
+         movement_manager = deps.movement_manager
+         movement_manager.clear_move_queue()
+         return {"status": "stopped dance and cleared queue"}