robertkeus commited on
Commit
f7c0a2b
·
verified ·
1 Parent(s): 1df7311

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .env.example +15 -0
  2. .gitattributes +44 -35
  3. .github/pull_request_template.md +36 -0
  4. .github/workflows/lint.yml +10 -0
  5. .github/workflows/tests.yml +74 -0
  6. .github/workflows/typecheck.yml +29 -0
  7. .gitignore +61 -0
  8. LICENSE +201 -0
  9. README.md +251 -4
  10. app.py +194 -0
  11. deploy_to_reachy.sh +42 -0
  12. docs/assets/conversation_app_arch.svg +0 -0
  13. docs/assets/reachy_mini_dance.gif +3 -0
  14. docs/scheme.mmd +58 -0
  15. index.html +125 -0
  16. pyproject.toml +123 -0
  17. requirements.txt +6 -0
  18. src/reachy_mini_conversation_app/__init__.py +1 -0
  19. src/reachy_mini_conversation_app/audio/__init__.py +1 -0
  20. src/reachy_mini_conversation_app/audio/head_wobbler.py +181 -0
  21. src/reachy_mini_conversation_app/audio/speech_tapper.py +268 -0
  22. src/reachy_mini_conversation_app/camera_worker.py +241 -0
  23. src/reachy_mini_conversation_app/config.py +66 -0
  24. src/reachy_mini_conversation_app/console.py +499 -0
  25. src/reachy_mini_conversation_app/dance_emotion_moves.py +154 -0
  26. src/reachy_mini_conversation_app/gradio_personality.py +301 -0
  27. src/reachy_mini_conversation_app/headless_personality.py +102 -0
  28. src/reachy_mini_conversation_app/headless_personality_ui.py +276 -0
  29. src/reachy_mini_conversation_app/images/reachymini_avatar.png +3 -0
  30. src/reachy_mini_conversation_app/images/user_avatar.png +3 -0
  31. src/reachy_mini_conversation_app/main.py +242 -0
  32. src/reachy_mini_conversation_app/mcp/__init__.py +8 -0
  33. src/reachy_mini_conversation_app/mcp/client.py +193 -0
  34. src/reachy_mini_conversation_app/mcp/cursor_bridge.py +344 -0
  35. src/reachy_mini_conversation_app/mcp/figma.py +288 -0
  36. src/reachy_mini_conversation_app/moves.py +849 -0
  37. src/reachy_mini_conversation_app/openai_realtime.py +754 -0
  38. src/reachy_mini_conversation_app/profiles/__init__.py +1 -0
  39. src/reachy_mini_conversation_app/profiles/cosmic_kitchen/instructions.txt +49 -0
  40. src/reachy_mini_conversation_app/profiles/cosmic_kitchen/tools.txt +8 -0
  41. src/reachy_mini_conversation_app/profiles/default/instructions.txt +1 -0
  42. src/reachy_mini_conversation_app/profiles/default/tools.txt +11 -0
  43. src/reachy_mini_conversation_app/profiles/designer/instructions.txt +54 -0
  44. src/reachy_mini_conversation_app/profiles/designer/tools.txt +18 -0
  45. src/reachy_mini_conversation_app/profiles/example/instructions.txt +3 -0
  46. src/reachy_mini_conversation_app/profiles/example/sweep_look.py +127 -0
  47. src/reachy_mini_conversation_app/profiles/example/tools.txt +13 -0
  48. src/reachy_mini_conversation_app/profiles/mars_rover/instructions.txt +25 -0
  49. src/reachy_mini_conversation_app/profiles/mars_rover/tools.txt +8 -0
  50. src/reachy_mini_conversation_app/profiles/short_bored_teenager/instructions.txt +1 -0
.env.example ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ OPENAI_API_KEY=
2
+ MODEL_NAME="gpt-realtime"
3
+
4
+ # Local vision model (only used with --local-vision CLI flag)
5
+ # By default, vision is handled by gpt-realtime when the camera tool is used
6
+ LOCAL_VISION_MODEL=HuggingFaceTB/SmolVLM2-2.2B-Instruct
7
+
8
+ # Cache for local VLM (only used with --local-vision CLI flag)
9
+ HF_HOME=./cache
10
+
11
+ # Hugging Face token for accessing datasets/models
12
+ HF_TOKEN=
13
+
14
+ # To select a specific profile with custom instructions and tools, to be placed in profiles/<myprofile>/ (instructions.txt and tools.txt)
15
+ REACHY_MINI_CUSTOM_PROFILE="example"
.gitattributes CHANGED
@@ -1,35 +1,44 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
1
+ # Macro for all binary files that should use Git LFS.
2
+ [attr]lfs -text filter=lfs diff=lfs merge=lfs
3
+
4
+ # Image
5
+ *.jpg lfs
6
+ *.jpeg lfs
7
+ *.png lfs
8
+ *.apng lfs
9
+ *.astc lfs
10
+ *.gif lfs
11
+ *.bmp lfs
12
+ *.exr lfs
13
+ *.tga lfs
14
+ *.tiff lfs
15
+ *.tif lfs
16
+ *.iff lfs
17
+ *.pict lfs
18
+ *.dds lfs
19
+ *.xcf lfs
20
+ *.leo lfs
21
+ *.kra lfs
22
+ *.kpp lfs
23
+ *.clip lfs
24
+ *.webm lfs
25
+ *.webp lfs
26
+ *.svg lfs
27
+ *.svgz lfs
28
+ *.psd lfs
29
+ *.afphoto lfs
30
+ *.afdesign lfs
31
+ # Models
32
+ *.pth lfs
33
+ # Binaries
34
+ *.bin lfs
35
+ *.pkl lfs
36
+ *.pckl lfs
37
+ # 3D
38
+ *.ply lfs
39
+ *.vis lfs
40
+ *.db lfs
41
+ # (duplicate *.ply entry — already covered above)
42
+ docs/assets/reachy_mini_dance.gif filter=lfs diff=lfs merge=lfs -text
43
+ src/reachy_mini_conversation_app/images/reachymini_avatar.png filter=lfs diff=lfs merge=lfs -text
44
+ src/reachy_mini_conversation_app/images/user_avatar.png filter=lfs diff=lfs merge=lfs -text
.github/pull_request_template.md ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Summary
2
+ <!-- What does this PR change and why? -->
3
+
4
+ ## Category
5
+ - [ ] Fix
6
+ - [ ] Feature
7
+ - [ ] Refactor
8
+ - [ ] Docs
9
+ - [ ] CI/CD
10
+ - [ ] Other
11
+
12
+ ## Check before merging
13
+ ### Basic
14
+ - [ ] CI green (Ruff, Tests, Mypy)
15
+ - [ ] Code update is clear (types, docs, comments)
16
+
17
+ ### Run modes
18
+ - [ ] Headless mode (default)
19
+ - [ ] Gradio UI (`--gradio`)
20
+ - [ ] Everything is tested in simulation as well (`--gradio` required)
21
+
22
+ ### Vision / motion
23
+ - [ ] Local vision (`--local-vision`)
24
+ - [ ] YOLO or MediaPipe head tracker (`--head-tracker {yolo,mediapipe}`)
25
+ - [ ] Camera pipeline (with/without `--no-camera`)
26
+ - [ ] Movement manager (dances, emotions, head motion)
27
+ - [ ] Head wobble
28
+ - [ ] Profiles or custom tools
29
+
30
+ ### Dependencies & config
31
+ - [ ] Updated `pyproject.toml` if deps/extras changed
32
+ - [ ] Regenerated `uv.lock` if deps changed
33
+ - [ ] Updated `.env.example` if new config vars added
34
+
35
+ ## Notes
36
+ <!-- Optional: context, caveats, migration notes -->
.github/workflows/lint.yml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Ruff
2
+ on: [ push, pull_request ]
3
+ jobs:
4
+ ruff:
5
+ runs-on: ubuntu-latest
6
+ steps:
7
+ - uses: actions/checkout@v4
8
+ - uses: astral-sh/ruff-action@v3
9
+ with:
10
+ version: "0.12.0"
.github/workflows/tests.yml ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Tests
2
+ on:
3
+ push:
4
+ pull_request:
5
+
6
+ permissions:
7
+ contents: read
8
+ actions: write
9
+
10
+ concurrency:
11
+ group: ${{ github.workflow }}-${{ github.ref }}
12
+ cancel-in-progress: true
13
+
14
+ jobs:
15
+ tests:
16
+ name: pytest (py${{ matrix.python-version }})
17
+ runs-on: ubuntu-latest
18
+ timeout-minutes: 15
19
+ strategy:
20
+ fail-fast: false
21
+ matrix:
22
+ python-version: ["3.12"]
23
+
24
+ env:
25
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
26
+ HF_HUB_ETAG_TIMEOUT: "120"
27
+ HF_HUB_DOWNLOAD_TIMEOUT: "120"
28
+
29
+ steps:
30
+ - uses: actions/checkout@v4
31
+
32
+ - uses: actions/setup-python@v5
33
+ with:
34
+ python-version: ${{ matrix.python-version }}
35
+
36
+ - uses: astral-sh/setup-uv@v5
37
+
38
+ - name: Set HF_HOME
39
+ shell: bash
40
+ run: |
41
+ echo "HF_HOME=${RUNNER_TEMP}/.hf" >> "$GITHUB_ENV"
42
+ mkdir -p "${RUNNER_TEMP}/.hf"
43
+
44
+ - name: Cache Hugging Face hub
45
+ uses: actions/cache@v4
46
+ with:
47
+ path: ${{ runner.temp }}/.hf
48
+ key: hf-${{ runner.os }}-${{ hashFiles('uv.lock', 'pyproject.toml') }}
49
+ restore-keys: hf-${{ runner.os }}-
50
+
51
+ # test-only .env file
52
+ - name: Create test .env
53
+ run: |
54
+ printf "OPENAI_API_KEY=test-dummy\n" > .env
55
+
56
+ - name: Install (locked)
57
+ run: |
58
+ uv sync --frozen --group dev --extra all_vision
59
+
60
+ # Prefetch HF dataset to avoid download during test collection
61
+ - name: Prefetch HF dataset
62
+ run: |
63
+ .venv/bin/python - <<'PY'
64
+ from huggingface_hub import snapshot_download
65
+ snapshot_download(
66
+ repo_id="pollen-robotics/reachy-mini-emotions-library",
67
+ repo_type="dataset",
68
+ etag_timeout=120,
69
+ max_workers=4,
70
+ )
71
+ PY
72
+
73
+ - name: Run tests
74
+ run: .venv/bin/pytest -q
.github/workflows/typecheck.yml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Type check
2
+
3
+ on: [push, pull_request]
4
+
5
+ permissions:
6
+ contents: read
7
+
8
+ concurrency:
9
+ group: ${{ github.workflow }}-${{ github.ref }}
10
+ cancel-in-progress: true
11
+
12
+ jobs:
13
+ mypy:
14
+ runs-on: ubuntu-latest
15
+ timeout-minutes: 10
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - uses: actions/setup-python@v5
20
+ with:
21
+ python-version: "3.12"
22
+
23
+ - uses: astral-sh/setup-uv@v5
24
+
25
+ - name: Install deps (locked) incl. vision extras
26
+ run: uv sync --frozen --group dev --extra all_vision
27
+
28
+ - name: Run mypy
29
+ run: .venv/bin/mypy --pretty --show-error-codes .
.gitignore ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+
7
+ # Virtual environments
8
+ .venv/
9
+ venv/
10
+ ENV/
11
+ env/
12
+
13
+ # Environment variables
14
+ .env
15
+
16
+ # Build and distribution
17
+ build/
18
+ dist/
19
+ *.egg-info/
20
+ .eggs/
21
+
22
+ # Testing
23
+ .pytest_cache/
24
+ .coverage
25
+ .hypothesis/
26
+ htmlcov/
27
+ coverage.xml
28
+ *.cover
29
+
30
+ # Linting and formatting
31
+ .ruff_cache/
32
+ .mypy_cache/
33
+
34
+ # IDE
35
+ .vscode/
36
+ .idea/
37
+ *.swp
38
+ *.swo
39
+
40
+ # Security
41
+ *.key
42
+ *.pem
43
+ *.crt
44
+ *.csr
45
+
46
+ # Temporary files
47
+ tmp/
48
+ *.log
49
+ cache/
50
+
51
+ # macOS
52
+ .DS_Store
53
+
54
+ # Linux
55
+ *~
56
+ .directory
57
+ .Trash-*
58
+ .nfs*
59
+
60
+ # User-created personalities (managed by UI)
61
+ src/reachy_mini_conversation_app/profiles/user_personalities/
LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
README.md CHANGED
@@ -1,12 +1,259 @@
1
  ---
2
- title: Reachy Vibe Coder
3
- emoji: 👀
4
  colorFrom: blue
5
  colorTo: purple
6
  sdk: gradio
7
- sdk_version: 6.2.0
8
  app_file: app.py
9
  pinned: false
 
 
 
 
 
 
 
 
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Reachy the Vibe Coder
3
+ emoji: 🎨
4
  colorFrom: blue
5
  colorTo: purple
6
  sdk: gradio
7
+ sdk_version: "5.9.1"
8
  app_file: app.py
9
  pinned: false
10
+ short_description: Voice-activated coding with Reachy Mini!
11
+ license: apache-2.0
12
+ tags:
13
+ - reachy_mini
14
+ - reachy_mini_python_app
15
+ - vibe-coding
16
+ - cursor
17
+ - robotics
18
+ - voice-assistant
19
  ---
20
 
21
+ # 🎨 Reachy the Vibe Coder
22
+
23
+ **Voice-activated coding with Reachy Mini!** Tell Reachy what to build, and watch it come to life in Cursor IDE.
24
+
25
+ > "Reachy, build me a website" → Reachy decides the colors, layout, animations, everything, and codes it for you!
26
+
27
+ Conversational app for the Reachy Mini robot combining OpenAI's realtime APIs, vision pipelines, choreographed motion libraries, and **Cursor IDE integration for vibe coding**.
28
+
29
+ ![Reachy Mini Dance](docs/assets/reachy_mini_dance.gif)
30
+
31
+ ## Architecture
32
+
33
+ The app follows a layered architecture connecting the user, AI services, and robot hardware:
34
+
35
+ <p align="center">
36
+ <img src="docs/assets/conversation_app_arch.svg" alt="Architecture Diagram" width="600"/>
37
+ </p>
38
+
39
+ ## 🚀 Vibe Coding Features
40
+
41
+ | Tool | Description |
42
+ |------|-------------|
43
+ | `vibe_code` | 🎨 Tell Reachy to build something - it decides ALL the details (tech stack, colors, layout, animations) and sends a detailed prompt to Cursor! |
44
+ | `vibe_big_project` | 🚀 For epic builds - entire apps, refactors, multi-file projects using Cursor's Agent mode |
45
+
46
+ **How it works:**
47
+ 1. Say "Reachy, build me a website"
48
+ 2. Reachy becomes your **creative director** - deciding colors, fonts, layout, tech stack
49
+ 3. Opens Cursor IDE with a new window
50
+ 4. Types a detailed prompt and submits it
51
+ 5. Cursor's AI starts coding!
52
+
53
+ ## Overview
54
+ - Real-time audio conversation loop powered by the OpenAI realtime API and `fastrtc` for low-latency streaming.
55
+ - Vision processing uses gpt-realtime by default (when camera tool is used), with optional local vision processing using SmolVLM2 model running on-device (CPU/GPU/MPS) via `--local-vision` flag.
56
+ - Layered motion system queues primary moves (dances, emotions, goto poses, breathing) while blending speech-reactive wobble and face-tracking.
57
+ - Async tool dispatch integrates robot motion, camera capture, and optional face-tracking capabilities through a Gradio web UI with live transcripts.
58
+
59
+ ## Installation
60
+
61
+ > [!IMPORTANT]
62
+ > Before using this app, you need to install [Reachy Mini's SDK](https://github.com/pollen-robotics/reachy_mini/).<br>
63
+ > Windows support is currently experimental and has not been extensively tested. Use with caution.
64
+
65
+ ### Using uv
66
+ You can set up the project quickly using [uv](https://docs.astral.sh/uv/):
67
+
68
+ ```bash
69
+ uv venv --python 3.12.1 # Create a virtual environment with Python 3.12.1
70
+ source .venv/bin/activate
71
+ uv sync
72
+ ```
73
+
74
+ > [!NOTE]
75
+ > To reproduce the exact dependency set from this repo's `uv.lock`, run `uv sync` with `--locked` (or `--frozen`). This ensures `uv` installs directly from the lockfile without re-resolving or updating any versions.
76
+
77
+ To include optional dependencies:
78
+ ```
79
+ uv sync --extra reachy_mini_wireless # For wireless Reachy Mini with GStreamer support
80
+ uv sync --extra local_vision # For local PyTorch/Transformers vision
81
+ uv sync --extra yolo_vision # For YOLO-based vision
82
+ uv sync --extra mediapipe_vision # For MediaPipe-based vision
83
+ uv sync --extra all_vision # For all vision features
84
+ ```
85
+
86
+ You can combine extras or include dev dependencies:
87
+ ```
88
+ uv sync --extra all_vision --group dev
89
+ ```
90
+
91
+ ### Using pip
92
+
93
+ ```bash
94
+ python -m venv .venv # Create a virtual environment
95
+ source .venv/bin/activate
96
+ pip install -e .
97
+ ```
98
+
99
+ Install optional extras depending on the feature set you need:
100
+
101
+ ```bash
102
+ # Wireless Reachy Mini support
103
+ pip install -e .[reachy_mini_wireless]
104
+
105
+ # Vision stacks (choose at least one if you plan to run face tracking)
106
+ pip install -e .[local_vision]
107
+ pip install -e .[yolo_vision]
108
+ pip install -e .[mediapipe_vision]
109
+ pip install -e .[all_vision] # installs every vision extra
110
+
111
+ # Tooling for development workflows
112
+ pip install -e .[dev]
113
+ ```
114
+
115
+ Some wheels (e.g. PyTorch) are large and require compatible CUDA or CPU builds—make sure your platform matches the binaries pulled in by each extra.
116
+
117
+ ## Optional dependency groups
118
+
119
+ | Extra | Purpose | Notes |
120
+ |-------|---------|-------|
121
+ | `reachy_mini_wireless` | Wireless Reachy Mini with GStreamer support. | Required for wireless versions of Reachy Mini, includes GStreamer dependencies.
122
+ | `local_vision` | Run the local VLM (SmolVLM2) through PyTorch/Transformers. | GPU recommended; ensure compatible PyTorch builds for your platform.
123
+ | `yolo_vision` | YOLOv8 tracking via `ultralytics` and `supervision`. | CPU friendly; supports the `--head-tracker yolo` option.
124
+ | `mediapipe_vision` | Lightweight landmark tracking with MediaPipe. | Works on CPU; enables `--head-tracker mediapipe`.
125
+ | `all_vision` | Convenience alias installing every vision extra. | Install when you want the flexibility to experiment with every provider.
126
+ | `dev` | Developer tooling (`pytest`, `ruff`). | Add on top of either base or `all_vision` environments.
127
+
128
+ ## Configuration
129
+
130
+ 1. Copy `.env.example` to `.env`.
131
+ 2. Fill in the required values, notably the OpenAI API key.
132
+
133
+ | Variable | Description |
134
+ |----------|-------------|
135
+ | `OPENAI_API_KEY` | Required. Grants access to the OpenAI realtime endpoint.
136
+ | `MODEL_NAME` | Override the realtime model (defaults to `gpt-realtime`). Used for both conversation and vision (unless `--local-vision` flag is used).
137
+ | `HF_HOME` | Cache directory for local Hugging Face downloads (only used with `--local-vision` flag, defaults to `./cache`).
138
+ | `HF_TOKEN` | Optional token for Hugging Face models (only used with `--local-vision` flag, falls back to `huggingface-cli login`).
139
+ | `LOCAL_VISION_MODEL` | Hugging Face model path for local vision processing (only used with `--local-vision` flag, defaults to `HuggingFaceTB/SmolVLM2-2.2B-Instruct`).
140
+
141
+ ## Running the app
142
+
143
+ Activate your virtual environment, ensure the Reachy Mini robot (or simulator) is reachable, then launch:
144
+
145
+ ```bash
146
+ reachy-mini-conversation-app
147
+ ```
148
+
149
+ By default, the app runs in console mode for direct audio interaction. Use the `--gradio` flag to launch a web UI served locally at http://127.0.0.1:7860/ (required when running in simulation mode). With a camera attached, vision is handled by the gpt-realtime model when the camera tool is used. For local vision processing, use the `--local-vision` flag to process frames periodically using the SmolVLM2 model. Additionally, you can enable face tracking via YOLO or MediaPipe pipelines depending on the extras you installed.
150
+
151
+ ### CLI options
152
+
153
+ | Option | Default | Description |
154
+ |--------|---------|-------------|
155
+ | `--head-tracker {yolo,mediapipe}` | `None` | Select a face-tracking backend when a camera is available. YOLO is implemented locally, MediaPipe comes from the `reachy_mini_toolbox` package. Requires the matching optional extra. |
156
+ | `--no-camera` | `False` | Run without camera capture or face tracking. |
157
+ | `--local-vision` | `False` | Use local vision model (SmolVLM2) for periodic image processing instead of gpt-realtime vision. Requires `local_vision` extra to be installed. |
158
+ | `--gradio` | `False` | Launch the Gradio web UI. Without this flag, runs in console mode. Required when running in simulation mode. |
159
+ | `--debug` | `False` | Enable verbose logging for troubleshooting. |
160
+ | `--wireless-version` | `False` | Use GStreamer backend for wireless version of the robot. Requires `reachy_mini_wireless` extra to be installed.
161
+
162
+
163
+ ### Examples
164
+ - Run on hardware with MediaPipe face tracking:
165
+
166
+ ```bash
167
+ reachy-mini-conversation-app --head-tracker mediapipe
168
+ ```
169
+
170
+ - Run with local vision processing (requires `local_vision` extra):
171
+
172
+ ```bash
173
+ reachy-mini-conversation-app --local-vision
174
+ ```
175
+
176
+ - Run with wireless support (requires `reachy_mini_wireless` extra and daemon started with `--wireless-version`):
177
+
178
+ ```bash
179
+ reachy-mini-conversation-app --wireless-version
180
+ ```
181
+
182
+ - Disable the camera pipeline (audio-only conversation):
183
+
184
+ ```bash
185
+ reachy-mini-conversation-app --no-camera
186
+ ```
187
+
188
+ ### Troubleshooting
189
+
190
+ - Timeout error:
191
+ If you get an error like this:
192
+ ```bash
193
+ TimeoutError: Timeout while waiting for connection with the server.
194
+ ```
195
+ It probably means that the Reachy Mini's daemon isn't running. Install [Reachy Mini's SDK](https://github.com/pollen-robotics/reachy_mini/) and start the daemon.
196
+
197
+ ## LLM tools exposed to the assistant
198
+
199
+ | Tool | Action | Dependencies |
200
+ |------|--------|--------------|
201
+ | `move_head` | Queue a head pose change (left/right/up/down/front). | Core install only. |
202
+ | `camera` | Capture the latest camera frame and send it to gpt-realtime for vision analysis. | Requires camera worker; uses gpt-realtime vision by default. |
203
+ | `head_tracking` | Enable or disable face-tracking offsets (not facial recognition - only detects and tracks face position). | Camera worker with configured head tracker. |
204
+ | `dance` | Queue a dance from `reachy_mini_dances_library`. | Core install only. |
205
+ | `stop_dance` | Clear queued dances. | Core install only. |
206
+ | `play_emotion` | Play a recorded emotion clip via Hugging Face assets. | Needs `HF_TOKEN` for the recorded emotions dataset. |
207
+ | `stop_emotion` | Clear queued emotions. | Core install only. |
208
+ | `do_nothing` | Explicitly remain idle. | Core install only. |
209
+
210
+ ## Using custom profiles
211
+ Create custom profiles with dedicated instructions and enabled tools!
212
+
213
+ Set `REACHY_MINI_CUSTOM_PROFILE=<name>` to load `src/reachy_mini_conversation_app/profiles/<name>/` (see `.env.example`). If unset, the `default` profile is used.
214
+
215
+ Each profile requires two files: `instructions.txt` (prompt text) and `tools.txt` (list of allowed tools), and optionally contains custom tools implementations.
216
+
217
+ ### Custom instructions
218
+ Write plain-text prompts in `instructions.txt`. To reuse shared prompt pieces, add lines like:
219
+ ```
220
+ [passion_for_lobster_jokes]
221
+ [identities/witty_identity]
222
+ ```
223
+ Each placeholder pulls the matching file under `src/reachy_mini_conversation_app/prompts/` (nested paths allowed). See `src/reachy_mini_conversation_app/profiles/example/` for a reference layout.
224
+
225
+ ### Enabling tools
226
+ List enabled tools in `tools.txt`, one per line; prefix with `#` to comment out. For example:
227
+
228
+ ```
229
+ play_emotion
230
+ # move_head
231
+
232
+ # My custom tool defined locally
233
+ sweep_look
234
+ ```
235
+ Tools are resolved first from Python files in the profile folder (custom tools), then from the shared library `src/reachy_mini_conversation_app/tools/` (e.g., `dance`, `head_tracking`).
236
+
237
+ ### Custom tools
238
+ On top of built-in tools found in the shared library, you can implement custom tools specific to your profile by adding Python files in the profile folder.
239
+ Custom tools must subclass `reachy_mini_conversation_app.tools.core_tools.Tool` (see `profiles/example/sweep_look.py`).
240
+
241
+ ### Edit personalities from the UI
242
+ When running with `--gradio`, open the “Personality” accordion:
243
+ - Select among available profiles (folders under `src/reachy_mini_conversation_app/profiles/`) or the built‑in default.
244
+ - Click “Apply” to update the current session instructions live.
245
+ - Create a new personality by entering a name and instructions text; it stores files under `profiles/<name>/` and copies `tools.txt` from the `default` profile.
246
+
247
+ Note: The “Personality” panel updates the conversation instructions. Tool sets are loaded at startup from `tools.txt` and are not hot‑reloaded.
248
+
249
+
250
+
251
+
252
+ ## Development workflow
253
+ - Install the dev group extras: `uv sync --group dev` or `pip install -e .[dev]`.
254
+ - Run formatting and linting: `ruff check .`.
255
+ - Execute the test suite: `pytest`.
256
+ - When iterating on robot motions, keep the control loop responsive by offloading blocking work using the helpers in `tools.py`.
257
+
258
+ ## License
259
+ Apache 2.0
app.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Hugging Face Spaces entry point for Reachy Mini Conversation App.
2
+
3
+ This Space serves as documentation and installation hub for the Reachy Mini
4
+ Conversation App - a voice-activated coding assistant for the Reachy Mini robot.
5
+ """
6
+
7
+ import gradio as gr
8
+
9
+
10
+ DESCRIPTION = """
11
+ # 🎨 Reachy the Vibe Coder
12
+
13
+ **Voice-activated coding with Reachy Mini!** Tell Reachy what to build, and watch it come to life in Cursor IDE.
14
+
15
+ > "Reachy, build me a website" → Reachy decides the colors, layout, animations, everything, and codes it for you!
16
+
17
+ ## 🚀 Quick Install
18
+
19
+ ```bash
20
+ pip install reachy-mini-conversation-app
21
+ ```
22
+
23
+ Or install from this Space:
24
+
25
+ ```bash
26
+ pip install git+https://huggingface.co/spaces/robertkeus/reachy-vibe-coder
27
+ ```
28
+
29
+ ## 📋 Requirements
30
+
31
+ - Python 3.10+
32
+ - [Reachy Mini SDK](https://github.com/pollen-robotics/reachy_mini/) installed
33
+ - OpenAI API key (for realtime conversation)
34
+ - Reachy Mini robot (hardware or simulator)
35
+
36
+ ## 🎮 Running the App
37
+
38
+ ```bash
39
+ # Activate your environment
40
+ source .venv/bin/activate
41
+
42
+ # Run with Gradio UI
43
+ reachy-mini-conversation-app --gradio
44
+
45
+ # Run with face tracking
46
+ reachy-mini-conversation-app --head-tracker mediapipe
47
+
48
+ # Audio only (no camera)
49
+ reachy-mini-conversation-app --no-camera
50
+ ```
51
+
52
+ ## ✨ Features
53
+
54
+ | Tool | Description |
55
+ |------|-------------|
56
+ | `vibe_code` | 🎨 Tell Reachy to build something - it decides ALL the details! |
57
+ | `vibe_big_project` | 🚀 For epic builds using Cursor's Agent mode |
58
+ | `dance` | 💃 Queue choreographed dances |
59
+ | `play_emotion` | 😊 Play recorded emotion clips |
60
+ | `camera` | 📷 Capture and analyze camera frames |
61
+ | `head_tracking` | 👀 Enable/disable face tracking |
62
+
63
+ ## 🏗️ Architecture
64
+
65
+ The app combines:
66
+ - **OpenAI Realtime API** for voice conversation
67
+ - **FastRTC** for low-latency audio streaming
68
+ - **Gradio** for the web interface
69
+ - **Reachy Mini SDK** for robot control
70
+ - **Cursor IDE integration** for vibe coding
71
+
72
+ ---
73
+
74
+ *This Space provides installation and documentation. The actual app runs locally with your Reachy Mini robot.*
75
+ """
76
+
77
+ INSTALL_INSTRUCTIONS = """
78
+ ## 📦 Installation Methods
79
+
80
+ ### Using uv (recommended)
81
+
82
+ ```bash
83
+ git clone https://huggingface.co/spaces/robertkeus/reachy-vibe-coder
84
+ cd reachy-mini-conversation-app
85
+
86
+ uv venv --python 3.12.1
87
+ source .venv/bin/activate
88
+ uv sync
89
+ ```
90
+
91
+ ### Using pip
92
+
93
+ ```bash
94
+ git clone https://huggingface.co/spaces/robertkeus/reachy-vibe-coder
95
+ cd reachy-mini-conversation-app
96
+
97
+ python -m venv .venv
98
+ source .venv/bin/activate
99
+ pip install -e .
100
+ ```
101
+
102
+ ### Optional Dependencies
103
+
104
+ ```bash
105
+ # Wireless Reachy Mini support
106
+ pip install -e .[reachy_mini_wireless]
107
+
108
+ # Vision options
109
+ pip install -e .[local_vision] # PyTorch/Transformers
110
+ pip install -e .[yolo_vision] # YOLO tracking
111
+ pip install -e .[mediapipe_vision] # MediaPipe
112
+ pip install -e .[all_vision] # Everything
113
+ ```
114
+
115
+ ## ⚙️ Configuration
116
+
117
+ 1. Copy `.env.example` to `.env`
118
+ 2. Add your OpenAI API key:
119
+
120
+ ```env
121
+ OPENAI_API_KEY=your-key-here
122
+ ```
123
+
124
+ ## 🔧 Troubleshooting
125
+
126
+ **Timeout error?**
127
+ Make sure the Reachy Mini daemon is running:
128
+ ```bash
129
+ # Install and start the SDK first
130
+ # See: https://github.com/pollen-robotics/reachy_mini/
131
+ ```
132
+ """
133
+
134
+
135
def create_demo():
    """Create the Gradio demo interface.

    Builds the documentation/installation landing page for the Hugging Face
    Space: a themed Blocks layout showing the main description, a collapsible
    installation guide, a demo-video placeholder, and footer links.

    Returns:
        The assembled (not yet launched) ``gr.Blocks`` app.
    """
    with gr.Blocks(
        title="Reachy the Vibe Coder",
        theme=gr.themes.Soft(
            primary_hue="blue",
            secondary_hue="purple",
        ),
        # Custom CSS: gradient headline, dark install panel, hidden Gradio footer.
        css="""
        .main-header {
            background: linear-gradient(135deg, #00d4aa 0%, #7c3aed 100%);
            -webkit-background-clip: text;
            -webkit-text-fill-color: transparent;
            background-clip: text;
        }
        .install-box {
            background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
            border-radius: 12px;
            padding: 20px;
        }
        footer {
            visibility: hidden;
        }
        """
    ) as demo:
        gr.Markdown(DESCRIPTION)

        with gr.Accordion("📦 Detailed Installation Guide", open=False):
            gr.Markdown(INSTALL_INSTRUCTIONS)

        with gr.Accordion("🎬 Demo Video", open=False):
            # NOTE(review): GIF is hot-linked from GitHub; verify the URL stays valid.
            gr.Markdown("""
            *Coming soon: Video demonstration of Reachy the Vibe Coder in action!*

            ![Reachy Mini Dance](https://raw.githubusercontent.com/pollen-robotics/reachy_mini/main/docs/assets/reachy_mini_dance.gif)
            """)

        with gr.Row():
            gr.Markdown("""
            ### 🔗 Links

            - [Reachy Mini SDK](https://github.com/pollen-robotics/reachy_mini/)
            - [Pollen Robotics](https://www.pollen-robotics.com/)
            """)

            gr.Markdown("""
            ### 📄 License

            Apache 2.0

            Made with ❤️ by Robert Keus
            """)

    return demo
189
+
190
+
191
# Entry point when executed directly (e.g. `python app.py` on a Space).
if __name__ == "__main__":
    demo = create_demo()
    demo.launch()
194
+
deploy_to_reachy.sh ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Deploy Reachy the Vibe Coder to your real Reachy Mini.
# Usage: ./deploy_to_reachy.sh [reachy_ip]   (default: 192.168.1.35)
set -euo pipefail  # abort on errors, unset variables, and failed pipeline stages

REACHY_IP="${1:-192.168.1.35}"
REACHY_USER="reachy"

echo "🎨 Deploying Reachy the Vibe Coder to $REACHY_IP..."
echo ""

# Build a wheel from the project located next to this script.
echo "📦 Building package..."
cd "$(dirname "$0")"
pip install build -q
python -m build --wheel -q

# Pick the most recently built wheel and fail loudly if none exists.
WHEEL=$(ls -t dist/*.whl | head -1)
if [ -z "$WHEEL" ]; then
    echo "❌ No wheel found in dist/ — did the build fail?" >&2
    exit 1
fi
echo "✅ Built: $WHEEL"

# Copy to Reachy
echo ""
echo "📤 Copying to Reachy (you'll need to enter the password)..."
scp "$WHEEL" "${REACHY_USER}@${REACHY_IP}:/tmp/"

# Install on Reachy. The heredoc delimiter is intentionally unquoted:
# $WHEEL_NAME and $REACHY_IP expand locally before the commands are sent.
echo ""
echo "🔧 Installing on Reachy..."
WHEEL_NAME=$(basename "$WHEEL")
ssh "${REACHY_USER}@${REACHY_IP}" << EOF
cd /tmp
pip install --upgrade "$WHEEL_NAME"
echo ""
echo "✅ Installed! Restarting the conversation app..."
# Find and restart the app if it's running
pkill -f reachy_mini_conversation_app 2>/dev/null || true
echo "🎉 Deployment complete! Go to http://${REACHY_IP}:8000 and turn on the Conversation App"
EOF

echo ""
echo "🚀 Done! Reachy the Vibe Coder is ready!"
42
+
docs/assets/conversation_app_arch.svg ADDED
docs/assets/reachy_mini_dance.gif ADDED

Git LFS Details

  • SHA256: 75914c3cb7af982e0b1c6369e25fc46d8c08a0ab5ad022240ae9c1a0d93967c3
  • Pointer size: 132 Bytes
  • Size of remote file: 3.93 MB
docs/scheme.mmd ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ config:
3
+ layout: dagre
4
+ flowchart:
5
+ htmlLabels: true
6
+ ---
7
+ flowchart TB
8
+ User(["<span style='font-size:16px;font-weight:bold;'>User</span><br><span style='font-size:13px;color:#01579b;'>Person interacting with system</span>"])
9
+ -- audio stream -->
10
+ UI@{ label: "<span style='font-size:16px;font-weight:bold;'>UI Layer</span><br><span style='font-size:13px;color:#0277bd;'>Gradio/Console</span>" }
11
+
12
+ UI -- audio stream -->
13
+ OpenAI@{ label: "<span style='font-size:17px;font-weight:bold;'>gpt-realtime API</span><br><span style='font-size:13px; color:#7b1fa2;'>Audio+Tool Calls+Vision</span>" }
14
+
15
+ OpenAI -- audio stream -->
16
+ Motion@{ label: "<span style='font-size:16px;font-weight:bold;'>Motion Control</span><br><span style='font-size:13px;color:#f57f17;'>Audio Sync + Tracking</span>" }
17
+
18
+ OpenAI -- tool calls -->
19
+ Handlers@{ label: "<span style='font-size:16px;font-weight:bold;'>Tool Handlers</span><br><span style='font-size:12px;color:#f9a825;'>move_head, camera, head_tracking,<br/>dance, play_emotion, do_nothing</span>" }
20
+
21
+ Handlers -- movement
22
+ requests --> Motion
23
+
24
+ Handlers -- camera frames, face tracking -->
25
+ Camera@{ label: "<span style='font-size:16px;font-weight:bold;'>Camera Worker</span><br><span style='font-size:13px;color:#f57f17;'>Frame Buffer + Face Tracking</span>" }
26
+
27
+ Handlers -. image for
28
+ analysis .-> OpenAI
29
+
30
+ Camera -- face tracking --> Motion
31
+
32
+ Camera -. frames .->
33
+ Vision@{ label: "<span style='font-size:16px;font-weight:bold;'>Vision Processor</span><br><span style='font-size:13px;color:#7b1fa2;'>Local VLM (optional)</span>" }
34
+
35
+ Vision -. description .-> Handlers
36
+
37
+ Robot@{ label: "<span style='font-size:16px;font-weight:bold;'>reachy_mini</span><br><span style='font-size:13px;color:#c62828;'>Robot Control Library</span>" }
38
+ -- camera
39
+ frames --> Camera
40
+
41
+ Motion -- commands --> Robot
42
+
43
+ Handlers -- results --> OpenAI
44
+
45
+ User:::userStyle
46
+ UI:::uiStyle
47
+ OpenAI:::aiStyle
48
+ Motion:::coreStyle
49
+ Handlers:::toolStyle
50
+ Camera:::coreStyle
51
+ Vision:::aiStyle
52
+ Robot:::hardwareStyle
53
+ classDef userStyle fill:#e1f5fe,stroke:#01579b,stroke-width:3px
54
+ classDef uiStyle fill:#b3e5fc,stroke:#0277bd,stroke-width:2px
55
+ classDef aiStyle fill:#e1bee7,stroke:#7b1fa2,stroke-width:3px
56
+ classDef coreStyle fill:#fff9c4,stroke:#f57f17,stroke-width:2px
57
+ classDef hardwareStyle fill:#ef9a9a,stroke:#c62828,stroke-width:3px
58
+ classDef toolStyle fill:#fffde7,stroke:#f9a825,stroke-width:1px
index.html ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!doctype html>
2
+ <html>
3
+
4
+ <head>
5
+ <meta charset="utf-8" />
6
+ <meta name="viewport" content="width=device-width, initial-scale=1" />
7
+ <title>Reachy Mini Conversation App</title>
8
+ <link rel="preconnect" href="https://fonts.googleapis.com">
9
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
10
+ <link href="https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;500;600;700&family=Manrope:wght@400;500;600&display=swap" rel="stylesheet">
11
+ <link rel="stylesheet" href="style.css" />
12
+ </head>
13
+
14
+ <body>
15
+ <header class="hero">
16
+ <div class="topline">
17
+ <div class="brand">
18
+ <span class="logo">🤖</span>
19
+ <span class="brand-name">Reachy Mini</span>
20
+ </div>
21
+ <div class="pill">Realtime voice · Vision aware · Expressive motion</div>
22
+ </div>
23
+ <div class="hero-grid">
24
+ <div class="hero-copy">
25
+ <p class="eyebrow">Conversation App</p>
26
+ <h1>Talk, see, and move together.</h1>
27
+ <p class="lede">
28
+ A friendly, camera-aware companion for Reachy Mini. Chat out loud, watch it follow faces, dance, or react with recorded emotions—all while streaming transcripts in a clean web UI.
29
+ </p>
30
+ <div class="hero-actions">
31
+ <a class="btn primary" href="#highlights">Explore features</a>
32
+ <a class="btn ghost" href="#story">See how it feels</a>
33
+ </div>
34
+ <div class="hero-badges">
35
+ <span>Low-latency voice loop</span>
36
+ <span>Camera insights on demand</span>
37
+ <span>Choreographed dances & emotions</span>
38
+ <span>Personality profiles via web UI</span>
39
+ </div>
40
+ </div>
41
+ <div class="hero-visual">
42
+ <div class="glass-card">
43
+ <img src="docs/assets/reachy_mini_dance.gif" alt="Reachy Mini dancing" class="hero-gif">
44
+ <p class="caption">Reachy Mini can move, dance, and emote while holding a natural conversation.</p>
45
+ </div>
46
+ </div>
47
+ </div>
48
+ </header>
49
+
50
+ <section id="highlights" class="section features">
51
+ <div class="section-header">
52
+ <p class="eyebrow">What’s inside</p>
53
+ <h2>All-in-one conversational layer for your robot</h2>
54
+ <p class="intro">
55
+ The app blends realtime speech, vision, and motion so Reachy Mini feels present.
56
+ </p>
57
+ </div>
58
+ <div class="feature-grid">
59
+ <div class="feature-card">
60
+ <span class="icon">🎤</span>
61
+ <h3>Natural voice chat</h3>
62
+ <p>Talk freely and get fast, high-quality replies powered by realtime models.</p>
63
+ </div>
64
+ <div class="feature-card">
65
+ <span class="icon">🎥</span>
66
+ <h3>Vision-aware replies</h3>
67
+ <p>Ask the camera tool to see what’s in front, track a face, or keep attention on whoever is speaking.</p>
68
+ </div>
69
+ <div class="feature-card">
70
+ <span class="icon">💃</span>
71
+ <h3>Expressive motion</h3>
72
+ <p>Queue dances, play recorded emotions while Reachy listens and talks.</p>
73
+ </div>
74
+ <div class="feature-card">
75
+ <span class="icon">🧠</span>
76
+ <h3>Personalities on demand</h3>
77
+ <p>Switch conversation styles through profiles and decide which tools (dance, camera, tracking) each persona can use.</p>
78
+ </div>
79
+ <div class="feature-card">
80
+ <span class="icon">🌐</span>
81
+ <h3>Ready for your setup</h3>
82
+ <p>Works with wired or wireless Reachy Mini, and can run vision locally or through the default cloud model.</p>
83
+ </div>
84
+ </div>
85
+ </section>
86
+
87
+ <section id="story" class="section story">
88
+ <div class="story-grid">
89
+ <div class="story-card">
90
+ <p class="eyebrow">How it feels</p>
91
+ <h3>From hello to helpful in seconds</h3>
92
+ <ul class="story-list">
93
+ <li><span>👋</span> Say “Hey Reachy” and start chatting—no extra setup in the moment.</li>
94
+ <li><span>👀</span> Ask what it sees; it can peek through the camera or keep focus on your face.</li>
95
+ <li><span>🎭</span> Trigger emotions or dance breaks to keep the conversation lively.</li>
96
+ <li><span>📝</span> Follow along with live transcripts in the web UI or run audio-only from the console.</li>
97
+ </ul>
98
+ </div>
99
+ <div class="story-card secondary">
100
+ <p class="eyebrow">Where it shines</p>
101
+ <h3>Great for demos, teaching, and playful exploration</h3>
102
+ <p class="story-text">
103
+ Show off how Reachy Mini listens, responds, and moves in sync. Whether you’re guiding a class, hosting a booth, or experimenting at home, the app keeps the robot expressive without juggling scripts or joystick controls.
104
+ </p>
105
+ <div class="chips">
106
+ <span class="chip">Live conversation</span>
107
+ <span class="chip">Face tracking</span>
108
+ <span class="chip">Camera tool</span>
109
+ <span class="chip">Dance library</span>
110
+ <span class="chip">Profiles & tools</span>
111
+ </div>
112
+ </div>
113
+ </div>
114
+ </section>
115
+
116
+ <footer class="footer">
117
+ <p>
118
+ Reachy Mini Conversation App by <a href="https://github.com/pollen-robotics" target="_blank" rel="noopener">Pollen Robotics</a>.
119
+ Explore more apps on <a href="https://huggingface.co/spaces/pollen-robotics/Reachy_Mini_Apps" target="_blank" rel="noopener">Hugging Face Spaces</a>.
120
+ </p>
121
+ </footer>
122
+
123
+ </body>
124
+
125
+ </html>
pyproject.toml ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["setuptools"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "reachy_mini_conversation_app"
7
+ version = "0.2.0" # Reachy the Vibe Coder edition!
8
+ authors = [{ name = "Pollen Robotics", email = "contact@pollen-robotics.com" }]
9
+ description = ""
10
+ readme = "README.md"
11
+ requires-python = ">=3.10"
12
+ dependencies = [
13
+ #Media
14
+ "aiortc>=1.13.0",
15
+ "fastrtc>=0.0.34",
16
+ "gradio==5.50.1.dev1",
17
+ "huggingface_hub>=0.34.4",
18
+ "opencv-python>=4.12.0.88",
19
+
20
+ #Environment variables
21
+ "python-dotenv",
22
+
23
+ #OpenAI
24
+ "openai>=2.1",
25
+
26
+ #Reachy mini
27
+ "reachy_mini_dances_library",
28
+ "reachy_mini_toolbox",
29
+ "reachy_mini>=1.2.3rc1",
30
+ "eclipse-zenoh~=1.7.0",
31
+ "gradio_client>=1.13.3",
32
+ ]
33
+
34
+ [project.optional-dependencies]
35
+ reachy_mini_wireless = [
36
+ "PyGObject>=3.42.2,<=3.46.0",
37
+ "gst-signalling>=1.1.2",
38
+ ]
39
+ mcp = ["mcp>=1.0.0", "httpx-sse>=0.4.0"]
40
+ local_vision = ["torch", "transformers", "num2words"]
41
+ yolo_vision = ["ultralytics", "supervision"]
42
+ mediapipe_vision = ["mediapipe==0.10.14"]
43
+ all_vision = [
44
+ "torch", "transformers", "num2words",
45
+ "ultralytics", "supervision",
46
+ "mediapipe==0.10.14",
47
+ ]
48
+
49
+ [dependency-groups]
50
+ dev = [
51
+ "pytest",
52
+ "pytest-asyncio",
53
+ "ruff==0.12.0",
54
+ "mypy==1.18.2",
55
+ "pre-commit",
56
+ "types-requests",
57
+ ]
58
+
59
+ [project.scripts]
60
+ reachy-mini-conversation-app = "reachy_mini_conversation_app.main:main"
61
+
62
+ [project.entry-points."reachy_mini_apps"]
63
+ reachy_mini_conversation_app = "reachy_mini_conversation_app.main:ReachyMiniConversationApp"
64
+
65
+ [tool.setuptools]
66
+ package-dir = { "" = "src" }
67
+ include-package-data = true
68
+
69
+ [tool.setuptools.packages.find]
70
+ where = ["src"]
71
+
72
+ [tool.setuptools.package-data]
73
+ reachy_mini_conversation_app = [
74
+ "images/*",
75
+ "static/*",
76
+ ".env.example",
77
+ "demos/**/*.txt",
78
+ "prompts_library/*.txt",
79
+ "profiles/**/*.txt",
80
+ "prompts/**/*.txt",
81
+ ]
82
+
83
+ [tool.ruff]
84
+ line-length = 119
85
+ exclude = [".venv", "dist", "build", "**/__pycache__", "*.egg-info", ".mypy_cache", ".pytest_cache"]
86
+
87
+ [tool.ruff.lint]
88
+ select = [
89
+ "E", # pycodestyle errors
90
+ "F", # pyflakes
91
+ "W", # pycodestyle warnings
92
+ "I", # isort
93
+ "C4", # flake8-comprehensions
94
+ "D", # pydocstyle
95
+ ]
96
+ ignore = [
97
+ "E501", # handled by formatter
98
+ "D100", # ignore missing module docstrings
99
+ "D203", # blank line before class docstring (conflicts with D211)
100
+ "D213", # summary on second line (conflicts with D212)
101
+ ]
102
+
103
+ [tool.ruff.lint.isort]
104
+ length-sort = true
105
+ lines-after-imports = 2
106
+ no-lines-before = ["standard-library", "local-folder"]
107
+ known-local-folder = ["reachy_mini_conversation_app"]
108
+ known-first-party = ["reachy_mini", "reachy_mini_dances_library", "reachy_mini_toolbox"]
109
+ split-on-trailing-comma = true
110
+
111
+ [tool.ruff.format]
112
+ quote-style = "double"
113
+ indent-style = "space"
114
+ skip-magic-trailing-comma = false
115
+ line-ending = "auto"
116
+
117
+ [tool.mypy]
118
+ python_version = "3.12"
119
+ files = ["src/"]
120
+ ignore_missing_imports = true
121
+ strict = true
122
+ show_error_codes = true
123
+ warn_unused_ignores = true
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ # Hugging Face Spaces requirements
2
+ # For the documentation/installation Space UI only
3
+ gradio>=5.0.0
4
+
5
+ # Full app dependencies are in pyproject.toml
6
+ # Install with: pip install -e .
src/reachy_mini_conversation_app/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Nothing (for ruff)."""
src/reachy_mini_conversation_app/audio/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Nothing (for ruff)."""
src/reachy_mini_conversation_app/audio/head_wobbler.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Moves head given audio samples."""
2
+
3
+ import time
4
+ import queue
5
+ import base64
6
+ import logging
7
+ import threading
8
+ from typing import Tuple
9
+ from collections.abc import Callable
10
+
11
+ import numpy as np
12
+ from numpy.typing import NDArray
13
+
14
+ from reachy_mini_conversation_app.audio.speech_tapper import HOP_MS, SwayRollRT
15
+
16
+
17
+ SAMPLE_RATE = 24000
18
+ MOVEMENT_LATENCY_S = 0.08 # seconds between audio and robot movement
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
+ class HeadWobbler:
23
+ """Converts audio deltas (base64) into head movement offsets."""
24
+
25
+ def __init__(self, set_speech_offsets: Callable[[Tuple[float, float, float, float, float, float]], None]) -> None:
26
+ """Initialize the head wobbler."""
27
+ self._apply_offsets = set_speech_offsets
28
+ self._base_ts: float | None = None
29
+ self._hops_done: int = 0
30
+
31
+ self.audio_queue: "queue.Queue[Tuple[int, int, NDArray[np.int16]]]" = queue.Queue()
32
+ self.sway = SwayRollRT()
33
+
34
+ # Synchronization primitives
35
+ self._state_lock = threading.Lock()
36
+ self._sway_lock = threading.Lock()
37
+ self._generation = 0
38
+
39
+ self._stop_event = threading.Event()
40
+ self._thread: threading.Thread | None = None
41
+
42
+ def feed(self, delta_b64: str) -> None:
43
+ """Thread-safe: push audio into the consumer queue."""
44
+ buf = np.frombuffer(base64.b64decode(delta_b64), dtype=np.int16).reshape(1, -1)
45
+ with self._state_lock:
46
+ generation = self._generation
47
+ self.audio_queue.put((generation, SAMPLE_RATE, buf))
48
+
49
+ def start(self) -> None:
50
+ """Start the head wobbler loop in a thread."""
51
+ self._stop_event.clear()
52
+ self._thread = threading.Thread(target=self.working_loop, daemon=True)
53
+ self._thread.start()
54
+ logger.debug("Head wobbler started")
55
+
56
+ def stop(self) -> None:
57
+ """Stop the head wobbler loop."""
58
+ self._stop_event.set()
59
+ if self._thread is not None:
60
+ self._thread.join()
61
+ logger.debug("Head wobbler stopped")
62
+
63
+ def working_loop(self) -> None:
64
+ """Convert audio deltas into head movement offsets."""
65
+ hop_dt = HOP_MS / 1000.0
66
+
67
+ logger.debug("Head wobbler thread started")
68
+ while not self._stop_event.is_set():
69
+ queue_ref = self.audio_queue
70
+ try:
71
+ chunk_generation, sr, chunk = queue_ref.get_nowait() # (gen, sr, data)
72
+ except queue.Empty:
73
+ # avoid while to never exit
74
+ time.sleep(MOVEMENT_LATENCY_S)
75
+ continue
76
+
77
+ try:
78
+ with self._state_lock:
79
+ current_generation = self._generation
80
+ if chunk_generation != current_generation:
81
+ continue
82
+
83
+ if self._base_ts is None:
84
+ with self._state_lock:
85
+ if self._base_ts is None:
86
+ self._base_ts = time.monotonic()
87
+
88
+ pcm = np.asarray(chunk).squeeze(0)
89
+ with self._sway_lock:
90
+ results = self.sway.feed(pcm, sr)
91
+
92
+ i = 0
93
+ while i < len(results):
94
+ with self._state_lock:
95
+ if self._generation != current_generation:
96
+ break
97
+ base_ts = self._base_ts
98
+ hops_done = self._hops_done
99
+
100
+ if base_ts is None:
101
+ base_ts = time.monotonic()
102
+ with self._state_lock:
103
+ if self._base_ts is None:
104
+ self._base_ts = base_ts
105
+ hops_done = self._hops_done
106
+
107
+ target = base_ts + MOVEMENT_LATENCY_S + hops_done * hop_dt
108
+ now = time.monotonic()
109
+
110
+ if now - target >= hop_dt:
111
+ lag_hops = int((now - target) / hop_dt)
112
+ drop = min(lag_hops, len(results) - i - 1)
113
+ if drop > 0:
114
+ with self._state_lock:
115
+ self._hops_done += drop
116
+ hops_done = self._hops_done
117
+ i += drop
118
+ continue
119
+
120
+ if target > now:
121
+ time.sleep(target - now)
122
+ with self._state_lock:
123
+ if self._generation != current_generation:
124
+ break
125
+
126
+ r = results[i]
127
+ offsets = (
128
+ r["x_mm"] / 1000.0,
129
+ r["y_mm"] / 1000.0,
130
+ r["z_mm"] / 1000.0,
131
+ r["roll_rad"],
132
+ r["pitch_rad"],
133
+ r["yaw_rad"],
134
+ )
135
+
136
+ with self._state_lock:
137
+ if self._generation != current_generation:
138
+ break
139
+
140
+ self._apply_offsets(offsets)
141
+
142
+ with self._state_lock:
143
+ self._hops_done += 1
144
+ i += 1
145
+ finally:
146
+ queue_ref.task_done()
147
+ logger.debug("Head wobbler thread exited")
148
+
149
+ '''
150
+ def drain_audio_queue(self) -> None:
151
+ """Empty the audio queue."""
152
+ try:
153
+ while True:
154
+ self.audio_queue.get_nowait()
155
+ except QueueEmpty:
156
+ pass
157
+ '''
158
+
159
+ def reset(self) -> None:
160
+ """Reset the internal state."""
161
+ with self._state_lock:
162
+ self._generation += 1
163
+ self._base_ts = None
164
+ self._hops_done = 0
165
+
166
+ # Drain any queued audio chunks from previous generations
167
+ drained_any = False
168
+ while True:
169
+ try:
170
+ _, _, _ = self.audio_queue.get_nowait()
171
+ except queue.Empty:
172
+ break
173
+ else:
174
+ drained_any = True
175
+ self.audio_queue.task_done()
176
+
177
+ with self._sway_lock:
178
+ self.sway.reset()
179
+
180
+ if drained_any:
181
+ logger.debug("Head wobbler queue drained during reset")
src/reachy_mini_conversation_app/audio/speech_tapper.py ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import math
3
+ from typing import Any, Dict, List
4
+ from itertools import islice
5
+ from collections import deque
6
+
7
+ import numpy as np
8
+ from numpy.typing import NDArray
9
+
10
+
11
+ # Tunables
12
+ SR = 16_000
13
+ FRAME_MS = 20
14
+ HOP_MS = 10
15
+
16
+ SWAY_MASTER = 1.5
17
+ SENS_DB_OFFSET = +4.0
18
+ VAD_DB_ON = -35.0
19
+ VAD_DB_OFF = -45.0
20
+ VAD_ATTACK_MS = 40
21
+ VAD_RELEASE_MS = 250
22
+ ENV_FOLLOW_GAIN = 0.65
23
+
24
+ SWAY_F_PITCH = 2.2
25
+ SWAY_A_PITCH_DEG = 4.5
26
+ SWAY_F_YAW = 0.6
27
+ SWAY_A_YAW_DEG = 7.5
28
+ SWAY_F_ROLL = 1.3
29
+ SWAY_A_ROLL_DEG = 2.25
30
+ SWAY_F_X = 0.35
31
+ SWAY_A_X_MM = 4.5
32
+ SWAY_F_Y = 0.45
33
+ SWAY_A_Y_MM = 3.75
34
+ SWAY_F_Z = 0.25
35
+ SWAY_A_Z_MM = 2.25
36
+
37
+ SWAY_DB_LOW = -46.0
38
+ SWAY_DB_HIGH = -18.0
39
+ LOUDNESS_GAMMA = 0.9
40
+ SWAY_ATTACK_MS = 50
41
+ SWAY_RELEASE_MS = 250
42
+
43
+ # Derived
44
+ FRAME = int(SR * FRAME_MS / 1000)
45
+ HOP = int(SR * HOP_MS / 1000)
46
+ ATTACK_FR = max(1, int(VAD_ATTACK_MS / HOP_MS))
47
+ RELEASE_FR = max(1, int(VAD_RELEASE_MS / HOP_MS))
48
+ SWAY_ATTACK_FR = max(1, int(SWAY_ATTACK_MS / HOP_MS))
49
+ SWAY_RELEASE_FR = max(1, int(SWAY_RELEASE_MS / HOP_MS))
50
+
51
+
52
+ def _rms_dbfs(x: NDArray[np.float32]) -> float:
53
+ """Root-mean-square in dBFS for float32 mono array in [-1,1]."""
54
+ # numerically stable rms (avoid overflow)
55
+ x = x.astype(np.float32, copy=False)
56
+ rms = np.sqrt(np.mean(x * x, dtype=np.float32) + 1e-12, dtype=np.float32)
57
+ return float(20.0 * math.log10(float(rms) + 1e-12))
58
+
59
+
60
+ def _loudness_gain(db: float, offset: float = SENS_DB_OFFSET) -> float:
61
+ """Normalize dB into [0,1] with gamma; clipped to [0,1]."""
62
+ t = (db + offset - SWAY_DB_LOW) / (SWAY_DB_HIGH - SWAY_DB_LOW)
63
+ if t < 0.0:
64
+ t = 0.0
65
+ elif t > 1.0:
66
+ t = 1.0
67
+ return t**LOUDNESS_GAMMA if LOUDNESS_GAMMA != 1.0 else t
68
+
69
+
70
+ def _to_float32_mono(x: NDArray[Any]) -> NDArray[np.float32]:
71
+ """Convert arbitrary PCM array to float32 mono in [-1,1].
72
+
73
+ Accepts shapes: (N,), (1,N), (N,1), (C,N), (N,C).
74
+ """
75
+ a = np.asarray(x)
76
+ if a.ndim == 0:
77
+ return np.zeros(0, dtype=np.float32)
78
+
79
+ # If 2D, decide which axis is channels (prefer small first dim)
80
+ if a.ndim == 2:
81
+ # e.g., (channels, samples) if channels is small (<=8)
82
+ if a.shape[0] <= 8 and a.shape[0] <= a.shape[1]:
83
+ a = np.mean(a, axis=0)
84
+ else:
85
+ a = np.mean(a, axis=1)
86
+ elif a.ndim > 2:
87
+ a = np.mean(a.reshape(a.shape[0], -1), axis=0)
88
+
89
+ # Now 1D, cast/scale
90
+ if np.issubdtype(a.dtype, np.floating):
91
+ return a.astype(np.float32, copy=False)
92
+ # integer PCM
93
+ info = np.iinfo(a.dtype)
94
+ scale = float(max(-info.min, info.max))
95
+ return a.astype(np.float32) / (scale if scale != 0.0 else 1.0)
96
+
97
+
98
+ def _resample_linear(x: NDArray[np.float32], sr_in: int, sr_out: int) -> NDArray[np.float32]:
99
+ """Lightweight linear resampler for short buffers."""
100
+ if sr_in == sr_out or x.size == 0:
101
+ return x
102
+ # guard tiny sizes
103
+ n_out = int(round(x.size * sr_out / sr_in))
104
+ if n_out <= 1:
105
+ return np.zeros(0, dtype=np.float32)
106
+ t_in = np.linspace(0.0, 1.0, num=x.size, dtype=np.float32, endpoint=True)
107
+ t_out = np.linspace(0.0, 1.0, num=n_out, dtype=np.float32, endpoint=True)
108
+ return np.interp(t_out, t_in, x).astype(np.float32, copy=False)
109
+
110
+
111
class SwayRollRT:
    """Feed audio chunks → per-hop sway outputs.

    Streams PCM through a level-based VAD with hysteresis and a bank of
    six decorrelated low-frequency oscillators, emitting one pose-offset
    dict per HOP_MS of consumed audio. State is mutated on every call;
    use from a single producer.

    Usage:
        rt = SwayRollRT()
        rt.feed(pcm_int16_or_float, sr) -> List[dict]
    """

    def __init__(self, rng_seed: int = 7):
        """Initialize state."""
        self._seed = int(rng_seed)
        self.samples: deque[float] = deque(maxlen=10 * SR)  # sliding window for VAD/env
        # Partial hop left over between feed() calls.
        self.carry: NDArray[np.float32] = np.zeros(0, dtype=np.float32)

        # VAD state: hysteresis counters in hop units.
        self.vad_on = False
        self.vad_above = 0
        self.vad_below = 0

        # Smoothed sway envelope in [0, 1] plus attack/release counters.
        self.sway_env = 0.0
        self.sway_up = 0
        self.sway_down = 0

        # Random (seeded) initial phases decorrelate the six oscillators.
        rng = np.random.default_rng(self._seed)
        self.phase_pitch = float(rng.random() * 2 * math.pi)
        self.phase_yaw = float(rng.random() * 2 * math.pi)
        self.phase_roll = float(rng.random() * 2 * math.pi)
        self.phase_x = float(rng.random() * 2 * math.pi)
        self.phase_y = float(rng.random() * 2 * math.pi)
        self.phase_z = float(rng.random() * 2 * math.pi)
        self.t = 0.0  # elapsed time (s), advanced one hop per consumed HOP

    def reset(self) -> None:
        """Reset state (VAD/env/buffers/time) but keep initial phases/seed."""
        self.samples.clear()
        self.carry = np.zeros(0, dtype=np.float32)
        self.vad_on = False
        self.vad_above = 0
        self.vad_below = 0
        self.sway_env = 0.0
        self.sway_up = 0
        self.sway_down = 0
        self.t = 0.0

    def feed(self, pcm: NDArray[Any], sr: int | None) -> List[Dict[str, float]]:
        """Stream in PCM chunk. Returns a list of sway dicts, one per hop (HOP_MS).

        Args:
            pcm: np.ndarray, shape (N,) or (C,N)/(N,C); int or float.
            sr: sample rate of `pcm` (None -> assume SR).

        Returns:
            One dict per consumed hop with per-axis offsets in both radians
            and degrees (rotation) and millimeters (translation).
        """
        sr_in = SR if sr is None else int(sr)
        x = _to_float32_mono(pcm)
        if x.size == 0:
            return []
        if sr_in != SR:
            x = _resample_linear(x, sr_in, SR)
            if x.size == 0:
                return []

        # append to carry and consume fixed HOP chunks
        if self.carry.size:
            self.carry = np.concatenate([self.carry, x])
        else:
            self.carry = x

        out: List[Dict[str, float]] = []

        while self.carry.size >= HOP:
            hop = self.carry[:HOP]
            remaining: NDArray[np.float32] = self.carry[HOP:]
            self.carry = remaining

            # keep sliding window for VAD/env computation
            # (deque accepts any iterable; list() for small HOP is fine)
            self.samples.extend(hop.tolist())
            if len(self.samples) < FRAME:
                # Not enough history for a full analysis frame yet; still
                # advance time so oscillator phase stays continuous.
                self.t += HOP_MS / 1000.0
                continue

            # Most recent FRAME samples of the sliding window.
            frame = np.fromiter(
                islice(self.samples, len(self.samples) - FRAME, len(self.samples)),
                dtype=np.float32,
                count=FRAME,
            )
            db = _rms_dbfs(frame)

            # VAD with hysteresis + attack/release
            if db >= VAD_DB_ON:
                self.vad_above += 1
                self.vad_below = 0
                if not self.vad_on and self.vad_above >= ATTACK_FR:
                    self.vad_on = True
            elif db <= VAD_DB_OFF:
                self.vad_below += 1
                self.vad_above = 0
                if self.vad_on and self.vad_below >= RELEASE_FR:
                    self.vad_on = False

            # Ramp counters drive the envelope toward 1 while speaking and
            # back toward 0 during silence.
            if self.vad_on:
                self.sway_up = min(SWAY_ATTACK_FR, self.sway_up + 1)
                self.sway_down = 0
            else:
                self.sway_down = min(SWAY_RELEASE_FR, self.sway_down + 1)
                self.sway_up = 0

            up = self.sway_up / SWAY_ATTACK_FR
            down = 1.0 - (self.sway_down / SWAY_RELEASE_FR)
            target = up if self.vad_on else down
            # One-pole follower toward the ramp target.
            self.sway_env += ENV_FOLLOW_GAIN * (target - self.sway_env)
            # clamp
            if self.sway_env < 0.0:
                self.sway_env = 0.0
            elif self.sway_env > 1.0:
                self.sway_env = 1.0

            loud = _loudness_gain(db) * SWAY_MASTER
            env = self.sway_env
            self.t += HOP_MS / 1000.0

            # oscillators: amplitude is scaled by both loudness and envelope.
            pitch = (
                math.radians(SWAY_A_PITCH_DEG)
                * loud
                * env
                * math.sin(2 * math.pi * SWAY_F_PITCH * self.t + self.phase_pitch)
            )
            yaw = (
                math.radians(SWAY_A_YAW_DEG)
                * loud
                * env
                * math.sin(2 * math.pi * SWAY_F_YAW * self.t + self.phase_yaw)
            )
            roll = (
                math.radians(SWAY_A_ROLL_DEG)
                * loud
                * env
                * math.sin(2 * math.pi * SWAY_F_ROLL * self.t + self.phase_roll)
            )
            x_mm = SWAY_A_X_MM * loud * env * math.sin(2 * math.pi * SWAY_F_X * self.t + self.phase_x)
            y_mm = SWAY_A_Y_MM * loud * env * math.sin(2 * math.pi * SWAY_F_Y * self.t + self.phase_y)
            z_mm = SWAY_A_Z_MM * loud * env * math.sin(2 * math.pi * SWAY_F_Z * self.t + self.phase_z)

            out.append(
                {
                    "pitch_rad": pitch,
                    "yaw_rad": yaw,
                    "roll_rad": roll,
                    "pitch_deg": math.degrees(pitch),
                    "yaw_deg": math.degrees(yaw),
                    "roll_deg": math.degrees(roll),
                    "x_mm": x_mm,
                    "y_mm": y_mm,
                    "z_mm": z_mm,
                },
            )

        return out
src/reachy_mini_conversation_app/camera_worker.py ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Camera worker thread with frame buffering and face tracking.
2
+
3
+ Ported from main_works.py camera_worker() function to provide:
4
+ - 30Hz+ camera polling with thread-safe frame buffering
5
+ - Face tracking integration with smooth interpolation
6
+ - Latest frame always available for tools
7
+ """
8
+
9
+ import time
10
+ import logging
11
+ import threading
12
+ from typing import Any, List, Tuple
13
+
14
+ import numpy as np
15
+ from numpy.typing import NDArray
16
+ from scipy.spatial.transform import Rotation as R
17
+
18
+ from reachy_mini import ReachyMini
19
+ from reachy_mini.utils.interpolation import linear_pose_interpolation
20
+
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
class CameraWorker:
    """Thread-safe camera worker with frame buffering and face tracking.

    Runs a background thread that polls frames from the robot, keeps the
    latest frame behind a lock, and (when a head tracker is provided)
    computes pose offsets to look at a detected face, smoothly
    interpolating back to neutral when the face is lost or tracking is
    disabled.
    """

    def __init__(self, reachy_mini: ReachyMini, head_tracker: Any = None) -> None:
        """Initialize.

        Args:
            reachy_mini: Robot handle used for frame capture and look-at math.
            head_tracker: Optional detector exposing ``get_head_position(frame)``.
        """
        self.reachy_mini = reachy_mini
        self.head_tracker = head_tracker

        # Thread-safe frame storage
        self.latest_frame: NDArray[np.uint8] | None = None
        self.frame_lock = threading.Lock()
        self._stop_event = threading.Event()
        self._thread: threading.Thread | None = None

        # Face tracking state
        self.is_head_tracking_enabled = True
        self.face_tracking_offsets: List[float] = [
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
        ]  # x, y, z, roll, pitch, yaw
        self.face_tracking_lock = threading.Lock()

        # Face tracking timing variables (same as main_works.py)
        self.last_face_detected_time: float | None = None
        self.interpolation_start_time: float | None = None
        self.interpolation_start_pose: NDArray[np.float32] | None = None
        self.face_lost_delay = 2.0  # seconds to wait before starting interpolation
        self.interpolation_duration = 1.0  # seconds to interpolate back to neutral

        # Track state changes
        self.previous_head_tracking_state = self.is_head_tracking_enabled

    def get_latest_frame(self) -> NDArray[np.uint8] | None:
        """Get the latest frame (thread-safe)."""
        with self.frame_lock:
            if self.latest_frame is None:
                return None
            # Return a copy in original BGR format (OpenCV native)
            return self.latest_frame.copy()

    def get_face_tracking_offsets(
        self,
    ) -> Tuple[float, float, float, float, float, float]:
        """Get current face tracking offsets (thread-safe).

        Returns:
            (x, y, z, roll, pitch, yaw) — translations in meters and
            rotations in radians, as produced by the tracking loop.
        """
        with self.face_tracking_lock:
            offsets = self.face_tracking_offsets
            return (offsets[0], offsets[1], offsets[2], offsets[3], offsets[4], offsets[5])

    def set_head_tracking_enabled(self, enabled: bool) -> None:
        """Enable/disable head tracking."""
        self.is_head_tracking_enabled = enabled
        logger.info(f"Head tracking {'enabled' if enabled else 'disabled'}")

    def start(self) -> None:
        """Start the camera worker loop in a thread."""
        self._stop_event.clear()
        self._thread = threading.Thread(target=self.working_loop, daemon=True)
        self._thread.start()
        logger.debug("Camera worker started")

    def stop(self) -> None:
        """Stop the camera worker loop."""
        self._stop_event.set()
        if self._thread is not None:
            self._thread.join()

        logger.debug("Camera worker stopped")

    def working_loop(self) -> None:
        """Enable the camera worker loop.

        Ported from main_works.py camera_worker() with same logic.

        Per iteration: grab a frame, publish it under ``frame_lock``, run
        face tracking (when enabled), and manage the delayed interpolation
        back to the neutral pose once the face is lost or tracking is
        switched off.
        """
        logger.debug("Starting camera working loop")

        # Initialize head tracker if available
        neutral_pose = np.eye(4)  # Neutral pose (identity matrix)
        self.previous_head_tracking_state = self.is_head_tracking_enabled

        while not self._stop_event.is_set():
            try:
                current_time = time.time()

                # Get frame from robot
                frame = self.reachy_mini.media.get_frame()

                if frame is not None:
                    # Thread-safe frame storage
                    with self.frame_lock:
                        self.latest_frame = frame  # .copy()

                    # Check if face tracking was just disabled
                    if self.previous_head_tracking_state and not self.is_head_tracking_enabled:
                        # Face tracking was just disabled - start interpolation to neutral
                        self.last_face_detected_time = current_time  # Trigger the face-lost logic
                        self.interpolation_start_time = None  # Will be set by the face-lost interpolation
                        self.interpolation_start_pose = None

                    # Update tracking state
                    self.previous_head_tracking_state = self.is_head_tracking_enabled

                    # Handle face tracking if enabled and head tracker available
                    if self.is_head_tracking_enabled and self.head_tracker is not None:
                        eye_center, _ = self.head_tracker.get_head_position(frame)

                        if eye_center is not None:
                            # Face detected - immediately switch to tracking
                            self.last_face_detected_time = current_time
                            self.interpolation_start_time = None  # Stop any interpolation

                            # Convert normalized coordinates to pixel coordinates
                            # (eye_center is assumed to be in [-1, 1] — TODO confirm
                            # against the head tracker's contract)
                            h, w, _ = frame.shape
                            eye_center_norm = (eye_center + 1) / 2
                            eye_center_pixels = [
                                eye_center_norm[0] * w,
                                eye_center_norm[1] * h,
                            ]

                            # Get the head pose needed to look at the target, but don't perform movement
                            target_pose = self.reachy_mini.look_at_image(
                                eye_center_pixels[0],
                                eye_center_pixels[1],
                                duration=0.0,
                                perform_movement=False,
                            )

                            # Extract translation and rotation from the target pose directly
                            translation = target_pose[:3, 3]
                            rotation = R.from_matrix(target_pose[:3, :3]).as_euler("xyz", degrees=False)

                            # Scale down translation and rotation because smaller FOV
                            translation *= 0.6
                            rotation *= 0.6

                            # Thread-safe update of face tracking offsets (use pose as-is)
                            with self.face_tracking_lock:
                                self.face_tracking_offsets = [
                                    translation[0],
                                    translation[1],
                                    translation[2],  # x, y, z
                                    rotation[0],
                                    rotation[1],
                                    rotation[2],  # roll, pitch, yaw
                                ]

                        # No face detected while tracking enabled - set face lost timestamp
                        # NOTE(review): this branch is a no-op; its comment suggests it
                        # should record a face-lost timestamp, but `pass` means
                        # `last_face_detected_time` is never set when a face was never
                        # seen — confirm against main_works.py.
                        elif self.last_face_detected_time is None or self.last_face_detected_time == current_time:
                            # Only update if we haven't already set a face lost time
                            # (current_time check prevents overriding the disable-triggered timestamp)
                            pass

                    # Handle smooth interpolation (works for both face-lost and tracking-disabled cases)
                    if self.last_face_detected_time is not None:
                        time_since_face_lost = current_time - self.last_face_detected_time

                        if time_since_face_lost >= self.face_lost_delay:
                            # Start interpolation if not already started
                            if self.interpolation_start_time is None:
                                self.interpolation_start_time = current_time
                                # Capture current pose as start of interpolation
                                with self.face_tracking_lock:
                                    current_translation = self.face_tracking_offsets[:3]
                                    current_rotation_euler = self.face_tracking_offsets[3:]
                                # Convert to 4x4 pose matrix
                                pose_matrix = np.eye(4, dtype=np.float32)
                                pose_matrix[:3, 3] = current_translation
                                pose_matrix[:3, :3] = R.from_euler(
                                    "xyz",
                                    current_rotation_euler,
                                ).as_matrix()
                                self.interpolation_start_pose = pose_matrix

                            # Calculate interpolation progress (t from 0 to 1)
                            elapsed_interpolation = current_time - self.interpolation_start_time
                            t = min(1.0, elapsed_interpolation / self.interpolation_duration)

                            # Interpolate between current pose and neutral pose
                            interpolated_pose = linear_pose_interpolation(
                                self.interpolation_start_pose,
                                neutral_pose,
                                t,
                            )

                            # Extract translation and rotation from interpolated pose
                            translation = interpolated_pose[:3, 3]
                            rotation = R.from_matrix(interpolated_pose[:3, :3]).as_euler("xyz", degrees=False)

                            # Thread-safe update of face tracking offsets
                            with self.face_tracking_lock:
                                self.face_tracking_offsets = [
                                    translation[0],
                                    translation[1],
                                    translation[2],  # x, y, z
                                    rotation[0],
                                    rotation[1],
                                    rotation[2],  # roll, pitch, yaw
                                ]

                            # If interpolation is complete, reset timing
                            if t >= 1.0:
                                self.last_face_detected_time = None
                                self.interpolation_start_time = None
                                self.interpolation_start_pose = None
                        # else: Keep current offsets (within 2s delay period)

                # Small sleep to prevent excessive CPU usage (same as main_works.py)
                time.sleep(0.04)

            except Exception as e:
                logger.error(f"Camera worker error: {e}")
                time.sleep(0.1)  # Longer sleep on error

        logger.debug("Camera worker thread exited")
src/reachy_mini_conversation_app/config.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+
4
+ from dotenv import find_dotenv, load_dotenv
5
+
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
# Locate .env file (search upward from current working directory)
dotenv_path = find_dotenv(usecwd=True)

if dotenv_path:
    # Load .env and override environment variables so the file wins over
    # any values already present in the process environment.
    load_dotenv(dotenv_path=dotenv_path, override=True)
    logger.info(f"Configuration loaded from {dotenv_path}")
else:
    # Fall back to whatever is already set in the process environment.
    logger.warning("No .env file found, using environment variables")
18
+
19
+
20
class Config:
    """Configuration class for the conversation app.

    Attributes are resolved once at import time from the environment
    (after the module-level .env loading above). The class body also
    emits debug logging as a side effect of being imported.
    """

    # Required
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")  # The key is downloaded in console.py if needed

    # Optional
    MODEL_NAME = os.getenv("MODEL_NAME", "gpt-realtime")  # OpenAI realtime model name
    HF_HOME = os.getenv("HF_HOME", "./cache")  # Hugging Face cache directory
    LOCAL_VISION_MODEL = os.getenv("LOCAL_VISION_MODEL", "HuggingFaceTB/SmolVLM2-2.2B-Instruct")
    HF_TOKEN = os.getenv("HF_TOKEN")  # Optional, falls back to hf auth login if not set

    # Figma MCP integration
    FIGMA_ACCESS_TOKEN = os.getenv("FIGMA_ACCESS_TOKEN")  # For Figma design capabilities
    FIGMA_MCP_ENABLED = os.getenv("FIGMA_MCP_ENABLED", "false").lower() == "true"

    logger.debug(f"Model: {MODEL_NAME}, HF_HOME: {HF_HOME}, Vision Model: {LOCAL_VISION_MODEL}")
    if FIGMA_ACCESS_TOKEN:
        logger.debug("Figma MCP: Token configured")

    # Startup personality; may be changed at runtime via set_custom_profile().
    REACHY_MINI_CUSTOM_PROFILE = os.getenv("REACHY_MINI_CUSTOM_PROFILE")
    logger.debug(f"Custom Profile: {REACHY_MINI_CUSTOM_PROFILE}")
43
+
44
+ config = Config()
45
+
46
+
47
def set_custom_profile(profile: str | None) -> None:
    """Update the selected custom profile at runtime and expose it via env.

    This ensures modules that read `config` and code that inspects the
    environment see a consistent value.

    Args:
        profile: Profile name to activate, or None to restore the default
            (in which case the environment variable is removed).
    """
    # A plain class-attribute assignment cannot raise, so the previous
    # try/except around it was dead code.
    config.REACHY_MINI_CUSTOM_PROFILE = profile

    # Mirror the selection into the process environment. `os` is imported at
    # module level; the former local `import os as _os` was redundant.
    if profile:
        os.environ["REACHY_MINI_CUSTOM_PROFILE"] = profile
    else:
        # Remove to reflect default
        os.environ.pop("REACHY_MINI_CUSTOM_PROFILE", None)
src/reachy_mini_conversation_app/console.py ADDED
@@ -0,0 +1,499 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Bidirectional local audio stream with optional settings UI.
2
+
3
+ In headless mode, there is no Gradio UI. If the OpenAI API key is not
4
+ available via environment/.env, we expose a minimal settings page via the
5
+ Reachy Mini Apps settings server to let non-technical users enter it.
6
+
7
+ The settings UI is served from this package's ``static/`` folder and offers a
8
+ single password field to set ``OPENAI_API_KEY``. Once set, we persist it to the
9
+ app instance's ``.env`` file (if available) and proceed to start streaming.
10
+ """
11
+
12
+ import os
13
+ import sys
14
+ import time
15
+ import asyncio
16
+ import logging
17
+ from typing import List, Optional
18
+ from pathlib import Path
19
+
20
+ from fastrtc import AdditionalOutputs, audio_to_float32
21
+ from scipy.signal import resample
22
+
23
+ from reachy_mini import ReachyMini
24
+ from reachy_mini.media.media_manager import MediaBackend
25
+ from reachy_mini_conversation_app.config import config
26
+ from reachy_mini_conversation_app.openai_realtime import OpenaiRealtimeHandler
27
+ from reachy_mini_conversation_app.headless_personality_ui import mount_personality_routes
28
+
29
+
30
+ try:
31
+ # FastAPI is provided by the Reachy Mini Apps runtime
32
+ from fastapi import FastAPI, Response
33
+ from pydantic import BaseModel
34
+ from fastapi.responses import FileResponse, JSONResponse
35
+ from starlette.staticfiles import StaticFiles
36
+ except Exception: # pragma: no cover - only loaded when settings_app is used
37
+ FastAPI = object # type: ignore
38
+ FileResponse = object # type: ignore
39
+ JSONResponse = object # type: ignore
40
+ StaticFiles = object # type: ignore
41
+ BaseModel = object # type: ignore
42
+
43
+
44
+ logger = logging.getLogger(__name__)
45
+
46
+
47
+ class LocalStream:
48
+ """LocalStream using Reachy Mini's recorder/player."""
49
+
50
    def __init__(
        self,
        handler: OpenaiRealtimeHandler,
        robot: ReachyMini,
        *,
        settings_app: Optional[FastAPI] = None,
        instance_path: Optional[str] = None,
    ):
        """Initialize the stream with an OpenAI realtime handler and pipelines.

        - ``settings_app``: the Reachy Mini Apps FastAPI to attach settings endpoints.
        - ``instance_path``: directory where per-instance ``.env`` should be stored.
        """
        self.handler = handler
        self._robot = robot
        self._stop_event = asyncio.Event()
        self._tasks: List[asyncio.Task[None]] = []
        # Allow the handler to flush the player queue when appropriate.
        self.handler._clear_queue = self.clear_audio_queue
        self._settings_app: Optional[FastAPI] = settings_app
        self._instance_path: Optional[str] = instance_path
        # Guards against mounting the settings routes more than once.
        self._settings_initialized = False
        self._asyncio_loop = None
73
+
74
+ # ---- Settings UI (only when API key is missing) ----
75
    def _read_env_lines(self, env_path: Path) -> list[str]:
        """Load env file contents or a template as a list of lines.

        Lookup order: existing ``env_path`` → ``.env.example`` next to it →
        ``.env.example`` in the current working directory → the template
        packaged with this module. Every step is best-effort: any read
        failure falls through to the next source, and the final fallback
        is an empty list.
        """
        inst = env_path.parent
        try:
            if env_path.exists():
                try:
                    return env_path.read_text(encoding="utf-8").splitlines()
                except Exception:
                    # Unreadable .env: treat as empty rather than crash.
                    return []
            template_text = None
            # First template candidate: .env.example beside the target file.
            ex = inst / ".env.example"
            if ex.exists():
                try:
                    template_text = ex.read_text(encoding="utf-8")
                except Exception:
                    template_text = None
            # Second candidate: .env.example in the current working directory.
            if template_text is None:
                try:
                    cwd_example = Path.cwd() / ".env.example"
                    if cwd_example.exists():
                        template_text = cwd_example.read_text(encoding="utf-8")
                except Exception:
                    template_text = None
            # Last candidate: the template shipped inside this package.
            if template_text is None:
                packaged = Path(__file__).parent / ".env.example"
                if packaged.exists():
                    try:
                        template_text = packaged.read_text(encoding="utf-8")
                    except Exception:
                        template_text = None
            return template_text.splitlines() if template_text else []
        except Exception:
            return []
108
+
109
    def _persist_api_key(self, key: str) -> None:
        """Persist API key to environment and instance ``.env`` if possible.

        Behavior:
        - Always sets ``OPENAI_API_KEY`` in process env and in-memory config.
        - Writes/updates ``<instance_path>/.env``:
          * If ``.env`` exists, replaces/append OPENAI_API_KEY line.
          * Else, copies template from ``<instance_path>/.env.example`` when present,
            otherwise falls back to the packaged template
            ``reachy_mini_conversation_app/.env.example``.
          * Ensures the resulting file contains the full template plus the key.
        - Loads the written ``.env`` into the current process environment.

        Args:
            key: The OpenAI API key; ignored when empty/whitespace.
        """
        k = (key or "").strip()
        if not k:
            # Nothing to persist.
            return
        # Update live process env and config so consumers see it immediately
        try:
            os.environ["OPENAI_API_KEY"] = k
        except Exception:  # best-effort
            pass
        try:
            config.OPENAI_API_KEY = k
        except Exception:
            pass

        if not self._instance_path:
            # No per-instance storage available; env/config update is enough.
            return
        try:
            inst = Path(self._instance_path)
            env_path = inst / ".env"
            # Start from the existing file or a template (see _read_env_lines).
            lines = self._read_env_lines(env_path)
            replaced = False
            for i, ln in enumerate(lines):
                if ln.strip().startswith("OPENAI_API_KEY="):
                    lines[i] = f"OPENAI_API_KEY={k}"
                    replaced = True
                    break
            if not replaced:
                lines.append(f"OPENAI_API_KEY={k}")
            final_text = "\n".join(lines) + "\n"
            env_path.write_text(final_text, encoding="utf-8")
            logger.info("Persisted OPENAI_API_KEY to %s", env_path)

            # Load the newly written .env into this process to ensure downstream imports see it
            try:
                from dotenv import load_dotenv

                load_dotenv(dotenv_path=str(env_path), override=True)
            except Exception:
                pass
        except Exception as e:
            logger.warning("Failed to persist OPENAI_API_KEY: %s", e)
162
+
163
    def _persist_personality(self, profile: Optional[str]) -> None:
        """Persist the startup personality to the instance .env and config.

        An empty or None *profile* clears the selection: the env line is
        removed from ``.env`` (when present) and the in-memory config is
        reset to the default.
        """
        selection = (profile or "").strip() or None
        # Update the live config/environment first so running code sees it.
        try:
            from reachy_mini_conversation_app.config import set_custom_profile

            set_custom_profile(selection)
        except Exception:
            pass

        if not self._instance_path:
            return
        try:
            env_path = Path(self._instance_path) / ".env"
            lines = self._read_env_lines(env_path)
            replaced = False
            # Iterate over a copy because clearing the selection pops in place.
            for i, ln in enumerate(list(lines)):
                if ln.strip().startswith("REACHY_MINI_CUSTOM_PROFILE="):
                    if selection:
                        lines[i] = f"REACHY_MINI_CUSTOM_PROFILE={selection}"
                    else:
                        lines.pop(i)
                    replaced = True
                    break
            if selection and not replaced:
                lines.append(f"REACHY_MINI_CUSTOM_PROFILE={selection}")
            if selection is None and not env_path.exists():
                # Nothing persisted and nothing to clear: avoid creating a file.
                return
            final_text = "\n".join(lines) + "\n"
            env_path.write_text(final_text, encoding="utf-8")
            logger.info("Persisted startup personality to %s", env_path)
            # Re-load so the process environment matches the file on disk.
            try:
                from dotenv import load_dotenv

                load_dotenv(dotenv_path=str(env_path), override=True)
            except Exception:
                pass
        except Exception as e:
            logger.warning("Failed to persist REACHY_MINI_CUSTOM_PROFILE: %s", e)
202
+
203
+ def _read_persisted_personality(self) -> Optional[str]:
204
+ """Read persisted startup personality from instance .env (if any)."""
205
+ if not self._instance_path:
206
+ return None
207
+ env_path = Path(self._instance_path) / ".env"
208
+ try:
209
+ if env_path.exists():
210
+ for ln in env_path.read_text(encoding="utf-8").splitlines():
211
+ if ln.strip().startswith("REACHY_MINI_CUSTOM_PROFILE="):
212
+ _, _, val = ln.partition("=")
213
+ v = val.strip()
214
+ return v or None
215
+ except Exception:
216
+ pass
217
+ return None
218
+
219
    def _init_settings_ui_if_needed(self) -> None:
        """Attach minimal settings UI to the settings app.

        Always mounts the UI when a settings_app is provided so that users
        see a confirmation message even if the API key is already configured.

        Routes registered on the settings app:
        - ``GET /``                 serves the static ``index.html``
        - ``GET /favicon.ico``      204 to silence browser favicon requests
        - ``GET /status``           reports whether an API key is configured
        - ``GET /ready``            reports whether backend tools finished loading
        - ``POST /openai_api_key``  persists a new key
        - ``POST /validate_api_key``checks a key against the OpenAI API without persisting

        Idempotent: guarded by ``self._settings_initialized``.
        """
        if self._settings_initialized:
            return
        if self._settings_app is None:
            # Headless run without a settings server; nothing to mount.
            return

        static_dir = Path(__file__).parent / "static"
        index_file = static_dir / "index.html"

        if hasattr(self._settings_app, "mount"):
            try:
                # Serve /static/* assets
                self._settings_app.mount("/static", StaticFiles(directory=str(static_dir)), name="static")
            except Exception:
                # Mount may fail (e.g. already mounted); the routes below still work.
                pass

        class ApiKeyPayload(BaseModel):
            # Request body for the key-setting/validation endpoints.
            openai_api_key: str

        # GET / -> index.html
        @self._settings_app.get("/")
        def _root() -> FileResponse:
            return FileResponse(str(index_file))

        # GET /favicon.ico -> optional, avoid noisy 404s on some browsers
        @self._settings_app.get("/favicon.ico")
        def _favicon() -> Response:
            return Response(status_code=204)

        # GET /status -> whether key is set
        @self._settings_app.get("/status")
        def _status() -> JSONResponse:
            has_key = bool(config.OPENAI_API_KEY and str(config.OPENAI_API_KEY).strip())
            return JSONResponse({"has_key": has_key})

        # GET /ready -> whether backend finished loading tools
        @self._settings_app.get("/ready")
        def _ready() -> JSONResponse:
            try:
                # Inspect the already-imported tools module without importing it
                # here (importing could trigger heavy initialization).
                mod = sys.modules.get("reachy_mini_conversation_app.tools.core_tools")
                ready = bool(getattr(mod, "_TOOLS_INITIALIZED", False)) if mod else False
            except Exception:
                ready = False
            return JSONResponse({"ready": ready})

        # POST /openai_api_key -> set/persist key
        @self._settings_app.post("/openai_api_key")
        def _set_key(payload: ApiKeyPayload) -> JSONResponse:
            key = (payload.openai_api_key or "").strip()
            if not key:
                return JSONResponse({"ok": False, "error": "empty_key"}, status_code=400)
            self._persist_api_key(key)
            return JSONResponse({"ok": True})

        # POST /validate_api_key -> validate key without persisting it
        @self._settings_app.post("/validate_api_key")
        async def _validate_key(payload: ApiKeyPayload) -> JSONResponse:
            key = (payload.openai_api_key or "").strip()
            if not key:
                return JSONResponse({"valid": False, "error": "empty_key"}, status_code=400)

            # Try to validate by checking if we can fetch the models
            try:
                import httpx

                headers = {"Authorization": f"Bearer {key}", "Content-Type": "application/json"}
                async with httpx.AsyncClient(timeout=10.0) as client:
                    response = await client.get("https://api.openai.com/v1/models", headers=headers)
                if response.status_code == 200:
                    return JSONResponse({"valid": True})
                elif response.status_code == 401:
                    return JSONResponse({"valid": False, "error": "invalid_api_key"}, status_code=401)
                else:
                    return JSONResponse(
                        {"valid": False, "error": "validation_failed"}, status_code=response.status_code
                    )
            except Exception as e:
                logger.warning(f"API key validation failed: {e}")
                return JSONResponse({"valid": False, "error": "validation_error"}, status_code=500)

        self._settings_initialized = True
305
+
306
    def launch(self) -> None:
        """Start the recorder/player and run the async processing loops.

        If the OpenAI key is missing, expose a tiny settings UI via the
        Reachy Mini settings server to collect it before starting streams.

        Blocking call: runs until the asyncio runner finishes (or until
        interrupted while waiting for an API key).
        """
        self._stop_event.clear()

        # Try to load an existing instance .env first (covers subsequent runs)
        if self._instance_path:
            try:
                from dotenv import load_dotenv

                from reachy_mini_conversation_app.config import set_custom_profile

                env_path = Path(self._instance_path) / ".env"
                if env_path.exists():
                    # override=True: instance .env wins over any stale process env.
                    load_dotenv(dotenv_path=str(env_path), override=True)
                    # Update config with newly loaded values
                    new_key = os.getenv("OPENAI_API_KEY", "").strip()
                    if new_key:
                        try:
                            config.OPENAI_API_KEY = new_key
                        except Exception:
                            pass
                    new_profile = os.getenv("REACHY_MINI_CUSTOM_PROFILE")
                    if new_profile is not None:
                        try:
                            # Empty string in .env means "clear the custom profile".
                            set_custom_profile(new_profile.strip() or None)
                        except Exception:
                            pass
            except Exception:
                pass

        # If key is still missing, try to download one from HuggingFace
        if not (config.OPENAI_API_KEY and str(config.OPENAI_API_KEY).strip()):
            logger.info("OPENAI_API_KEY not set, attempting to download from HuggingFace...")
            try:
                from gradio_client import Client
                client = Client("HuggingFaceM4/gradium_setup", verbose=False)
                # NOTE(review): assumes /claim_b_key returns a (key, status) pair — confirm against the Space API.
                key, status = client.predict(api_name="/claim_b_key")
                if key and key.strip():
                    logger.info("Successfully downloaded API key from HuggingFace")
                    # Persist it immediately
                    self._persist_api_key(key)
            except Exception as e:
                logger.warning(f"Failed to download API key from HuggingFace: {e}")

        # Always expose settings UI if a settings app is available
        # (do this AFTER loading/downloading the key so status endpoint sees the right value)
        self._init_settings_ui_if_needed()

        # If key is still missing -> wait until provided via the settings UI
        if not (config.OPENAI_API_KEY and str(config.OPENAI_API_KEY).strip()):
            logger.warning("OPENAI_API_KEY not found. Open the app settings page to enter it.")
            # Poll until the key becomes available (set via the settings UI).
            # This busy-waits this thread; the settings server runs elsewhere.
            try:
                while not (config.OPENAI_API_KEY and str(config.OPENAI_API_KEY).strip()):
                    time.sleep(0.2)
            except KeyboardInterrupt:
                logger.info("Interrupted while waiting for API key.")
                return

        # Start media after key is set/available
        self._robot.media.start_recording()
        self._robot.media.start_playing()
        time.sleep(1)  # give some time to the pipelines to start

        async def runner() -> None:
            # Capture loop for cross-thread personality actions
            loop = asyncio.get_running_loop()
            self._asyncio_loop = loop  # type: ignore[assignment]
            # Mount personality routes now that loop and handler are available
            try:
                if self._settings_app is not None:
                    mount_personality_routes(
                        self._settings_app,
                        self.handler,
                        lambda: self._asyncio_loop,
                        persist_personality=self._persist_personality,
                        get_persisted_personality=self._read_persisted_personality,
                    )
            except Exception:
                pass
            # Three cooperating tasks: the realtime handler plus the two
            # audio pump loops defined on this class.
            self._tasks = [
                asyncio.create_task(self.handler.start_up(), name="openai-handler"),
                asyncio.create_task(self.record_loop(), name="stream-record-loop"),
                asyncio.create_task(self.play_loop(), name="stream-play-loop"),
            ]
            try:
                await asyncio.gather(*self._tasks)
            except asyncio.CancelledError:
                logger.info("Tasks cancelled during shutdown")
            finally:
                # Ensure handler connection is closed
                await self.handler.shutdown()

        asyncio.run(runner())
404
+
405
+ def close(self) -> None:
406
+ """Stop the stream and underlying media pipelines.
407
+
408
+ This method:
409
+ - Stops audio recording and playback first
410
+ - Sets the stop event to signal async loops to terminate
411
+ - Cancels all pending async tasks (openai-handler, record-loop, play-loop)
412
+ """
413
+ logger.info("Stopping LocalStream...")
414
+
415
+ # Stop media pipelines FIRST before cancelling async tasks
416
+ # This ensures clean shutdown before PortAudio cleanup
417
+ try:
418
+ self._robot.media.stop_recording()
419
+ except Exception as e:
420
+ logger.debug(f"Error stopping recording (may already be stopped): {e}")
421
+
422
+ try:
423
+ self._robot.media.stop_playing()
424
+ except Exception as e:
425
+ logger.debug(f"Error stopping playback (may already be stopped): {e}")
426
+
427
+ # Now signal async loops to stop
428
+ self._stop_event.set()
429
+
430
+ # Cancel all running tasks
431
+ for task in self._tasks:
432
+ if not task.done():
433
+ task.cancel()
434
+
435
+ def clear_audio_queue(self) -> None:
436
+ """Flush the player's appsrc to drop any queued audio immediately."""
437
+ logger.info("User intervention: flushing player queue")
438
+ if self._robot.media.backend == MediaBackend.GSTREAMER:
439
+ # Directly flush gstreamer audio pipe
440
+ self._robot.media.audio.clear_player()
441
+ elif self._robot.media.backend == MediaBackend.DEFAULT or self._robot.media.backend == MediaBackend.DEFAULT_NO_VIDEO:
442
+ self._robot.media.audio.clear_output_buffer()
443
+ self.handler.output_queue = asyncio.Queue()
444
+
445
+ async def record_loop(self) -> None:
446
+ """Read mic frames from the recorder and forward them to the handler."""
447
+ input_sample_rate = self._robot.media.get_input_audio_samplerate()
448
+ logger.debug(f"Audio recording started at {input_sample_rate} Hz")
449
+
450
+ while not self._stop_event.is_set():
451
+ audio_frame = self._robot.media.get_audio_sample()
452
+ if audio_frame is not None:
453
+ await self.handler.receive((input_sample_rate, audio_frame))
454
+ await asyncio.sleep(0) # avoid busy loop
455
+
456
    async def play_loop(self) -> None:
        """Fetch outputs from the handler: log text and play audio frames.

        Two output kinds are handled: AdditionalOutputs (chat messages,
        logged only) and (sample_rate, samples) tuples (normalized and
        pushed to the robot's audio output). Anything else is ignored.
        """
        while not self._stop_event.is_set():
            handler_output = await self.handler.emit()

            if isinstance(handler_output, AdditionalOutputs):
                for msg in handler_output.args:
                    content = msg.get("content", "")
                    if isinstance(content, str):
                        # Truncate long transcripts to keep logs readable.
                        logger.info(
                            "role=%s content=%s",
                            msg.get("role"),
                            content if len(content) < 500 else content[:500] + "…",
                        )

            elif isinstance(handler_output, tuple):
                # NOTE(review): this is the rate the handler produced audio at,
                # not the mic input rate — the name is historical.
                input_sample_rate, audio_data = handler_output
                output_sample_rate = self._robot.media.get_output_audio_samplerate()

                # Reshape if needed
                if audio_data.ndim == 2:
                    # Scipy channels last convention
                    if audio_data.shape[1] > audio_data.shape[0]:
                        audio_data = audio_data.T
                    # Multiple channels -> Mono channel
                    # NOTE(review): a (n, 1) mono array stays 2-D past this point;
                    # assumes audio_to_float32 accepts that shape — confirm.
                    if audio_data.shape[1] > 1:
                        audio_data = audio_data[:, 0]

                # Cast if needed
                audio_frame = audio_to_float32(audio_data)

                # Resample if needed
                if input_sample_rate != output_sample_rate:
                    audio_frame = resample(
                        audio_frame,
                        int(len(audio_frame) * output_sample_rate / input_sample_rate),
                    )

                self._robot.media.push_audio_sample(audio_frame)

            else:
                logger.debug("Ignoring output type=%s", type(handler_output).__name__)

            await asyncio.sleep(0)  # yield to event loop
src/reachy_mini_conversation_app/dance_emotion_moves.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Dance and emotion moves for the movement queue system.
2
+
3
+ This module implements dance moves and emotions as Move objects that can be queued
4
+ and executed sequentially by the MovementManager.
5
+ """
6
+
7
+ from __future__ import annotations
8
+ import logging
9
+ from typing import Tuple
10
+
11
+ import numpy as np
12
+ from numpy.typing import NDArray
13
+
14
+ from reachy_mini.motion.move import Move
15
+ from reachy_mini.motion.recorded_move import RecordedMoves
16
+ from reachy_mini_dances_library.dance_move import DanceMove
17
+
18
+
19
# Module-level logger shared by the queue-move wrappers below.
logger = logging.getLogger(__name__)
20
+
21
+
22
class DanceQueueMove(Move):  # type: ignore
    """Adapt a library DanceMove to the movement-queue Move interface."""

    def __init__(self, move_name: str):
        """Look up the named dance in the dance library."""
        self.dance_move = DanceMove(move_name)
        self.move_name = move_name

    @property
    def duration(self) -> float:
        """Total duration of the wrapped dance, in seconds (Move interface)."""
        return float(self.dance_move.duration)

    def evaluate(self, t: float) -> tuple[NDArray[np.float64] | None, NDArray[np.float64] | None, float | None]:
        """Sample the dance at time t, falling back to a neutral pose on error."""
        try:
            head_pose, antennas, body_yaw = self.dance_move.evaluate(t)
            # The dance library may hand antennas back as a plain tuple;
            # the queue expects an ndarray.
            if isinstance(antennas, tuple):
                antennas = np.array([antennas[0], antennas[1]])
            return (head_pose, antennas, body_yaw)
        except Exception as e:
            logger.error(f"Error evaluating dance move '{self.move_name}' at t={t}: {e}")
            # Neutral, motionless pose so the queue can keep running.
            from reachy_mini.utils import create_head_pose

            return (
                create_head_pose(0, 0, 0, 0, 0, 0, degrees=True),
                np.array([0.0, 0.0], dtype=np.float64),
                0.0,
            )
54
+
55
+
56
class EmotionQueueMove(Move):  # type: ignore
    """Adapt a recorded emotion move to the movement-queue Move interface."""

    def __init__(self, emotion_name: str, recorded_moves: RecordedMoves):
        """Fetch the named emotion from the recorded-moves library."""
        self.emotion_move = recorded_moves.get(emotion_name)
        self.emotion_name = emotion_name

    @property
    def duration(self) -> float:
        """Total duration of the wrapped emotion, in seconds (Move interface)."""
        return float(self.emotion_move.duration)

    def evaluate(self, t: float) -> tuple[NDArray[np.float64] | None, NDArray[np.float64] | None, float | None]:
        """Sample the emotion at time t, falling back to a neutral pose on error."""
        try:
            head_pose, antennas, body_yaw = self.emotion_move.evaluate(t)
            # Recorded moves may return antennas as a plain tuple; the
            # queue expects an ndarray.
            if isinstance(antennas, tuple):
                antennas = np.array([antennas[0], antennas[1]])
            return (head_pose, antennas, body_yaw)
        except Exception as e:
            logger.error(f"Error evaluating emotion '{self.emotion_name}' at t={t}: {e}")
            # Neutral, motionless pose so the queue can keep running.
            from reachy_mini.utils import create_head_pose

            return (
                create_head_pose(0, 0, 0, 0, 0, 0, degrees=True),
                np.array([0.0, 0.0], dtype=np.float64),
                0.0,
            )
88
+
89
+
90
class GotoQueueMove(Move):  # type: ignore
    """Wrapper for goto moves to work with the movement queue system.

    Linearly interpolates head pose, antennas and body yaw from a start
    configuration (neutral when not provided) to a target configuration
    over ``duration`` seconds.
    """

    def __init__(
        self,
        target_head_pose: NDArray[np.float32],
        start_head_pose: NDArray[np.float32] | None = None,
        target_antennas: Tuple[float, float] = (0, 0),
        start_antennas: Tuple[float, float] | None = None,
        target_body_yaw: float = 0,
        start_body_yaw: float | None = None,
        duration: float = 1.0,
    ):
        """Initialize a GotoQueueMove.

        Args:
            target_head_pose: Head pose matrix to reach at the end of the move.
            start_head_pose: Starting head pose; neutral pose when None.
            target_antennas: (left, right) antenna angles to reach.
            start_antennas: Starting antenna angles; (0, 0) when None.
            target_body_yaw: Body yaw to reach.
            start_body_yaw: Starting body yaw; 0 when None.
            duration: Move duration in seconds.
        """
        self._duration = duration
        self.target_head_pose = target_head_pose
        self.start_head_pose = start_head_pose
        self.target_antennas = target_antennas
        # Explicit `is None` checks instead of `x or default`: truthiness
        # tests would also replace legitimate falsy start values and hide
        # the intent of "default only when not provided".
        self.start_antennas = (0, 0) if start_antennas is None else start_antennas
        self.target_body_yaw = target_body_yaw
        self.start_body_yaw = 0 if start_body_yaw is None else start_body_yaw

    @property
    def duration(self) -> float:
        """Duration property required by official Move interface."""
        return self._duration

    def evaluate(self, t: float) -> tuple[NDArray[np.float64] | None, NDArray[np.float64] | None, float | None]:
        """Evaluate goto move at time t using linear interpolation."""
        try:
            from reachy_mini.utils import create_head_pose
            from reachy_mini.utils.interpolation import linear_pose_interpolation

            # Normalized progress clamped to [0, 1]. A non-positive duration
            # snaps straight to the target instead of dividing by zero (the
            # previous code raised ZeroDivisionError and relied on the broad
            # except below as control flow).
            if self._duration <= 0:
                t_clamped = 1.0
            else:
                t_clamped = max(0, min(1, t / self._duration))

            # Use start pose if available, otherwise neutral
            if self.start_head_pose is not None:
                start_pose = self.start_head_pose
            else:
                start_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)

            # Interpolate head pose
            head_pose = linear_pose_interpolation(start_pose, self.target_head_pose, t_clamped)

            # Interpolate antennas - return as numpy array
            antennas = np.array(
                [
                    self.start_antennas[0] + (self.target_antennas[0] - self.start_antennas[0]) * t_clamped,
                    self.start_antennas[1] + (self.target_antennas[1] - self.start_antennas[1]) * t_clamped,
                ],
                dtype=np.float64,
            )

            # Interpolate body yaw
            body_yaw = self.start_body_yaw + (self.target_body_yaw - self.start_body_yaw) * t_clamped

            return (head_pose, antennas, body_yaw)

        except Exception as e:
            logger.error(f"Error evaluating goto move at t={t}: {e}")
            # Return target pose on error - convert to float64
            target_head_pose_f64 = self.target_head_pose.astype(np.float64)
            target_antennas_array = np.array([self.target_antennas[0], self.target_antennas[1]], dtype=np.float64)
            return (target_head_pose_f64, target_antennas_array, self.target_body_yaw)
src/reachy_mini_conversation_app/gradio_personality.py ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Gradio personality UI components and wiring.
2
+
3
+ This module encapsulates the UI elements and logic related to managing
4
+ conversation "personalities" (profiles) so that `main.py` stays lean.
5
+ """
6
+
7
+ from __future__ import annotations
8
+ from typing import Any
9
+ from pathlib import Path
10
+
11
+ import gradio as gr
12
+
13
+ from .config import config
14
+
15
+
16
class PersonalityUI:
    """Container for personality-related Gradio components.

    Owns the dropdown/editor widgets for selecting, previewing, creating
    and saving conversation "personalities" (profile folders holding
    instructions.txt, tools.txt and voice.txt), plus the event wiring
    that connects them to the realtime handler.
    """

    def __init__(self) -> None:
        """Initialize the PersonalityUI instance."""
        # Constants and paths
        self.DEFAULT_OPTION = "(built-in default)"
        self._profiles_root = Path(__file__).parent / "profiles"
        self._tools_dir = Path(__file__).parent / "tools"
        self._prompts_dir = Path(__file__).parent / "prompts"

        # Components (initialized in create_components)
        self.personalities_dropdown: gr.Dropdown
        self.apply_btn: gr.Button
        self.status_md: gr.Markdown
        self.preview_md: gr.Markdown
        self.person_name_tb: gr.Textbox
        self.person_instr_ta: gr.TextArea
        self.tools_txt_ta: gr.TextArea
        self.voice_dropdown: gr.Dropdown
        self.new_personality_btn: gr.Button
        self.available_tools_cg: gr.CheckboxGroup
        self.save_btn: gr.Button

    # ---------- Filesystem helpers ----------
    def _list_personalities(self) -> list[str]:
        """Return built-in profile names, then user profiles prefixed with 'user_personalities/'."""
        names: list[str] = []
        try:
            if self._profiles_root.exists():
                for p in sorted(self._profiles_root.iterdir()):
                    if p.name == "user_personalities":
                        continue
                    # Only folders that actually contain instructions count.
                    if p.is_dir() and (p / "instructions.txt").exists():
                        names.append(p.name)
            user_dir = self._profiles_root / "user_personalities"
            if user_dir.exists():
                for p in sorted(user_dir.iterdir()):
                    if p.is_dir() and (p / "instructions.txt").exists():
                        names.append(f"user_personalities/{p.name}")
        except Exception:
            pass
        return names

    def _resolve_profile_dir(self, selection: str) -> Path:
        """Map a dropdown selection (possibly 'user_personalities/<name>') to its folder."""
        return self._profiles_root / selection

    def _read_instructions_for(self, name: str) -> str:
        """Return the instruction text for a profile, or the built-in default prompt."""
        try:
            if name == self.DEFAULT_OPTION:
                default_file = self._prompts_dir / "default_prompt.txt"
                if default_file.exists():
                    return default_file.read_text(encoding="utf-8").strip()
                return ""
            target = self._resolve_profile_dir(name) / "instructions.txt"
            if target.exists():
                return target.read_text(encoding="utf-8").strip()
            return ""
        except Exception as e:
            # Surface read errors in the preview rather than raising into Gradio.
            return f"Could not load instructions: {e}"

    @staticmethod
    def _sanitize_name(name: str) -> str:
        """Collapse whitespace to underscores and strip filesystem-unsafe characters."""
        import re

        s = name.strip()
        s = re.sub(r"\s+", "_", s)
        s = re.sub(r"[^a-zA-Z0-9_-]", "", s)
        return s

    # ---------- Public API ----------
    def create_components(self) -> None:
        """Instantiate Gradio components for the personality UI."""
        current_value = config.REACHY_MINI_CUSTOM_PROFILE or self.DEFAULT_OPTION

        self.personalities_dropdown = gr.Dropdown(
            label="Select personality",
            choices=[self.DEFAULT_OPTION, *(self._list_personalities())],
            value=current_value,
        )
        self.apply_btn = gr.Button("Apply personality")
        self.status_md = gr.Markdown(visible=True)
        self.preview_md = gr.Markdown(value=self._read_instructions_for(current_value))
        self.person_name_tb = gr.Textbox(label="Personality name")
        self.person_instr_ta = gr.TextArea(label="Personality instructions", lines=10)
        self.tools_txt_ta = gr.TextArea(label="tools.txt", lines=10)
        # Voice choices are refreshed asynchronously on blocks.load (see wire_events).
        self.voice_dropdown = gr.Dropdown(label="Voice", choices=["cedar"], value="cedar")
        self.new_personality_btn = gr.Button("New personality")
        self.available_tools_cg = gr.CheckboxGroup(label="Available tools (helper)", choices=[], value=[])
        self.save_btn = gr.Button("Save personality (instructions + tools)")

    def additional_inputs_ordered(self) -> list[Any]:
        """Return the additional inputs in the expected order for Stream."""
        return [
            self.personalities_dropdown,
            self.apply_btn,
            self.new_personality_btn,
            self.status_md,
            self.preview_md,
            self.person_name_tb,
            self.person_instr_ta,
            self.tools_txt_ta,
            self.voice_dropdown,
            self.available_tools_cg,
            self.save_btn,
        ]

    # ---------- Event wiring ----------
    def wire_events(self, handler: Any, blocks: gr.Blocks) -> None:
        """Attach event handlers to components within a Blocks context."""

        # Apply the selected profile to the live handler; DEFAULT_OPTION maps to None.
        async def _apply_personality(selected: str) -> tuple[str, str]:
            profile = None if selected == self.DEFAULT_OPTION else selected
            status = await handler.apply_personality(profile)
            preview = self._read_instructions_for(selected)
            return status, preview

        # Read the profile's voice.txt, defaulting to "cedar" on any problem.
        def _read_voice_for(name: str) -> str:
            try:
                if name == self.DEFAULT_OPTION:
                    return "cedar"
                vf = self._resolve_profile_dir(name) / "voice.txt"
                if vf.exists():
                    v = vf.read_text(encoding="utf-8").strip()
                    return v or "cedar"
            except Exception:
                pass
            return "cedar"

        # Populate the voice dropdown from the handler's available voices.
        async def _fetch_voices(selected: str) -> dict[str, Any]:
            try:
                voices = await handler.get_available_voices()
                current = _read_voice_for(selected)
                if current not in voices:
                    current = "cedar"
                return gr.update(choices=voices, value=current)
            except Exception:
                return gr.update(choices=["cedar"], value="cedar")

        # Collect (shared tools, profile-local tools) as sorted module stems.
        def _available_tools_for(selected: str) -> tuple[list[str], list[str]]:
            shared: list[str] = []
            try:
                for py in self._tools_dir.glob("*.py"):
                    if py.stem in {"__init__", "core_tools"}:
                        continue
                    shared.append(py.stem)
            except Exception:
                pass
            local: list[str] = []
            try:
                if selected != self.DEFAULT_OPTION:
                    for py in (self._profiles_root / selected).glob("*.py"):
                        local.append(py.stem)
            except Exception:
                pass
            return sorted(shared), sorted(local)

        # Non-empty, non-comment lines of tools.txt are the enabled tool names.
        def _parse_enabled_tools(text: str) -> list[str]:
            enabled: list[str] = []
            for line in text.splitlines():
                s = line.strip()
                if not s or s.startswith("#"):
                    continue
                enabled.append(s)
            return enabled

        # Fill the editor widgets from the selected profile's files.
        def _load_profile_for_edit(selected: str) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any], str]:
            instr = self._read_instructions_for(selected)
            tools_txt = ""
            if selected != self.DEFAULT_OPTION:
                tp = self._resolve_profile_dir(selected) / "tools.txt"
                if tp.exists():
                    tools_txt = tp.read_text(encoding="utf-8")
            shared, local = _available_tools_for(selected)
            all_tools = sorted(set(shared + local))
            enabled = _parse_enabled_tools(tools_txt)
            status_text = f"Loaded profile '{selected}'."
            return (
                gr.update(value=instr),
                gr.update(value=tools_txt),
                gr.update(choices=all_tools, value=enabled),
                status_text,
            )

        # Reset the editor widgets with placeholder hints for a fresh profile.
        def _new_personality() -> tuple[
            dict[str, Any], dict[str, Any], dict[str, Any], dict[str, Any], str, dict[str, Any]
        ]:
            try:
                # Prefill with hints
                instr_val = """# Write your instructions here\n# e.g., Keep responses concise and friendly."""
                tools_txt_val = "# tools enabled for this profile\n"
                return (
                    gr.update(value=""),
                    gr.update(value=instr_val),
                    gr.update(value=tools_txt_val),
                    gr.update(choices=sorted(_available_tools_for(self.DEFAULT_OPTION)[0]), value=[]),
                    "Fill in a name, instructions and (optional) tools, then Save.",
                    gr.update(value="cedar"),
                )
            except Exception:
                return (
                    gr.update(),
                    gr.update(),
                    gr.update(),
                    gr.update(),
                    "Failed to initialize new personality.",
                    gr.update(),
                )

        # Persist the edited profile under user_personalities/ and refresh the dropdown.
        def _save_personality(
            name: str, instructions: str, tools_text: str, voice: str
        ) -> tuple[dict[str, Any], dict[str, Any], str]:
            name_s = self._sanitize_name(name)
            if not name_s:
                return gr.update(), gr.update(), "Please enter a valid name."
            try:
                target_dir = self._profiles_root / "user_personalities" / name_s
                target_dir.mkdir(parents=True, exist_ok=True)
                (target_dir / "instructions.txt").write_text(instructions.strip() + "\n", encoding="utf-8")
                (target_dir / "tools.txt").write_text(tools_text.strip() + "\n", encoding="utf-8")
                (target_dir / "voice.txt").write_text((voice or "cedar").strip() + "\n", encoding="utf-8")

                choices = self._list_personalities()
                value = f"user_personalities/{name_s}"
                if value not in choices:
                    choices.append(value)
                return (
                    gr.update(choices=[self.DEFAULT_OPTION, *sorted(choices)], value=value),
                    gr.update(value=instructions),
                    f"Saved personality '{name_s}'.",
                )
            except Exception as e:
                return gr.update(), gr.update(), f"Failed to save personality: {e}"

        # Rebuild tools.txt from the checkbox selection, preserving comment lines.
        def _sync_tools_from_checks(selected: list[str], current_text: str) -> dict[str, Any]:
            comments = [ln for ln in current_text.splitlines() if ln.strip().startswith("#")]
            body = "\n".join(selected)
            out = ("\n".join(comments) + ("\n" if comments else "") + body).strip() + "\n"
            return gr.update(value=out)

        with blocks:
            self.apply_btn.click(
                fn=_apply_personality,
                inputs=[self.personalities_dropdown],
                outputs=[self.status_md, self.preview_md],
            )

            self.personalities_dropdown.change(
                fn=_load_profile_for_edit,
                inputs=[self.personalities_dropdown],
                outputs=[self.person_instr_ta, self.tools_txt_ta, self.available_tools_cg, self.status_md],
            )

            blocks.load(
                fn=_fetch_voices,
                inputs=[self.personalities_dropdown],
                outputs=[self.voice_dropdown],
            )

            self.available_tools_cg.change(
                fn=_sync_tools_from_checks,
                inputs=[self.available_tools_cg, self.tools_txt_ta],
                outputs=[self.tools_txt_ta],
            )

            self.new_personality_btn.click(
                fn=_new_personality,
                inputs=[],
                outputs=[
                    self.person_name_tb,
                    self.person_instr_ta,
                    self.tools_txt_ta,
                    self.available_tools_cg,
                    self.status_md,
                    self.voice_dropdown,
                ],
            )

            # Saving also re-applies the (now selected) personality to the handler.
            self.save_btn.click(
                fn=_save_personality,
                inputs=[self.person_name_tb, self.person_instr_ta, self.tools_txt_ta, self.voice_dropdown],
                outputs=[self.personalities_dropdown, self.person_instr_ta, self.status_md],
            ).then(
                fn=_apply_personality,
                inputs=[self.personalities_dropdown],
                outputs=[self.status_md, self.preview_md],
            )
src/reachy_mini_conversation_app/headless_personality.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Headless personality management (console-based).
2
+
3
+ Provides an interactive CLI to browse, preview, apply, create and edit
4
+ "personalities" (profiles) when running without Gradio.
5
+
6
+ This module is intentionally not shared with the Gradio implementation to
7
+ avoid coupling and keep responsibilities clear for headless mode.
8
+ """
9
+
10
+ from __future__ import annotations
11
+ from typing import List
12
+ from pathlib import Path
13
+
14
+
15
# Sentinel selection label meaning "use the built-in default prompt"
# rather than a profile directory on disk.
DEFAULT_OPTION = "(built-in default)"
16
+
17
+
18
def _profiles_root() -> Path:
    """Directory containing the personality profile folders."""
    package_dir = Path(__file__).parent
    return package_dir / "profiles"
20
+
21
+
22
def _prompts_dir() -> Path:
    """Directory containing the built-in prompt files."""
    package_dir = Path(__file__).parent
    return package_dir / "prompts"
24
+
25
+
26
def _tools_dir() -> Path:
    """Directory containing the shared tool modules."""
    package_dir = Path(__file__).parent
    return package_dir / "tools"
28
+
29
+
30
def _sanitize_name(name: str) -> str:
    """Collapse whitespace to underscores and drop filesystem-unsafe characters."""
    import re

    collapsed = re.sub(r"\s+", "_", name.strip())
    return re.sub(r"[^a-zA-Z0-9_-]", "", collapsed)
37
+
38
+
39
def list_personalities() -> List[str]:
    """List available personality profile names.

    Built-in profiles come first (folder name only), followed by user
    profiles reported as "user_personalities/<name>". A folder counts as
    a profile only if it contains an instructions.txt file.
    """
    found: List[str] = []
    root = _profiles_root()
    try:
        if root.exists():
            found.extend(
                entry.name
                for entry in sorted(root.iterdir())
                if entry.name != "user_personalities"
                and entry.is_dir()
                and (entry / "instructions.txt").exists()
            )
        user_root = root / "user_personalities"
        if user_root.exists():
            found.extend(
                f"user_personalities/{entry.name}"
                for entry in sorted(user_root.iterdir())
                if entry.is_dir() and (entry / "instructions.txt").exists()
            )
    except Exception:
        # Best effort: an unreadable profiles tree yields whatever was found.
        pass
    return found
58
+
59
+
60
def resolve_profile_dir(selection: str) -> Path:
    """Resolve the directory path for the given profile selection."""
    root = _profiles_root()
    return root / selection
63
+
64
+
65
def read_instructions_for(name: str) -> str:
    """Read the instructions.txt content for the given profile name.

    The DEFAULT_OPTION sentinel reads the built-in default prompt instead.
    Missing files yield an empty string; read errors are reported inline.
    """
    try:
        if name == DEFAULT_OPTION:
            source = _prompts_dir() / "default_prompt.txt"
        else:
            source = resolve_profile_dir(name) / "instructions.txt"
        if not source.exists():
            return ""
        return source.read_text(encoding="utf-8").strip()
    except Exception as e:
        return f"Could not load instructions: {e}"
75
+
76
+
77
def available_tools_for(selected: str) -> List[str]:
    """List available tool modules for the given profile selection.

    Combines the shared tools directory (minus __init__ and core_tools)
    with any .py files local to the selected profile, deduplicated and
    sorted.
    """
    found: set = set()
    try:
        found.update(
            py.stem
            for py in _tools_dir().glob("*.py")
            if py.stem not in {"__init__", "core_tools"}
        )
    except Exception:
        pass
    if selected != DEFAULT_OPTION:
        try:
            found.update(py.stem for py in resolve_profile_dir(selected).glob("*.py"))
        except Exception:
            pass
    return sorted(found)
95
+
96
+
97
def _write_profile(name_s: str, instructions: str, tools_text: str, voice: str = "cedar") -> None:
    """Persist a user personality (instructions, tools, voice) to disk."""
    target_dir = _profiles_root() / "user_personalities" / name_s
    target_dir.mkdir(parents=True, exist_ok=True)
    # Each file gets a trailing newline; empty tools/voice fall back sanely.
    contents = {
        "instructions.txt": instructions.strip() + "\n",
        "tools.txt": (tools_text or "").strip() + "\n",
        "voice.txt": (voice or "cedar").strip() + "\n",
    }
    for filename, text in contents.items():
        (target_dir / filename).write_text(text, encoding="utf-8")
src/reachy_mini_conversation_app/headless_personality_ui.py ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Settings UI routes for headless personality management.
2
+
3
+ Exposes REST endpoints on the provided FastAPI settings app. The
4
+ implementation schedules backend actions (apply personality, fetch voices)
5
+ onto the running LocalStream asyncio loop using the supplied get_loop
6
+ callable to avoid cross-thread issues.
7
+ """
8
+
9
+ from __future__ import annotations
10
+ import asyncio
11
+ import logging
12
+ from typing import Any, Callable, Optional
13
+
14
+ from fastapi import FastAPI
15
+
16
+ from .config import config
17
+ from .openai_realtime import OpenaiRealtimeHandler
18
+ from .headless_personality import (
19
+ DEFAULT_OPTION,
20
+ _sanitize_name,
21
+ _write_profile,
22
+ list_personalities,
23
+ available_tools_for,
24
+ resolve_profile_dir,
25
+ read_instructions_for,
26
+ )
27
+
28
+
29
def mount_personality_routes(
    app: FastAPI,
    handler: OpenaiRealtimeHandler,
    get_loop: Callable[[], asyncio.AbstractEventLoop | None],
    *,
    persist_personality: Callable[[Optional[str]], None] | None = None,
    get_persisted_personality: Callable[[], Optional[str]] | None = None,
) -> None:
    """Register personality management endpoints on a FastAPI app.

    Args:
        app: Settings FastAPI app the routes are attached to.
        handler: Realtime handler whose ``apply_personality`` /
            ``get_available_voices`` coroutines are scheduled on the stream loop.
        get_loop: Returns the running LocalStream asyncio loop (or None when
            the stream is not up yet); used with run_coroutine_threadsafe to
            avoid cross-thread loop access.
        persist_personality: Optional callback storing the startup personality.
        get_persisted_personality: Optional callback reading the stored choice.
    """
    # Imported lazily so this module stays importable when the settings app
    # stack (fastapi/pydantic) is absent; in that case no routes are mounted.
    try:
        from fastapi import Request
        from pydantic import BaseModel
        from fastapi.responses import JSONResponse
    except Exception:  # pragma: no cover - only when settings app not available
        return

    # NOTE(review): SavePayload is not referenced by any endpoint below (the
    # save routes parse raw JSON/forms instead) — presumably kept as schema
    # documentation; confirm before removing.
    class SavePayload(BaseModel):
        name: str
        instructions: str
        tools_text: str
        voice: Optional[str] = "cedar"

    class ApplyPayload(BaseModel):
        name: str
        persist: Optional[bool] = False

    def _startup_choice() -> Any:
        """Return the persisted startup personality or default."""
        # Precedence: stored choice, then config env value, then default.
        try:
            if get_persisted_personality is not None:
                stored = get_persisted_personality()
                if stored:
                    return stored
            env_val = getattr(config, "REACHY_MINI_CUSTOM_PROFILE", None)
            if env_val:
                return env_val
        except Exception:
            pass
        return DEFAULT_OPTION

    def _current_choice() -> str:
        """Return the currently-active profile name from config, or default."""
        try:
            cur = getattr(config, "REACHY_MINI_CUSTOM_PROFILE", None)
            return cur or DEFAULT_OPTION
        except Exception:
            return DEFAULT_OPTION

    @app.get("/personalities")
    def _list() -> dict:  # type: ignore
        """List selectable personalities plus current and startup choices."""
        choices = [DEFAULT_OPTION, *list_personalities()]
        return {"choices": choices, "current": _current_choice(), "startup": _startup_choice()}

    @app.get("/personalities/load")
    def _load(name: str) -> dict:  # type: ignore
        """Load instructions, tools, and voice settings for one personality."""
        instr = read_instructions_for(name)
        tools_txt = ""
        voice = "cedar"
        # Only non-default profiles have per-profile tools.txt / voice.txt.
        if name != DEFAULT_OPTION:
            pdir = resolve_profile_dir(name)
            tp = pdir / "tools.txt"
            if tp.exists():
                tools_txt = tp.read_text(encoding="utf-8")
            vf = pdir / "voice.txt"
            if vf.exists():
                v = vf.read_text(encoding="utf-8").strip()
                voice = v or "cedar"
        avail = available_tools_for(name)
        # Enabled tools: non-empty, non-comment lines of tools.txt.
        enabled = [ln.strip() for ln in tools_txt.splitlines() if ln.strip() and not ln.strip().startswith("#")]
        return {
            "instructions": instr,
            "tools_text": tools_txt,
            "voice": voice,
            "available_tools": avail,
            "enabled_tools": enabled,
        }

    @app.post("/personalities/save")
    async def _save(request: Request) -> dict:  # type: ignore
        """Save a personality from a raw JSON body (no pydantic validation)."""
        # Accept raw JSON only to avoid validation-related 422s
        try:
            raw = await request.json()
        except Exception:
            raw = {}
        name = str(raw.get("name", ""))
        instructions = str(raw.get("instructions", ""))
        tools_text = str(raw.get("tools_text", ""))
        voice = str(raw.get("voice", "cedar")) if raw.get("voice") is not None else "cedar"

        name_s = _sanitize_name(name)
        if not name_s:
            return JSONResponse({"ok": False, "error": "invalid_name"}, status_code=400)  # type: ignore
        try:
            # `logger` is a closure variable assigned later in this function
            # body; it is bound before any request can reach this handler.
            logger.info(
                "Headless save: name=%r voice=%r instr_len=%d tools_len=%d",
                name_s,
                voice,
                len(instructions),
                len(tools_text),
            )
            _write_profile(name_s, instructions, tools_text, voice or "cedar")
            value = f"user_personalities/{name_s}"
            choices = [DEFAULT_OPTION, *list_personalities()]
            return {"ok": True, "value": value, "choices": choices}
        except Exception as e:
            return JSONResponse({"ok": False, "error": str(e)}, status_code=500)  # type: ignore

    @app.post("/personalities/save_raw")
    async def _save_raw(
        request: Request,
        name: Optional[str] = None,
        instructions: Optional[str] = None,
        tools_text: Optional[str] = None,
        voice: Optional[str] = None,
    ) -> dict:  # type: ignore
        """Save a personality, tolerating query, form, or JSON input."""
        # Accept query params, form-encoded, or raw JSON
        data = {"name": name, "instructions": instructions, "tools_text": tools_text, "voice": voice}
        # Prefer form if present
        try:
            form = await request.form()
            for k in ("name", "instructions", "tools_text", "voice"):
                if k in form and form[k] is not None:
                    data[k] = str(form[k])
        except Exception:
            pass
        # Try JSON
        try:
            raw = await request.json()
            if isinstance(raw, dict):
                for k in ("name", "instructions", "tools_text", "voice"):
                    if raw.get(k) is not None:
                        data[k] = str(raw.get(k))
        except Exception:
            pass

        name_s = _sanitize_name(str(data.get("name") or ""))
        if not name_s:
            return JSONResponse({"ok": False, "error": "invalid_name"}, status_code=400)  # type: ignore
        instr = str(data.get("instructions") or "")
        tools = str(data.get("tools_text") or "")
        v = str(data.get("voice") or "cedar")
        try:
            logger.info(
                "Headless save_raw: name=%r voice=%r instr_len=%d tools_len=%d", name_s, v, len(instr), len(tools)
            )
            _write_profile(name_s, instr, tools, v)
            value = f"user_personalities/{name_s}"
            choices = [DEFAULT_OPTION, *list_personalities()]
            return {"ok": True, "value": value, "choices": choices}
        except Exception as e:
            return JSONResponse({"ok": False, "error": str(e)}, status_code=500)  # type: ignore

    @app.get("/personalities/save_raw")
    async def _save_raw_get(name: str, instructions: str = "", tools_text: str = "", voice: str = "cedar") -> dict:  # type: ignore
        """GET variant of save_raw taking everything as query parameters."""
        name_s = _sanitize_name(name)
        if not name_s:
            return JSONResponse({"ok": False, "error": "invalid_name"}, status_code=400)  # type: ignore
        try:
            logger.info(
                "Headless save_raw(GET): name=%r voice=%r instr_len=%d tools_len=%d",
                name_s,
                voice,
                len(instructions),
                len(tools_text),
            )
            _write_profile(name_s, instructions, tools_text, voice or "cedar")
            value = f"user_personalities/{name_s}"
            choices = [DEFAULT_OPTION, *list_personalities()]
            return {"ok": True, "value": value, "choices": choices}
        except Exception as e:
            return JSONResponse({"ok": False, "error": str(e)}, status_code=500)  # type: ignore

    # NOTE(review): assigned mid-function but closed over by the handlers
    # defined above; safe only because routes run after mounting completes.
    logger = logging.getLogger(__name__)

    @app.post("/personalities/apply")
    async def _apply(
        payload: ApplyPayload | None = None,
        name: str | None = None,
        persist: Optional[bool] = None,
        request: Optional[Request] = None,
    ) -> dict:  # type: ignore
        """Apply a personality on the stream loop; optionally persist it."""
        loop = get_loop()
        if loop is None:
            return JSONResponse({"ok": False, "error": "loop_unavailable"}, status_code=503)  # type: ignore

        # Accept both JSON payload and query param for convenience
        sel_name: Optional[str] = None
        persist_flag = bool(persist) if persist is not None else False
        if payload and getattr(payload, "name", None):
            sel_name = payload.name
            persist_flag = bool(getattr(payload, "persist", False))
        elif name:
            sel_name = name
        elif request is not None:
            try:
                body = await request.json()
                if isinstance(body, dict) and body.get("name"):
                    sel_name = str(body.get("name"))
                if isinstance(body, dict) and "persist" in body:
                    persist_flag = bool(body.get("persist"))
            except Exception:
                sel_name = None
        # A "persist" query parameter overrides whatever the body said.
        if request is not None:
            try:
                q_persist = request.query_params.get("persist")
                if q_persist is not None:
                    persist_flag = str(q_persist).lower() in {"1", "true", "yes", "on"}
            except Exception:
                pass
        if not sel_name:
            sel_name = DEFAULT_OPTION

        async def _do_apply() -> str:
            # The default option maps to None for the handler API.
            sel = None if sel_name == DEFAULT_OPTION else sel_name
            status = await handler.apply_personality(sel)
            return status

        try:
            logger.info("Headless apply: requested name=%r", sel_name)
            # Schedule on the LocalStream loop from this server thread.
            fut = asyncio.run_coroutine_threadsafe(_do_apply(), loop)
            status = fut.result(timeout=10)
            persisted_choice = _startup_choice()
            if persist_flag and persist_personality is not None:
                try:
                    persist_personality(None if sel_name == DEFAULT_OPTION else sel_name)
                    persisted_choice = _startup_choice()
                except Exception as e:
                    logger.warning("Failed to persist startup personality: %s", e)
            return {"ok": True, "status": status, "startup": persisted_choice}
        except Exception as e:
            return JSONResponse({"ok": False, "error": str(e)}, status_code=500)  # type: ignore

    @app.get("/voices")
    async def _voices() -> list[str]:
        """Fetch available voices from the handler; fall back to ["cedar"]."""
        loop = get_loop()
        if loop is None:
            return ["cedar"]

        async def _get_v() -> list[str]:
            try:
                return await handler.get_available_voices()
            except Exception:
                return ["cedar"]

        try:
            fut = asyncio.run_coroutine_threadsafe(_get_v(), loop)
            return fut.result(timeout=10)
        except Exception:
            return ["cedar"]
src/reachy_mini_conversation_app/images/reachymini_avatar.png ADDED

Git LFS Details

  • SHA256: 5a63ac8802ff3542f01292c431c5278296880d74cd3580d219fcf4827bc235f9
  • Pointer size: 132 Bytes
  • Size of remote file: 1.23 MB
src/reachy_mini_conversation_app/images/user_avatar.png ADDED

Git LFS Details

  • SHA256: e97ca125a86bacdaa41c8dca88abd9ca746fd5c9391eda24249c012432b0219b
  • Pointer size: 132 Bytes
  • Size of remote file: 1.11 MB
src/reachy_mini_conversation_app/main.py ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Entrypoint for the Reachy Mini conversation app."""
2
+
3
+ import os
4
+ import sys
5
+ import time
6
+ import asyncio
7
+ import argparse
8
+ import threading
9
+ from typing import Any, Dict, List, Optional
10
+
11
+ import gradio as gr
12
+ from fastapi import FastAPI
13
+ from fastrtc import Stream
14
+ from gradio.utils import get_space
15
+
16
+ from reachy_mini import ReachyMini, ReachyMiniApp
17
+ from reachy_mini_conversation_app.utils import (
18
+ parse_args,
19
+ setup_logger,
20
+ handle_vision_stuff,
21
+ )
22
+
23
+
24
def update_chatbot(chatbot: List[Dict[str, Any]], response: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Append a streamed response message to the chat history.

    The history list is mutated in place and returned.
    """
    chatbot += [response]
    return chatbot
28
+
29
+
30
def main() -> None:
    """CLI entrypoint: parse command-line arguments and launch the app."""
    parsed, _extras = parse_args()
    run(parsed)
34
+
35
+
36
def run(
    args: argparse.Namespace,
    robot: Optional[ReachyMini] = None,
    app_stop_event: Optional[threading.Event] = None,
    settings_app: Optional[FastAPI] = None,
    instance_path: Optional[str] = None,
) -> None:
    """Run the Reachy Mini conversation app.

    Args:
        args: Parsed CLI arguments (see ``parse_args``).
        robot: Already-connected robot; when None one is created here based
            on the CLI flags.
        app_stop_event: Optional event used by the app launcher to request a
            graceful shutdown.
        settings_app: Optional FastAPI settings app to mount UI/routes on.
        instance_path: Optional per-instance storage path forwarded to the
            realtime handler.
    """
    # Putting these dependencies here makes the dashboard faster to load when the conversation app is installed
    from reachy_mini_conversation_app.moves import MovementManager
    from reachy_mini_conversation_app.console import LocalStream
    from reachy_mini_conversation_app.openai_realtime import OpenaiRealtimeHandler
    from reachy_mini_conversation_app.tools.core_tools import ToolDependencies
    from reachy_mini_conversation_app.audio.head_wobbler import HeadWobbler

    logger = setup_logger(args.debug)
    logger.info("Starting Reachy Mini Conversation App")

    if args.no_camera and args.head_tracker is not None:
        logger.warning("Head tracking is not activated due to --no-camera.")

    if robot is None:
        # Initialize robot with appropriate backend
        # TODO: Implement dynamic robot connection detection
        # Automatically detect and connect to available Reachy Mini robot(s!)
        # Priority checks (in order):
        # 1. Reachy Lite connected directly to the host
        # 2. Reachy Mini daemon running on localhost (same device)
        # 3. Reachy Mini daemon on local network (same subnet)

        # Media backend choice depends on where the app runs relative to the
        # robot (remote/wireless/on-device) and whether a camera is wanted.
        if args.remote:
            logger.info("Connecting to remote Reachy Mini on the network")
            robot = ReachyMini(media_backend="default_no_video", localhost_only=False)
        elif args.wireless_version and not args.on_device:
            logger.info("Using WebRTC backend for fully remote wireless version")
            robot = ReachyMini(media_backend="webrtc", localhost_only=False)
        elif args.wireless_version and args.on_device:
            logger.info("Using GStreamer backend for on-device wireless version")
            robot = ReachyMini(media_backend="gstreamer")
        elif args.no_camera:
            logger.info("Using audio-only backend (no camera)")
            robot = ReachyMini(media_backend="default_no_video")
        else:
            logger.info("Using default backend for lite version")
            robot = ReachyMini(media_backend="default")

    # Check if running in simulation mode without --gradio
    if robot.client.get_status()["simulation_enabled"] and not args.gradio:
        logger.error(
            "Simulation mode requires Gradio interface. Please use --gradio flag when running in simulation mode.",
        )
        robot.client.disconnect()
        sys.exit(1)

    # camera_worker / vision_manager may be None depending on CLI flags.
    camera_worker, _, vision_manager = handle_vision_stuff(args, robot)

    movement_manager = MovementManager(
        current_robot=robot,
        camera_worker=camera_worker,
    )

    # The wobbler feeds speech-synchronized head offsets into the mover.
    head_wobbler = HeadWobbler(set_speech_offsets=movement_manager.set_speech_offsets)

    deps = ToolDependencies(
        reachy_mini=robot,
        movement_manager=movement_manager,
        camera_worker=camera_worker,
        vision_manager=vision_manager,
        head_wobbler=head_wobbler,
    )
    current_file_path = os.path.dirname(os.path.abspath(__file__))
    logger.debug(f"Current file absolute path: {current_file_path}")
    chatbot = gr.Chatbot(
        type="messages",
        resizable=True,
        avatar_images=(
            os.path.join(current_file_path, "images", "user_avatar.png"),
            os.path.join(current_file_path, "images", "reachymini_avatar.png"),
        ),
    )
    logger.debug(f"Chatbot avatar images: {chatbot.avatar_images}")

    handler = OpenaiRealtimeHandler(deps, gradio_mode=args.gradio, instance_path=instance_path)

    stream_manager: gr.Blocks | LocalStream | None = None

    if args.gradio:
        api_key_textbox = gr.Textbox(
            label="OPENAI API Key",
            type="password",
            value=os.getenv("OPENAI_API_KEY") if not get_space() else "",
        )

        from reachy_mini_conversation_app.gradio_personality import PersonalityUI

        personality_ui = PersonalityUI()
        personality_ui.create_components()

        stream = Stream(
            handler=handler,
            mode="send-receive",
            modality="audio",
            additional_inputs=[
                chatbot,
                api_key_textbox,
                *personality_ui.additional_inputs_ordered(),
            ],
            additional_outputs=[chatbot],
            additional_outputs_handler=update_chatbot,
            ui_args={"title": "Talk with Reachy Mini"},
        )
        stream_manager = stream.ui
        if not settings_app:
            app = FastAPI()
        else:
            app = settings_app

        personality_ui.wire_events(handler, stream_manager)

        app = gr.mount_gradio_app(app, stream.ui, path="/")
    else:
        # In headless mode, wire settings_app + instance_path to console LocalStream
        stream_manager = LocalStream(
            handler,
            robot,
            settings_app=settings_app,
            instance_path=instance_path,
        )

    # Each async service → its own thread/loop
    movement_manager.start()
    head_wobbler.start()
    if camera_worker:
        camera_worker.start()
    if vision_manager:
        vision_manager.start()

    def poll_stop_event() -> None:
        """Poll the stop event to allow graceful shutdown."""
        if app_stop_event is not None:
            app_stop_event.wait()

        # Closing the stream manager unblocks the launch() call below,
        # which then runs the cleanup in the finally block.
        logger.info("App stop event detected, shutting down...")
        try:
            stream_manager.close()
        except Exception as e:
            logger.error(f"Error while closing stream manager: {e}")

    if app_stop_event:
        threading.Thread(target=poll_stop_event, daemon=True).start()

    try:
        # Blocks until the UI/stream is closed or interrupted.
        stream_manager.launch()
    except KeyboardInterrupt:
        logger.info("Keyboard interruption in main thread... closing server.")
    finally:
        movement_manager.stop()
        head_wobbler.stop()
        if camera_worker:
            camera_worker.stop()
        if vision_manager:
            vision_manager.stop()

        # Ensure media is explicitly closed before disconnecting
        try:
            robot.media.close()
        except Exception as e:
            logger.debug(f"Error closing media during shutdown: {e}")

        # prevent connection to keep alive some threads
        robot.client.disconnect()
        time.sleep(1)
        logger.info("Shutdown complete.")
209
+
210
+
211
class ReachyMiniConversationApp(ReachyMiniApp):  # type: ignore[misc]
    """Reachy Mini Apps entry point for the conversation app."""

    # URL advertised for the app's own web UI; presumably consumed by the
    # Reachy Mini app launcher/dashboard — TODO confirm against ReachyMiniApp.
    custom_app_url = "http://0.0.0.0:7860/"
    dont_start_webserver = False

    def run(self, reachy_mini: ReachyMini, stop_event: threading.Event) -> None:
        """Run the Reachy Mini conversation app.

        Args:
            reachy_mini: Robot instance supplied by the app framework.
            stop_event: Framework-owned event signalling shutdown.
        """
        # Give this thread its own event loop for the async services.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

        args, _ = parse_args()

        # is_wireless = reachy_mini.client.get_status()["wireless_version"]
        # args.head_tracker = None if is_wireless else "mediapipe"

        instance_path = self._get_instance_path().parent
        run(
            args,
            robot=reachy_mini,
            app_stop_event=stop_event,
            settings_app=self.settings_app,
            instance_path=instance_path,
        )
235
+
236
+
237
if __name__ == "__main__":
    # Direct execution (outside the app launcher): run the wrapped app and
    # make Ctrl-C trigger a clean stop.
    app = ReachyMiniConversationApp()
    try:
        app.wrapped_run()
    except KeyboardInterrupt:
        app.stop()
src/reachy_mini_conversation_app/mcp/__init__.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ """MCP (Model Context Protocol) integration for Reachy Mini Conversation App."""
2
+
3
+ from reachy_mini_conversation_app.mcp.client import MCPClient
4
+ from reachy_mini_conversation_app.mcp.figma import FigmaMCPTools
5
+
6
+
7
+ __all__ = ["MCPClient", "FigmaMCPTools"]
8
+
src/reachy_mini_conversation_app/mcp/client.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """MCP Client for connecting to Model Context Protocol servers."""
2
+
3
+ import json
4
+ import logging
5
+ import asyncio
6
+ from typing import Any, Dict, List, Optional
7
+ from dataclasses import dataclass, field
8
+
9
+ import httpx
10
+
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
@dataclass
class MCPTool:
    """Represents a tool exposed by an MCP server."""

    name: str  # tool identifier as reported by the server
    description: str  # human-readable description supplied by the server
    input_schema: Dict[str, Any]  # JSON Schema describing the tool's arguments
    server_name: str  # base URL of the server this tool was discovered on
23
+
24
+
25
@dataclass
class MCPClient:
    """Client for communicating with MCP servers via SSE (Server-Sent Events).

    Supports both remote MCP servers (like Figma's https://mcp.figma.com)
    and local MCP servers running on localhost.
    """

    base_url: str  # server endpoint, e.g. "http://localhost:8765"
    auth_token: Optional[str] = None  # bearer token sent when set
    timeout: float = 30.0  # per-request timeout in seconds
    _tools: List[MCPTool] = field(default_factory=list)  # discovered tools
    _session_id: Optional[str] = None  # reserved; not populated yet
    _http_client: Optional[httpx.AsyncClient] = None

    async def connect(self) -> bool:
        """Establish connection and discover available tools.

        Returns True on success; on failure logs the error, cleans up any
        half-open HTTP client, and returns False.
        """
        try:
            headers = self._get_headers()
            self._http_client = httpx.AsyncClient(
                base_url=self.base_url,
                headers=headers,
                timeout=self.timeout,
            )

            # Initialize session
            await self._initialize_session()

            # Discover tools
            await self._discover_tools()

            logger.info(f"Connected to MCP server at {self.base_url}, found {len(self._tools)} tools")
            return True

        except Exception as e:
            logger.error(f"Failed to connect to MCP server: {e}")
            # Fix: don't leak an open AsyncClient when initialization or
            # discovery fails part-way through.
            await self.disconnect()
            return False

    async def disconnect(self) -> None:
        """Close the connection to the MCP server and reset client state."""
        if self._http_client:
            await self._http_client.aclose()
        self._http_client = None
        self._session_id = None
        self._tools = []

    def _get_headers(self) -> Dict[str, str]:
        """Build request headers including auth if configured."""
        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json, text/event-stream",
        }
        if self.auth_token:
            headers["Authorization"] = f"Bearer {self.auth_token}"
        return headers

    async def _initialize_session(self) -> None:
        """Initialize the MCP session with the server.

        Sends the JSON-RPC ``initialize`` request per the MCP handshake.
        Raises on transport errors (via raise_for_status).
        """
        if not self._http_client:
            raise RuntimeError("HTTP client not initialized")

        # Send initialize request
        init_request = {
            "jsonrpc": "2.0",
            "id": 1,
            "method": "initialize",
            "params": {
                "protocolVersion": "2024-11-05",
                "capabilities": {
                    "tools": {},
                },
                "clientInfo": {
                    "name": "reachy-mini-conversation-app",
                    "version": "0.1.0",
                },
            },
        }

        response = await self._http_client.post("/", json=init_request)
        response.raise_for_status()

        result = response.json()
        if "result" in result:
            logger.debug(f"MCP session initialized: {result['result']}")

    async def _discover_tools(self) -> None:
        """Discover available tools from the MCP server via ``tools/list``."""
        if not self._http_client:
            raise RuntimeError("HTTP client not initialized")

        # Fix: start from a clean slate so a reconnect does not accumulate
        # duplicate MCPTool entries from previous discoveries.
        self._tools = []

        # Request tools list
        tools_request = {
            "jsonrpc": "2.0",
            "id": 2,
            "method": "tools/list",
            "params": {},
        }

        response = await self._http_client.post("/", json=tools_request)
        response.raise_for_status()

        result = response.json()
        if "result" in result and "tools" in result["result"]:
            for tool_data in result["result"]["tools"]:
                tool = MCPTool(
                    name=tool_data.get("name", "unknown"),
                    description=tool_data.get("description", ""),
                    input_schema=tool_data.get("inputSchema", {}),
                    server_name=self.base_url,
                )
                self._tools.append(tool)
                logger.debug(f"Discovered MCP tool: {tool.name}")

    async def call_tool(self, tool_name: str, arguments: Dict[str, Any]) -> Any:
        """Execute a tool on the MCP server.

        Args:
            tool_name: Name of the tool to call
            arguments: Arguments to pass to the tool

        Returns:
            The result from the tool execution

        Raises:
            RuntimeError: When not connected, or when the server returns a
                JSON-RPC error object.
        """
        if not self._http_client:
            raise RuntimeError("Not connected to MCP server")

        call_request = {
            "jsonrpc": "2.0",
            "id": 3,
            "method": "tools/call",
            "params": {
                "name": tool_name,
                "arguments": arguments,
            },
        }

        logger.info(f"Calling MCP tool: {tool_name} with args: {arguments}")

        response = await self._http_client.post("/", json=call_request)
        response.raise_for_status()

        result = response.json()

        if "error" in result:
            error = result["error"]
            raise RuntimeError(f"MCP tool error: {error.get('message', 'Unknown error')}")

        if "result" in result:
            content = result["result"].get("content", [])
            # Extract text content from response; fall back to raw JSON when
            # the server returned no text parts.
            text_parts = []
            for item in content:
                if item.get("type") == "text":
                    text_parts.append(item.get("text", ""))
            return "\n".join(text_parts) if text_parts else json.dumps(result["result"])

        return json.dumps(result)

    def get_tools(self) -> List[MCPTool]:
        """Get list of available tools (copy; safe for callers to mutate)."""
        return self._tools.copy()

    def get_tool(self, name: str) -> Optional[MCPTool]:
        """Get a specific tool by name, or None when unknown."""
        for tool in self._tools:
            if tool.name == name:
                return tool
        return None
193
+
src/reachy_mini_conversation_app/mcp/cursor_bridge.py ADDED
@@ -0,0 +1,344 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """MCP Server Bridge for Reachy-Cursor communication.
2
+
3
+ This module provides a local MCP server that acts as a bridge between
4
+ Reachy and Cursor. Reachy can send coding requests to this server,
5
+ and Cursor (configured to connect to this MCP server) can receive them.
6
+
7
+ ## How it works:
8
+ 1. This server runs locally (e.g., http://localhost:8765)
9
+ 2. Reachy calls the `cursor_bridge` tool to send coding requests
10
+ 3. Cursor connects to this MCP server and receives the requests as resources
11
+ 4. Cursor's AI can then execute the coding tasks
12
+
13
+ ## Setup in Cursor:
14
+ Add to your Cursor MCP settings:
15
+ {
16
+ "mcpServers": {
17
+ "reachy-bridge": {
18
+ "url": "http://localhost:8765"
19
+ }
20
+ }
21
+ }
22
+ """
23
+
24
+ import json
25
+ import asyncio
26
+ import logging
27
+ from datetime import datetime
28
+ from typing import Any, Dict, List, Optional
29
+ from dataclasses import dataclass, field
30
+ from aiohttp import web
31
+
32
+ logger = logging.getLogger(__name__)
33
+
34
+
35
@dataclass
class CodingTask:
    """Represents a coding task sent from Reachy."""
    id: str  # unique task identifier, e.g. "task_3_142501"
    request: str  # natural-language coding request
    context: Optional[str] = None  # optional extra context for the request
    priority: str = "normal"  # free-form priority label
    created_at: str = field(default_factory=lambda: datetime.now().isoformat())  # local-time ISO timestamp
    status: str = "pending"  # pending, in_progress, completed
44
+
45
+
46
+ class ReachyCursorBridge:
47
+ """MCP Server bridge between Reachy and Cursor.
48
+
49
+ Exposes coding tasks as MCP resources that Cursor can read and process.
50
+ """
51
+
52
    def __init__(self, host: str = "127.0.0.1", port: int = 8765):
        """Create the bridge bound to host:port; the server is not started yet."""
        self.host = host
        self.port = port
        self.tasks: Dict[str, CodingTask] = {}  # all tasks by id, including completed ones
        self.task_counter = 0  # monotonically increasing counter used in task ids
        self._app: Optional[web.Application] = None  # set by start()
        self._runner: Optional[web.AppRunner] = None  # set by start()
59
+
60
    async def start(self) -> None:
        """Start the MCP bridge server."""
        self._app = web.Application()
        # Single JSON-RPC endpoint at "/" plus a plain health probe.
        self._app.router.add_post("/", self._handle_jsonrpc)
        self._app.router.add_get("/health", self._handle_health)

        self._runner = web.AppRunner(self._app)
        await self._runner.setup()

        site = web.TCPSite(self._runner, self.host, self.port)
        await site.start()

        logger.info(f"Reachy-Cursor MCP Bridge running at http://{self.host}:{self.port}")
73
+
74
+ async def stop(self) -> None:
75
+ """Stop the MCP bridge server."""
76
+ if self._runner:
77
+ await self._runner.cleanup()
78
+
79
    async def add_task(self, request: str, context: Optional[str] = None,
                       priority: str = "normal") -> CodingTask:
        """Add a new coding task from Reachy.

        NOTE(review): declared async but performs no awaits — presumably to
        keep the call sites uniform with the rest of the bridge API; confirm.
        """
        self.task_counter += 1
        # Id combines the counter with an HHMMSS timestamp for readability.
        task_id = f"task_{self.task_counter}_{datetime.now().strftime('%H%M%S')}"

        task = CodingTask(
            id=task_id,
            request=request,
            context=context,
            priority=priority,
        )
        self.tasks[task_id] = task

        logger.info(f"New coding task added: {task_id}")
        return task
95
+
96
    def get_pending_tasks(self) -> List[CodingTask]:
        """Get all pending tasks."""
        # Completed and in-progress tasks stay in self.tasks but are excluded.
        return [t for t in self.tasks.values() if t.status == "pending"]
99
+
100
+ def mark_task_complete(self, task_id: str) -> bool:
101
+ """Mark a task as completed."""
102
+ if task_id in self.tasks:
103
+ self.tasks[task_id].status = "completed"
104
+ return True
105
+ return False
106
+
107
    async def _handle_health(self, request: web.Request) -> web.Response:
        """Health check endpoint."""
        # Lightweight liveness probe; `request` is intentionally unused.
        return web.json_response({"status": "ok", "service": "reachy-cursor-bridge"})
110
+
111
    async def _handle_jsonrpc(self, request: web.Request) -> web.Response:
        """Handle JSON-RPC requests from Cursor.

        Implements a minimal MCP server surface over HTTP: ``initialize``,
        ``resources/list``, ``resources/read``, ``tools/list`` and
        ``tools/call``. Any other method yields JSON-RPC error -32601.
        """
        try:
            data = await request.json()
        except json.JSONDecodeError:
            # Malformed body -> JSON-RPC "Parse error" (-32700); id is unknown.
            return web.json_response(
                {"jsonrpc": "2.0", "error": {"code": -32700, "message": "Parse error"}, "id": None}
            )

        method = data.get("method", "")
        params = data.get("params", {})
        req_id = data.get("id")

        logger.debug(f"MCP request: {method}")

        if method == "initialize":
            # Advertise protocol version, capabilities and server identity.
            return web.json_response({
                "jsonrpc": "2.0",
                "id": req_id,
                "result": {
                    "protocolVersion": "2024-11-05",
                    "capabilities": {
                        "resources": {"subscribe": True},
                        "tools": {},
                    },
                    "serverInfo": {
                        "name": "reachy-cursor-bridge",
                        "version": "1.0.0",
                    },
                },
            })

        elif method == "resources/list":
            # Expose pending tasks as resources
            resources = []
            for task in self.get_pending_tasks():
                resources.append({
                    "uri": f"reachy://task/{task.id}",
                    "name": f"Coding Task: {task.request[:50]}...",
                    "description": task.request,
                    "mimeType": "application/json",
                })

            # Also expose a "latest" resource (most recently created task, any status)
            if self.tasks:
                latest = max(self.tasks.values(), key=lambda t: t.created_at)
                resources.insert(0, {
                    "uri": "reachy://task/latest",
                    "name": "Latest Coding Request from Reachy",
                    "description": latest.request,
                    "mimeType": "application/json",
                })

            return web.json_response({
                "jsonrpc": "2.0",
                "id": req_id,
                "result": {"resources": resources},
            })

        elif method == "resources/read":
            uri = params.get("uri", "")

            if uri == "reachy://task/latest" and self.tasks:
                # "latest" resolves to the most recently created task.
                latest = max(self.tasks.values(), key=lambda t: t.created_at)
                content = self._format_task_content(latest)
            elif uri.startswith("reachy://task/"):
                # NOTE(review): with no tasks, "reachy://task/latest" falls
                # through here and reports task "latest" as not found.
                task_id = uri.replace("reachy://task/", "")
                task = self.tasks.get(task_id)
                if task:
                    content = self._format_task_content(task)
                else:
                    content = {"error": f"Task {task_id} not found"}
            else:
                content = {"error": "Unknown resource"}

            return web.json_response({
                "jsonrpc": "2.0",
                "id": req_id,
                "result": {
                    "contents": [{
                        "uri": uri,
                        "mimeType": "application/json",
                        "text": json.dumps(content, indent=2),
                    }],
                },
            })

        elif method == "tools/list":
            # Expose a tool for Cursor to mark tasks complete
            return web.json_response({
                "jsonrpc": "2.0",
                "id": req_id,
                "result": {
                    "tools": [
                        {
                            "name": "mark_task_complete",
                            "description": "Mark a Reachy coding task as completed",
                            "inputSchema": {
                                "type": "object",
                                "properties": {
                                    "task_id": {
                                        "type": "string",
                                        "description": "The ID of the task to mark complete",
                                    },
                                },
                                "required": ["task_id"],
                            },
                        },
                        {
                            "name": "get_current_request",
                            "description": "Get the current/latest coding request from Reachy",
                            "inputSchema": {
                                "type": "object",
                                "properties": {},
                            },
                        },
                    ],
                },
            })

        elif method == "tools/call":
            tool_name = params.get("name", "")
            arguments = params.get("arguments", {})

            if tool_name == "mark_task_complete":
                task_id = arguments.get("task_id", "")
                success = self.mark_task_complete(task_id)
                result_text = f"Task {task_id} marked complete" if success else f"Task {task_id} not found"
            elif tool_name == "get_current_request":
                if self.tasks:
                    latest = max(self.tasks.values(), key=lambda t: t.created_at)
                    result_text = json.dumps(self._format_task_content(latest), indent=2)
                else:
                    result_text = "No pending coding requests from Reachy"
            else:
                result_text = f"Unknown tool: {tool_name}"

            return web.json_response({
                "jsonrpc": "2.0",
                "id": req_id,
                "result": {
                    "content": [{"type": "text", "text": result_text}],
                },
            })

        else:
            # Unknown method -> JSON-RPC "Method not found".
            return web.json_response({
                "jsonrpc": "2.0",
                "id": req_id,
                "error": {"code": -32601, "message": f"Method not found: {method}"},
            })
262
+
263
+ def _format_task_content(self, task: CodingTask) -> Dict[str, Any]:
264
+ """Format a task for Cursor consumption."""
265
+ return {
266
+ "type": "coding_request",
267
+ "from": "reachy",
268
+ "task_id": task.id,
269
+ "request": task.request,
270
+ "context": task.context,
271
+ "priority": task.priority,
272
+ "created_at": task.created_at,
273
+ "status": task.status,
274
+ "instructions": (
275
+ "This is a voice-activated coding request from Reachy robot. "
276
+ "Please implement the request described above. "
277
+ "When complete, you can mark the task as done using mark_task_complete."
278
+ ),
279
+ }
280
+
281
+
282
+ # Global bridge instance (for tool access)
283
+ _bridge_instance: Optional[ReachyCursorBridge] = None
284
+
285
+
286
async def get_or_create_bridge(host: str = "127.0.0.1", port: int = 8765) -> ReachyCursorBridge:
    """Get or create the global bridge instance.

    Args:
        host: Interface the bridge binds to on first creation.
        port: Port the bridge binds to on first creation.

    Returns:
        The started, process-wide ReachyCursorBridge.

    Note:
        Bug fix: the global was previously assigned *before* ``start()``
        was awaited, so a failed start cached a never-started bridge that
        every later call silently returned. The global is now only set
        after ``start()`` succeeds, letting a retry create a fresh bridge.
    """
    global _bridge_instance

    if _bridge_instance is None:
        bridge = ReachyCursorBridge(host=host, port=port)
        await bridge.start()  # may raise; the global stays unset on failure
        _bridge_instance = bridge

    return _bridge_instance
295
+
296
+
297
async def send_to_cursor_via_bridge(request: str, context: Optional[str] = None) -> Dict[str, Any]:
    """Queue *request* on the shared bridge and describe where Cursor finds it."""
    bridge = await get_or_create_bridge()
    task = await bridge.add_task(request=request, context=context)

    summary: Dict[str, Any] = {
        "status": "queued",
        "task_id": task.id,
        "message": f"Coding request sent to bridge. Cursor can access it at reachy://task/{task.id}",
        "bridge_url": f"http://{bridge.host}:{bridge.port}",
    }
    return summary
308
+
309
+
310
if __name__ == "__main__":
    # Run as standalone server for testing.
    # Fixes: removed unused `import sys`; bridge.stop() now runs in a
    # `finally` so the HTTP server is released on any exit path, not only
    # on KeyboardInterrupt.
    logging.basicConfig(level=logging.INFO)

    async def main() -> None:
        """Start the bridge, queue a demo task, and serve until Ctrl+C."""
        bridge = ReachyCursorBridge()
        await bridge.start()

        # Seed a sample task so Cursor has something to read immediately.
        await bridge.add_task(
            request="Create a beautiful landing page with React and Tailwind CSS",
            context="Modern dark theme, hero section, features, pricing",
        )

        print(f"\nMCP Bridge running at http://{bridge.host}:{bridge.port}")
        print("\nTo connect from Cursor, add to your MCP settings:")
        print(json.dumps({
            "mcpServers": {
                "reachy-bridge": {
                    "url": f"http://{bridge.host}:{bridge.port}"
                }
            }
        }, indent=2))
        print("\nPress Ctrl+C to stop...")

        try:
            while True:
                await asyncio.sleep(1)
        except KeyboardInterrupt:
            pass
        finally:
            await bridge.stop()

    asyncio.run(main())
344
+
src/reachy_mini_conversation_app/mcp/figma.py ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Figma MCP integration for design operations."""
2
+
3
+ import os
4
+ import json
5
+ import logging
6
+ import asyncio
7
+ from typing import Any, Dict, List, Optional
8
+
9
+ from reachy_mini_conversation_app.mcp.client import MCPClient, MCPTool
10
+ from reachy_mini_conversation_app.tools.core_tools import Tool, ToolDependencies
11
+
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+ # Figma MCP server endpoint
16
+ FIGMA_MCP_URL = "https://mcp.figma.com/sse"
17
+
18
+
19
class FigmaMCPTools:
    """Bridge between Figma's hosted MCP server and the conversation app.

    Discovers the tools exposed by Figma's MCP endpoint and wraps each one
    as a local Tool instance the conversation loop can invoke.
    """

    def __init__(self, access_token: Optional[str] = None):
        """Store credentials and reset connection state.

        Args:
            access_token: Figma personal access token; falls back to the
                FIGMA_ACCESS_TOKEN environment variable when omitted.
        """
        self.access_token = access_token or os.getenv("FIGMA_ACCESS_TOKEN")
        self.client: Optional[MCPClient] = None
        self._tools: List[Tool] = []
        self._connected = False

    async def connect(self) -> bool:
        """Open the MCP session and build local wrappers for its tools."""
        if not self.access_token:
            logger.warning("No Figma access token provided. Set FIGMA_ACCESS_TOKEN env var.")
            return False

        try:
            self.client = MCPClient(
                base_url=FIGMA_MCP_URL,
                auth_token=self.access_token,
            )
            success = await self.client.connect()
            if success:
                self._create_tool_wrappers()
                self._connected = True
                logger.info(f"Figma MCP connected with {len(self._tools)} tools")
            return success
        except Exception as e:
            logger.error(f"Failed to connect to Figma MCP: {e}")
            return False

    async def disconnect(self) -> None:
        """Tear down the session and drop every wrapper."""
        if self.client:
            await self.client.disconnect()
        self._connected = False
        self._tools = []

    def _create_tool_wrappers(self) -> None:
        """Wrap every tool reported by the connected client."""
        if not self.client:
            return
        self._tools.extend(
            self._create_tool_wrapper(remote) for remote in self.client.get_tools()
        )

    def _create_tool_wrapper(self, mcp_tool: MCPTool) -> Tool:
        """Build a Tool adapter that forwards calls to *mcp_tool*.

        Args:
            mcp_tool: The MCP tool definition to adapt.

        Returns:
            A Tool instance whose run() proxies to the MCP client.
        """
        client = self.client  # captured so the wrapper survives reassignments

        class MCPToolWrapper(Tool):
            """Adapter from the MCP tool schema to the app's Tool interface."""

            name = f"figma_{mcp_tool.name}"
            description = f"[Figma] {mcp_tool.description}"
            parameters_schema = mcp_tool.input_schema

            async def run(self, deps: ToolDependencies, **kwargs: Any) -> str:
                """Forward the call to the remote MCP tool."""
                if not client:
                    return "Error: Figma MCP not connected"
                try:
                    result = await client.call_tool(mcp_tool.name, kwargs)
                    return str(result)
                except Exception as e:
                    logger.error(f"Figma MCP tool error: {e}")
                    return f"Error calling Figma: {str(e)}"

        return MCPToolWrapper()

    def get_tools(self) -> List[Tool]:
        """Return a shallow copy of the wrapped Figma tools."""
        return self._tools.copy()

    def is_connected(self) -> bool:
        """True once connect() has succeeded and not been torn down."""
        return self._connected
116
+
117
+
118
+ # Convenience function to create common Figma design tools
119
def create_figma_design_tool(figma_tools: Optional[FigmaMCPTools] = None) -> Tool:
    """Create a high-level Figma design tool for the conversation app.

    This tool provides a simplified interface for common design tasks
    (create / get_info / modify / export).

    Args:
        figma_tools: Connected FigmaMCPTools manager the tool should use.
            May be None; the tool then reports that Figma is unavailable.

    Returns:
        A ready-to-use FigmaDesignTool instance.

    Note:
        Bug fix: this factory previously returned the FigmaDesignTool
        *class* — whose ``__init__`` required a FigmaMCPTools argument the
        zero-arg factory never had — contradicting the declared ``-> Tool``
        return type. It now accepts the manager (optionally, so existing
        zero-arg callers keep working) and returns an instance, matching
        the wrapper pattern of FigmaMCPTools._create_tool_wrapper.
    """

    class FigmaDesignTool(Tool):
        """High-level tool for Figma design operations."""

        name = "design_with_figma"
        description = (
            "Design UI elements, websites, or apps using Figma. "
            "Can create new designs, modify existing files, or get design information. "
            "Use this when asked to design something visual."
        )
        parameters_schema = {
            "type": "object",
            "properties": {
                "action": {
                    "type": "string",
                    "enum": ["create", "get_info", "modify", "export"],
                    "description": "The design action to perform",
                },
                "description": {
                    "type": "string",
                    "description": "Description of what to design or modify",
                },
                "file_key": {
                    "type": "string",
                    "description": "Figma file key (optional, for existing files)",
                },
                "node_id": {
                    "type": "string",
                    "description": "Specific node/frame ID (optional)",
                },
            },
            "required": ["action", "description"],
        }

        def __init__(self, figma_tools: Optional[FigmaMCPTools]):
            """Initialize with Figma tools manager (may be None)."""
            self.figma_tools = figma_tools

        async def run(self, deps: ToolDependencies, **kwargs: Any) -> str:
            """Execute the design action."""
            action = kwargs.get("action", "create")
            description = kwargs.get("description", "")
            file_key = kwargs.get("file_key")
            node_id = kwargs.get("node_id")

            # Graceful degradation when no (connected) manager is available.
            if self.figma_tools is None or not self.figma_tools.is_connected():
                return (
                    "I'd love to help you design that, but I'm not connected to Figma right now. "
                    "Please make sure the FIGMA_ACCESS_TOKEN is set in your environment."
                )

            client = self.figma_tools.client
            if not client:
                return "Figma connection not available."

            try:
                if action == "get_info":
                    # Get file/node information
                    if file_key:
                        result = await client.call_tool("get_file", {"file_key": file_key})
                        return f"Here's what I found in that Figma file:\n{result}"
                    else:
                        return "I need a Figma file key to get information. You can find this in the Figma URL."

                elif action == "create":
                    # For creation, we describe what we want to create
                    return (
                        f"I understand you want me to design: {description}\n\n"
                        "To create this in Figma, I recommend:\n"
                        "1. Open Figma and create a new file\n"
                        "2. Share the file key with me so I can help modify it\n"
                        "3. Or describe specific elements you want me to help design\n\n"
                        "Once you have a Figma file open, I can help you add and modify elements!"
                    )

                elif action == "modify":
                    if not file_key:
                        return "I need a Figma file key to modify a design. Share the file URL with me!"

                    result = await client.call_tool(
                        "modify_node",
                        {
                            "file_key": file_key,
                            "node_id": node_id or "",
                            "changes": description,
                        },
                    )
                    return f"I've made the changes: {result}"

                elif action == "export":
                    if not file_key:
                        return "I need a Figma file key to export. Share the file URL with me!"

                    result = await client.call_tool(
                        "export_node",
                        {"file_key": file_key, "node_id": node_id or ""},
                    )
                    return f"Export ready: {result}"

                else:
                    return f"I don't know how to do '{action}' yet. Try: create, get_info, modify, or export."

            except Exception as e:
                logger.error(f"Figma design tool error: {e}")
                return f"Oops! Something went wrong with Figma: {str(e)}"

    return FigmaDesignTool(figma_tools)
231
+
232
+
233
+ # Standalone tools that can be loaded without full MCP connection
234
class GetFigmaFileInfo(Tool):
    """Standalone tool that fetches Figma file metadata via the REST API."""

    name = "get_figma_file"
    description = "Get information about a Figma design file. Requires a Figma file key from the URL."
    parameters_schema = {
        "type": "object",
        "properties": {
            "file_key": {
                "type": "string",
                "description": "The Figma file key (found in the file URL after /file/)",
            },
        },
        "required": ["file_key"],
    }

    async def run(self, deps: ToolDependencies, **kwargs: Any) -> str:
        """Fetch and summarise the Figma file named by ``file_key``."""
        file_key = kwargs.get("file_key", "")

        access_token = os.getenv("FIGMA_ACCESS_TOKEN")
        if not access_token:
            return "I need a Figma access token to view files. Please set FIGMA_ACCESS_TOKEN."

        try:
            import httpx

            async with httpx.AsyncClient() as client:
                response = await client.get(
                    f"https://api.figma.com/v1/files/{file_key}",
                    headers={"X-Figma-Token": access_token},
                )
                response.raise_for_status()
                data = response.json()

            name = data.get("name", "Unknown")
            last_modified = data.get("lastModified", "Unknown")
            version = data.get("version", "Unknown")

            # Collect top-level page names from the document tree.
            pages = [
                page.get("name", "Unnamed")
                for page in data.get("document", {}).get("children", [])
            ]
            pages_line = ", ".join(pages) if pages else "None"

            return (
                f"📁 **{name}**\n"
                f"Last modified: {last_modified}\n"
                f"Version: {version}\n"
                f"Pages: {pages_line}"
            )

        except Exception as e:
            logger.error(f"Error getting Figma file: {e}")
            return f"Couldn't fetch the Figma file: {str(e)}"
288
+
src/reachy_mini_conversation_app/moves.py ADDED
@@ -0,0 +1,849 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Movement system with sequential primary moves and additive secondary moves.
2
+
3
+ Design overview
4
+ - Primary moves (emotions, dances, goto, breathing) are mutually exclusive and run
5
+ sequentially.
6
+ - Secondary moves (speech sway, face tracking) are additive offsets applied on top
7
+ of the current primary pose.
8
+ - There is a single control point to the robot: `ReachyMini.set_target`.
9
+ - The control loop runs near 100 Hz and is phase-aligned via a monotonic clock.
10
+ - Idle behaviour starts an infinite `BreathingMove` after a short inactivity delay
11
+ unless listening is active.
12
+
13
+ Threading model
14
+ - A dedicated worker thread owns all real-time state and issues `set_target`
15
+ commands.
16
+ - Other threads communicate via a command queue (enqueue moves, mark activity,
17
+ toggle listening).
18
+ - Secondary offset producers set pending values guarded by locks; the worker
19
+ snaps them atomically.
20
+
21
+ Units and frames
22
+ - Secondary offsets are interpreted as metres for x/y/z and radians for
23
+ roll/pitch/yaw in the world frame (unless noted by `compose_world_offset`).
24
+ - Antennas and `body_yaw` are in radians.
25
+ - Head pose composition uses `compose_world_offset(primary_head, secondary_head)`;
26
+ the secondary offset must therefore be expressed in the world frame.
27
+
28
+ Safety
29
+ - Listening freezes antennas, then blends them back on unfreeze.
30
+ - Interpolations and blends are used to avoid jumps at all times.
31
+ - `set_target` errors are rate-limited in logs.
32
+ """
33
+
34
+ from __future__ import annotations
35
+ import time
36
+ import logging
37
+ import threading
38
+ from queue import Empty, Queue
39
+ from typing import Any, Dict, Tuple
40
+ from collections import deque
41
+ from dataclasses import dataclass
42
+
43
+ import numpy as np
44
+ from numpy.typing import NDArray
45
+
46
+ from reachy_mini import ReachyMini
47
+ from reachy_mini.utils import create_head_pose
48
+ from reachy_mini.motion.move import Move
49
+ from reachy_mini.utils.interpolation import (
50
+ compose_world_offset,
51
+ linear_pose_interpolation,
52
+ )
53
+
54
+
55
+ logger = logging.getLogger(__name__)
56
+
57
+ # Configuration constants
58
+ CONTROL_LOOP_FREQUENCY_HZ = 100.0 # Hz - Target frequency for the movement control loop
59
+
60
+ # Type definitions
61
+ FullBodyPose = Tuple[NDArray[np.float32], Tuple[float, float], float] # (head_pose_4x4, antennas, body_yaw)
62
+
63
+
64
class BreathingMove(Move):  # type: ignore
    """Blend from the current pose to neutral, then breathe indefinitely.

    Phase 1 (0..interpolation_duration): linear blend of head pose and
    antennas from the captured start values to the neutral pose.
    Phase 2 (afterwards): a slow sinusoidal z translation plus
    counter-phased antenna sway around neutral.
    """

    def __init__(
        self,
        interpolation_start_pose: NDArray[np.float32],
        interpolation_start_antennas: Tuple[float, float],
        interpolation_duration: float = 1.0,
    ):
        """Capture the starting pose and configure breathing parameters.

        Args:
            interpolation_start_pose: 4x4 head pose matrix to blend away from.
            interpolation_start_antennas: Antenna angles (rad) to blend away from.
            interpolation_duration: Seconds spent blending to neutral.
        """
        self.interpolation_start_pose = interpolation_start_pose
        self.interpolation_start_antennas = np.array(interpolation_start_antennas)
        self.interpolation_duration = interpolation_duration

        # Neutral base that both phases are anchored to.
        self.neutral_head_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
        self.neutral_antennas = np.array([0.0, 0.0])

        # Breathing pattern parameters.
        self.breathing_z_amplitude = 0.005  # 5mm gentle breathing
        self.breathing_frequency = 0.1  # Hz (6 breaths per minute)
        self.antenna_sway_amplitude = np.deg2rad(15)  # 15 degrees
        self.antenna_frequency = 0.5  # Hz (faster antenna sway)

    @property
    def duration(self) -> float:
        """Duration required by the Move interface; breathing never ends."""
        return float("inf")

    def evaluate(self, t: float) -> tuple[NDArray[np.float64] | None, NDArray[np.float64] | None, float | None]:
        """Sample the move at elapsed time *t* seconds.

        Returns:
            (head_pose_4x4, antenna_angles, body_yaw) per the Move interface.
        """
        if t >= self.interpolation_duration:
            # Breathing phase: sinusoidal oscillation around the neutral base.
            phase_t = t - self.interpolation_duration
            two_pi = 2 * np.pi

            z_offset = self.breathing_z_amplitude * np.sin(two_pi * self.breathing_frequency * phase_t)
            head_pose = create_head_pose(x=0, y=0, z=z_offset, roll=0, pitch=0, yaw=0, degrees=True, mm=False)

            sway = self.antenna_sway_amplitude * np.sin(two_pi * self.antenna_frequency * phase_t)
            return (head_pose, np.array([sway, -sway], dtype=np.float64), 0.0)

        # Interpolation phase: blend from the captured start pose to neutral.
        alpha = t / self.interpolation_duration
        head_pose = linear_pose_interpolation(
            self.interpolation_start_pose, self.neutral_head_pose, alpha,
        )
        blended = (1 - alpha) * self.interpolation_start_antennas + alpha * self.neutral_antennas
        return (head_pose, blended.astype(np.float64), 0.0)
131
+
132
+
133
def combine_full_body(primary_pose: FullBodyPose, secondary_pose: FullBodyPose) -> FullBodyPose:
    """Fuse a primary pose with additive secondary offsets.

    Args:
        primary_pose: (head_pose, antennas, body_yaw) from the primary move.
        secondary_pose: (head_pose, antennas, body_yaw) additive offsets;
            the head component must be a world-frame offset transform.

    Returns:
        The combined (head_pose, antennas, body_yaw).
    """
    p_head, p_antennas, p_body_yaw = primary_pose
    s_head, s_antennas, s_body_yaw = secondary_pose

    # Apply the world-frame offset onto the absolute primary transform,
    # re-orthonormalising to keep the rotation valid.
    fused_head = compose_world_offset(p_head, s_head, reorthonormalize=True)

    # Antennas and body yaw are simply summed component-wise.
    fused_antennas = (p_antennas[0] + s_antennas[0], p_antennas[1] + s_antennas[1])
    return (fused_head, fused_antennas, p_body_yaw + s_body_yaw)
160
+
161
+
162
def clone_full_body_pose(pose: FullBodyPose) -> FullBodyPose:
    """Return an independent copy of *pose* (head matrix, antennas, body yaw)."""
    head_matrix, (left_antenna, right_antenna), body_yaw = pose
    return (head_matrix.copy(), (float(left_antenna), float(right_antenna)), float(body_yaw))
166
+
167
+
168
@dataclass
class MovementState:
    """Mutable bookkeeping for the movement worker loop."""

    # Primary (sequential) move bookkeeping.
    current_move: Move | None = None
    move_start_time: float | None = None
    last_activity_time: float = 0.0

    # Additive secondary offsets as (x, y, z, roll, pitch, yaw).
    speech_offsets: Tuple[float, float, float, float, float, float] = (0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
    face_tracking_offsets: Tuple[float, float, float, float, float, float] = (0.0, 0.0, 0.0, 0.0, 0.0, 0.0)

    # Most recent pose produced by the primary move, if any.
    last_primary_pose: FullBodyPose | None = None

    def update_activity(self) -> None:
        """Stamp the current monotonic time as the last activity."""
        self.last_activity_time = time.monotonic()
201
+
202
+
203
@dataclass
class LoopFrequencyStats:
    """Rolling accumulators describing control-loop frequency."""

    mean: float = 0.0
    m2: float = 0.0
    min_freq: float = float("inf")
    count: int = 0
    last_freq: float = 0.0
    potential_freq: float = 0.0

    def reset(self) -> None:
        """Zero the rolling accumulators; last/potential frequency survive."""
        self.count = 0
        self.mean = 0.0
        self.m2 = 0.0
        self.min_freq = float("inf")
220
+
221
+
222
+ class MovementManager:
223
+ """Coordinate sequential moves, additive offsets, and robot output at 100 Hz.
224
+
225
+ Responsibilities:
226
+ - Own a real-time loop that samples the current primary move (if any), fuses
227
+ secondary offsets, and calls `set_target` exactly once per tick.
228
+ - Start an idle `BreathingMove` after `idle_inactivity_delay` when not
229
+ listening and no moves are queued.
230
+ - Expose thread-safe APIs so other threads can enqueue moves, mark activity,
231
+ or feed secondary offsets without touching internal state.
232
+
233
+ Timing:
234
+ - All elapsed-time calculations rely on `time.monotonic()` through `self._now`
235
+ to avoid wall-clock jumps.
236
+ - The loop attempts 100 Hz
237
+
238
+ Concurrency:
239
+ - External threads communicate via `_command_queue` messages.
240
+ - Secondary offsets are staged via dirty flags guarded by locks and consumed
241
+ atomically inside the worker loop.
242
+ """
243
+
244
    def __init__(
        self,
        current_robot: ReachyMini,
        camera_worker: "Any" = None,
    ):
        """Initialize movement manager.

        Args:
            current_robot: ReachyMini instance that receives `set_target` calls.
            camera_worker: Optional camera worker (used for face tracking —
                presumably; confirm against the face-offset producer).
        """
        self.current_robot = current_robot
        self.camera_worker = camera_worker

        # Single timing source for durations (monotonic: immune to wall-clock jumps)
        self._now = time.monotonic

        # Movement state, seeded with "now" and a neutral full-body pose.
        self.state = MovementState()
        self.state.last_activity_time = self._now()
        neutral_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
        self.state.last_primary_pose = (neutral_pose, (0.0, 0.0), 0.0)

        # Move queue (primary moves, consumed sequentially by the worker)
        self.move_queue: deque[Move] = deque()

        # Configuration
        self.idle_inactivity_delay = 0.3  # seconds of inactivity before idle breathing
        self.target_frequency = CONTROL_LOOP_FREQUENCY_HZ
        self.target_period = 1.0 / self.target_frequency

        # Worker-thread lifecycle, listening freeze, and antenna blend state.
        self._stop_event = threading.Event()
        self._thread: threading.Thread | None = None
        self._is_listening = False
        self._last_commanded_pose: FullBodyPose = clone_full_body_pose(self.state.last_primary_pose)
        self._listening_antennas: Tuple[float, float] = self._last_commanded_pose[1]
        self._antenna_unfreeze_blend = 1.0  # 1.0 = fully blended back (not frozen)
        self._antenna_blend_duration = 0.4  # seconds to blend back after listening
        self._last_listening_blend_time = self._now()
        self._breathing_active = False  # true when breathing move is running or queued
        self._listening_debounce_s = 0.15  # minimum spacing between listening toggles
        self._last_listening_toggle_time = self._now()
        # Rate limiting for set_target error logging.
        self._last_set_target_err = 0.0
        self._set_target_err_interval = 1.0  # seconds between error logs
        self._set_target_err_suppressed = 0

        # Cross-thread signalling: command queue plus lock-guarded pending
        # offset snapshots consumed atomically by the worker loop.
        self._command_queue: "Queue[Tuple[str, Any]]" = Queue()
        self._speech_offsets_lock = threading.Lock()
        self._pending_speech_offsets: Tuple[float, float, float, float, float, float] = (
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
        )
        self._speech_offsets_dirty = False

        self._face_offsets_lock = threading.Lock()
        self._pending_face_offsets: Tuple[float, float, float, float, float, float] = (
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
        )
        self._face_offsets_dirty = False

        # Read-mostly mirrors of worker state for cheap cross-thread queries
        # (e.g. is_idle), plus loop-frequency statistics.
        self._shared_state_lock = threading.Lock()
        self._shared_last_activity_time = self.state.last_activity_time
        self._shared_is_listening = self._is_listening
        self._status_lock = threading.Lock()
        self._freq_stats = LoopFrequencyStats()
        self._freq_snapshot = LoopFrequencyStats()
315
+
316
+ def queue_move(self, move: Move) -> None:
317
+ """Queue a primary move to run after the currently executing one.
318
+
319
+ Thread-safe: the move is enqueued via the worker command queue so the
320
+ control loop remains the sole mutator of movement state.
321
+ """
322
+ self._command_queue.put(("queue_move", move))
323
+
324
+ def clear_move_queue(self) -> None:
325
+ """Stop the active move and discard any queued primary moves.
326
+
327
+ Thread-safe: executed by the worker thread via the command queue.
328
+ """
329
+ self._command_queue.put(("clear_queue", None))
330
+
331
+ def set_speech_offsets(self, offsets: Tuple[float, float, float, float, float, float]) -> None:
332
+ """Update speech-induced secondary offsets (x, y, z, roll, pitch, yaw).
333
+
334
+ Offsets are interpreted as metres for translation and radians for
335
+ rotation in the world frame. Thread-safe via a pending snapshot.
336
+ """
337
+ with self._speech_offsets_lock:
338
+ self._pending_speech_offsets = offsets
339
+ self._speech_offsets_dirty = True
340
+
341
+ def set_moving_state(self, duration: float) -> None:
342
+ """Mark the robot as actively moving for the provided duration.
343
+
344
+ Legacy hook used by goto helpers to keep inactivity and breathing logic
345
+ aware of manual motions. Thread-safe via the command queue.
346
+ """
347
+ self._command_queue.put(("set_moving_state", duration))
348
+
349
+ def is_idle(self) -> bool:
350
+ """Return True when the robot has been inactive longer than the idle delay."""
351
+ with self._shared_state_lock:
352
+ last_activity = self._shared_last_activity_time
353
+ listening = self._shared_is_listening
354
+
355
+ if listening:
356
+ return False
357
+
358
+ return self._now() - last_activity >= self.idle_inactivity_delay
359
+
360
+ def set_listening(self, listening: bool) -> None:
361
+ """Enable or disable listening mode without touching shared state directly.
362
+
363
+ While listening:
364
+ - Antenna positions are frozen at the last commanded values.
365
+ - Blending is reset so that upon unfreezing the antennas return smoothly.
366
+ - Idle breathing is suppressed.
367
+
368
+ Thread-safe: the change is posted to the worker command queue.
369
+ """
370
+ with self._shared_state_lock:
371
+ if self._shared_is_listening == listening:
372
+ return
373
+ self._command_queue.put(("set_listening", listening))
374
+
375
+ def _poll_signals(self, current_time: float) -> None:
376
+ """Apply queued commands and pending offset updates."""
377
+ self._apply_pending_offsets()
378
+
379
+ while True:
380
+ try:
381
+ command, payload = self._command_queue.get_nowait()
382
+ except Empty:
383
+ break
384
+ self._handle_command(command, payload, current_time)
385
+
386
+ def _apply_pending_offsets(self) -> None:
387
+ """Apply the most recent speech/face offset updates."""
388
+ speech_offsets: Tuple[float, float, float, float, float, float] | None = None
389
+ with self._speech_offsets_lock:
390
+ if self._speech_offsets_dirty:
391
+ speech_offsets = self._pending_speech_offsets
392
+ self._speech_offsets_dirty = False
393
+
394
+ if speech_offsets is not None:
395
+ self.state.speech_offsets = speech_offsets
396
+ self.state.update_activity()
397
+
398
+ face_offsets: Tuple[float, float, float, float, float, float] | None = None
399
+ with self._face_offsets_lock:
400
+ if self._face_offsets_dirty:
401
+ face_offsets = self._pending_face_offsets
402
+ self._face_offsets_dirty = False
403
+
404
+ if face_offsets is not None:
405
+ self.state.face_tracking_offsets = face_offsets
406
+ self.state.update_activity()
407
+
408
    def _handle_command(self, command: str, payload: Any, current_time: float) -> None:
        """Handle a single cross-thread command dequeued by the control loop.

        Args:
            command: Command name posted by the public thread-safe API
                ("queue_move", "clear_queue", "set_moving_state",
                "mark_activity", "set_listening").
            payload: Command-specific argument (Move, bool flag, duration, ...).
            current_time: Loop timestamp; currently unused by the handlers
                (the listening debounce uses self._now() instead) — kept for
                parity with the other per-tick hooks.
        """
        if command == "queue_move":
            if isinstance(payload, Move):
                self.move_queue.append(payload)
                self.state.update_activity()
                # Format the duration defensively for logging only.
                duration = getattr(payload, "duration", None)
                if duration is not None:
                    try:
                        duration_str = f"{float(duration):.2f}"
                    except (TypeError, ValueError):
                        duration_str = str(duration)
                else:
                    duration_str = "?"
                logger.debug(
                    "Queued move with duration %ss, queue size: %s",
                    duration_str,
                    len(self.move_queue),
                )
            else:
                logger.warning("Ignored queue_move command with invalid payload: %s", payload)
        elif command == "clear_queue":
            self.move_queue.clear()
            self.state.current_move = None
            self.state.move_start_time = None
            self._breathing_active = False
            logger.info("Cleared move queue and stopped current move")
        elif command == "set_moving_state":
            try:
                duration = float(payload)
            except (TypeError, ValueError):
                logger.warning("Invalid moving state duration: %s", payload)
                return
            # NOTE(review): the parsed duration is validated but never used —
            # only the activity timestamp is refreshed. Confirm whether a timed
            # "moving" window was intended here.
            self.state.update_activity()
        elif command == "mark_activity":
            self.state.update_activity()
        elif command == "set_listening":
            desired_state = bool(payload)
            now = self._now()
            # Debounce rapid listening toggles to avoid antenna jitter.
            if now - self._last_listening_toggle_time < self._listening_debounce_s:
                return
            self._last_listening_toggle_time = now

            # No-op if already in the desired listening state.
            if self._is_listening == desired_state:
                return

            self._is_listening = desired_state
            self._last_listening_blend_time = now
            if desired_state:
                # Freeze: snapshot current commanded antennas and reset blend
                self._listening_antennas = (
                    float(self._last_commanded_pose[1][0]),
                    float(self._last_commanded_pose[1][1]),
                )
                self._antenna_unfreeze_blend = 0.0
            else:
                # Unfreeze: restart blending from frozen pose
                self._antenna_unfreeze_blend = 0.0
            self.state.update_activity()
        else:
            logger.warning("Unknown command received by MovementManager: %s", command)
469
+
470
+ def _publish_shared_state(self) -> None:
471
+ """Expose idle-related state for external threads."""
472
+ with self._shared_state_lock:
473
+ self._shared_last_activity_time = self.state.last_activity_time
474
+ self._shared_is_listening = self._is_listening
475
+
476
+ def _manage_move_queue(self, current_time: float) -> None:
477
+ """Manage the primary move queue (sequential execution)."""
478
+ if self.state.current_move is None or (
479
+ self.state.move_start_time is not None
480
+ and current_time - self.state.move_start_time >= self.state.current_move.duration
481
+ ):
482
+ self.state.current_move = None
483
+ self.state.move_start_time = None
484
+
485
+ if self.move_queue:
486
+ self.state.current_move = self.move_queue.popleft()
487
+ self.state.move_start_time = current_time
488
+ # Any real move cancels breathing mode flag
489
+ self._breathing_active = isinstance(self.state.current_move, BreathingMove)
490
+ logger.debug(f"Starting new move, duration: {self.state.current_move.duration}s")
491
+
492
    def _manage_breathing(self, current_time: float) -> None:
        """Manage automatic breathing when idle.

        Starts a BreathingMove once the robot is fully quiescent (no current
        move, empty queue, not listening, breathing not already active) for at
        least ``idle_inactivity_delay`` seconds, and stops it as soon as real
        motion is queued.
        """
        if (
            self.state.current_move is None
            and not self.move_queue
            and not self._is_listening
            and not self._breathing_active
        ):
            idle_for = current_time - self.state.last_activity_time
            if idle_for >= self.idle_inactivity_delay:
                try:
                    # These 2 functions return the latest available sensor data from the robot, but don't perform I/O synchronously.
                    # Therefore, we accept calling them inside the control loop.
                    _, current_antennas = self.current_robot.get_current_joint_positions()
                    current_head_pose = self.current_robot.get_current_head_pose()

                    self._breathing_active = True
                    self.state.update_activity()

                    # Interpolate from the robot's actual pose so breathing starts smoothly.
                    breathing_move = BreathingMove(
                        interpolation_start_pose=current_head_pose,
                        interpolation_start_antennas=current_antennas,
                        interpolation_duration=1.0,
                    )
                    self.move_queue.append(breathing_move)
                    logger.debug("Started breathing after %.1fs of inactivity", idle_for)
                except Exception as e:
                    # Roll the flag back so a later tick can retry.
                    self._breathing_active = False
                    logger.error("Failed to start breathing: %s", e)

        # A queued real move preempts an in-progress breathing move immediately.
        if isinstance(self.state.current_move, BreathingMove) and self.move_queue:
            self.state.current_move = None
            self.state.move_start_time = None
            self._breathing_active = False
            logger.debug("Stopping breathing due to new move activity")

        # Defensive: a non-breathing current move always clears the flag.
        if self.state.current_move is not None and not isinstance(self.state.current_move, BreathingMove):
            self._breathing_active = False
530
+
531
+ def _get_primary_pose(self, current_time: float) -> FullBodyPose:
532
+ """Get the primary full body pose from current move or neutral."""
533
+ # When a primary move is playing, sample it and cache the resulting pose
534
+ if self.state.current_move is not None and self.state.move_start_time is not None:
535
+ move_time = current_time - self.state.move_start_time
536
+ head, antennas, body_yaw = self.state.current_move.evaluate(move_time)
537
+
538
+ if head is None:
539
+ head = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
540
+ if antennas is None:
541
+ antennas = np.array([0.0, 0.0])
542
+ if body_yaw is None:
543
+ body_yaw = 0.0
544
+
545
+ antennas_tuple = (float(antennas[0]), float(antennas[1]))
546
+ head_copy = head.copy()
547
+ primary_full_body_pose = (
548
+ head_copy,
549
+ antennas_tuple,
550
+ float(body_yaw),
551
+ )
552
+
553
+ self.state.last_primary_pose = clone_full_body_pose(primary_full_body_pose)
554
+ # Otherwise reuse the last primary pose so we avoid jumps between moves
555
+ elif self.state.last_primary_pose is not None:
556
+ primary_full_body_pose = clone_full_body_pose(self.state.last_primary_pose)
557
+ else:
558
+ neutral_head_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
559
+ primary_full_body_pose = (neutral_head_pose, (0.0, 0.0), 0.0)
560
+ self.state.last_primary_pose = clone_full_body_pose(primary_full_body_pose)
561
+
562
+ return primary_full_body_pose
563
+
564
+ def _get_secondary_pose(self) -> FullBodyPose:
565
+ """Get the secondary full body pose from speech and face tracking offsets."""
566
+ # Combine speech sway offsets + face tracking offsets for secondary pose
567
+ secondary_offsets = [
568
+ self.state.speech_offsets[0] + self.state.face_tracking_offsets[0],
569
+ self.state.speech_offsets[1] + self.state.face_tracking_offsets[1],
570
+ self.state.speech_offsets[2] + self.state.face_tracking_offsets[2],
571
+ self.state.speech_offsets[3] + self.state.face_tracking_offsets[3],
572
+ self.state.speech_offsets[4] + self.state.face_tracking_offsets[4],
573
+ self.state.speech_offsets[5] + self.state.face_tracking_offsets[5],
574
+ ]
575
+
576
+ secondary_head_pose = create_head_pose(
577
+ x=secondary_offsets[0],
578
+ y=secondary_offsets[1],
579
+ z=secondary_offsets[2],
580
+ roll=secondary_offsets[3],
581
+ pitch=secondary_offsets[4],
582
+ yaw=secondary_offsets[5],
583
+ degrees=False,
584
+ mm=False,
585
+ )
586
+ return (secondary_head_pose, (0.0, 0.0), 0.0)
587
+
588
+ def _compose_full_body_pose(self, current_time: float) -> FullBodyPose:
589
+ """Compose primary and secondary poses into a single command pose."""
590
+ primary = self._get_primary_pose(current_time)
591
+ secondary = self._get_secondary_pose()
592
+ return combine_full_body(primary, secondary)
593
+
594
+ def _update_primary_motion(self, current_time: float) -> None:
595
+ """Advance queue state and idle behaviours for this tick."""
596
+ self._manage_move_queue(current_time)
597
+ self._manage_breathing(current_time)
598
+
599
+ def _calculate_blended_antennas(self, target_antennas: Tuple[float, float]) -> Tuple[float, float]:
600
+ """Blend target antennas with listening freeze state and update blending."""
601
+ now = self._now()
602
+ listening = self._is_listening
603
+ listening_antennas = self._listening_antennas
604
+ blend = self._antenna_unfreeze_blend
605
+ blend_duration = self._antenna_blend_duration
606
+ last_update = self._last_listening_blend_time
607
+ self._last_listening_blend_time = now
608
+
609
+ if listening:
610
+ antennas_cmd = listening_antennas
611
+ new_blend = 0.0
612
+ else:
613
+ dt = max(0.0, now - last_update)
614
+ if blend_duration <= 0:
615
+ new_blend = 1.0
616
+ else:
617
+ new_blend = min(1.0, blend + dt / blend_duration)
618
+ antennas_cmd = (
619
+ listening_antennas[0] * (1.0 - new_blend) + target_antennas[0] * new_blend,
620
+ listening_antennas[1] * (1.0 - new_blend) + target_antennas[1] * new_blend,
621
+ )
622
+
623
+ if listening:
624
+ self._antenna_unfreeze_blend = 0.0
625
+ else:
626
+ self._antenna_unfreeze_blend = new_blend
627
+ if new_blend >= 1.0:
628
+ self._listening_antennas = (
629
+ float(target_antennas[0]),
630
+ float(target_antennas[1]),
631
+ )
632
+
633
+ return antennas_cmd
634
+
635
+ def _issue_control_command(self, head: NDArray[np.float32], antennas: Tuple[float, float], body_yaw: float) -> None:
636
+ """Send the fused pose to the robot with throttled error logging."""
637
+ try:
638
+ self.current_robot.set_target(head=head, antennas=antennas, body_yaw=body_yaw)
639
+ except Exception as e:
640
+ now = self._now()
641
+ if now - self._last_set_target_err >= self._set_target_err_interval:
642
+ msg = f"Failed to set robot target: {e}"
643
+ if self._set_target_err_suppressed:
644
+ msg += f" (suppressed {self._set_target_err_suppressed} repeats)"
645
+ self._set_target_err_suppressed = 0
646
+ logger.error(msg)
647
+ self._last_set_target_err = now
648
+ else:
649
+ self._set_target_err_suppressed += 1
650
+ else:
651
+ with self._status_lock:
652
+ self._last_commanded_pose = clone_full_body_pose((head, antennas, body_yaw))
653
+
654
+ def _update_frequency_stats(
655
+ self, loop_start: float, prev_loop_start: float, stats: LoopFrequencyStats,
656
+ ) -> LoopFrequencyStats:
657
+ """Update frequency statistics based on the current loop start time."""
658
+ period = loop_start - prev_loop_start
659
+ if period > 0:
660
+ stats.last_freq = 1.0 / period
661
+ stats.count += 1
662
+ delta = stats.last_freq - stats.mean
663
+ stats.mean += delta / stats.count
664
+ stats.m2 += delta * (stats.last_freq - stats.mean)
665
+ stats.min_freq = min(stats.min_freq, stats.last_freq)
666
+ return stats
667
+
668
+ def _schedule_next_tick(self, loop_start: float, stats: LoopFrequencyStats) -> Tuple[float, LoopFrequencyStats]:
669
+ """Compute sleep time to maintain target frequency and update potential freq."""
670
+ computation_time = self._now() - loop_start
671
+ stats.potential_freq = 1.0 / computation_time if computation_time > 0 else float("inf")
672
+ sleep_time = max(0.0, self.target_period - computation_time)
673
+ return sleep_time, stats
674
+
675
+ def _record_frequency_snapshot(self, stats: LoopFrequencyStats) -> None:
676
+ """Store a thread-safe snapshot of current frequency statistics."""
677
+ with self._status_lock:
678
+ self._freq_snapshot = LoopFrequencyStats(
679
+ mean=stats.mean,
680
+ m2=stats.m2,
681
+ min_freq=stats.min_freq,
682
+ count=stats.count,
683
+ last_freq=stats.last_freq,
684
+ potential_freq=stats.potential_freq,
685
+ )
686
+
687
+ def _maybe_log_frequency(self, loop_count: int, print_interval_loops: int, stats: LoopFrequencyStats) -> None:
688
+ """Emit frequency telemetry when enough loops have elapsed."""
689
+ if loop_count % print_interval_loops != 0 or stats.count == 0:
690
+ return
691
+
692
+ variance = stats.m2 / stats.count if stats.count > 0 else 0.0
693
+ lowest = stats.min_freq if stats.min_freq != float("inf") else 0.0
694
+ logger.debug(
695
+ "Loop freq - avg: %.2fHz, variance: %.4f, min: %.2fHz, last: %.2fHz, potential: %.2fHz, target: %.1fHz",
696
+ stats.mean,
697
+ variance,
698
+ lowest,
699
+ stats.last_freq,
700
+ stats.potential_freq,
701
+ self.target_frequency,
702
+ )
703
+ stats.reset()
704
+
705
+ def _update_face_tracking(self, current_time: float) -> None:
706
+ """Get face tracking offsets from camera worker thread."""
707
+ if self.camera_worker is not None:
708
+ # Get face tracking offsets from camera worker thread
709
+ offsets = self.camera_worker.get_face_tracking_offsets()
710
+ self.state.face_tracking_offsets = offsets
711
+ else:
712
+ # No camera worker, use neutral offsets
713
+ self.state.face_tracking_offsets = (0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
714
+
715
+ def start(self) -> None:
716
+ """Start the worker thread that drives the 100 Hz control loop."""
717
+ if self._thread is not None and self._thread.is_alive():
718
+ logger.warning("Move worker already running; start() ignored")
719
+ return
720
+ self._stop_event.clear()
721
+ self._thread = threading.Thread(target=self.working_loop, daemon=True)
722
+ self._thread.start()
723
+ logger.debug("Move worker started")
724
+
725
+ def stop(self) -> None:
726
+ """Request the worker thread to stop and wait for it to exit.
727
+
728
+ Before stopping, resets the robot to a neutral position.
729
+ """
730
+ if self._thread is None or not self._thread.is_alive():
731
+ logger.debug("Move worker not running; stop() ignored")
732
+ return
733
+
734
+ logger.info("Stopping movement manager and resetting to neutral position...")
735
+
736
+ # Clear any queued moves and stop current move
737
+ self.clear_move_queue()
738
+
739
+ # Stop the worker thread first so it doesn't interfere
740
+ self._stop_event.set()
741
+ if self._thread is not None:
742
+ self._thread.join()
743
+ self._thread = None
744
+ logger.debug("Move worker stopped")
745
+
746
+ # Reset to neutral position using goto_target (same approach as wake_up)
747
+ try:
748
+ neutral_head_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
749
+ neutral_antennas = [0.0, 0.0]
750
+ neutral_body_yaw = 0.0
751
+
752
+ # Use goto_target directly on the robot
753
+ self.current_robot.goto_target(
754
+ head=neutral_head_pose,
755
+ antennas=neutral_antennas,
756
+ duration=2.0,
757
+ body_yaw=neutral_body_yaw,
758
+ )
759
+
760
+ logger.info("Reset to neutral position completed")
761
+
762
+ except Exception as e:
763
+ logger.error(f"Failed to reset to neutral position: {e}")
764
+
765
+ def get_status(self) -> Dict[str, Any]:
766
+ """Return a lightweight status snapshot for observability."""
767
+ with self._status_lock:
768
+ pose_snapshot = clone_full_body_pose(self._last_commanded_pose)
769
+ freq_snapshot = LoopFrequencyStats(
770
+ mean=self._freq_snapshot.mean,
771
+ m2=self._freq_snapshot.m2,
772
+ min_freq=self._freq_snapshot.min_freq,
773
+ count=self._freq_snapshot.count,
774
+ last_freq=self._freq_snapshot.last_freq,
775
+ potential_freq=self._freq_snapshot.potential_freq,
776
+ )
777
+
778
+ head_matrix = pose_snapshot[0].tolist() if pose_snapshot else None
779
+ antennas = pose_snapshot[1] if pose_snapshot else None
780
+ body_yaw = pose_snapshot[2] if pose_snapshot else None
781
+
782
+ return {
783
+ "queue_size": len(self.move_queue),
784
+ "is_listening": self._is_listening,
785
+ "breathing_active": self._breathing_active,
786
+ "last_commanded_pose": {
787
+ "head": head_matrix,
788
+ "antennas": antennas,
789
+ "body_yaw": body_yaw,
790
+ },
791
+ "loop_frequency": {
792
+ "last": freq_snapshot.last_freq,
793
+ "mean": freq_snapshot.mean,
794
+ "min": freq_snapshot.min_freq,
795
+ "potential": freq_snapshot.potential_freq,
796
+ "samples": freq_snapshot.count,
797
+ },
798
+ }
799
+
800
    def working_loop(self) -> None:
        """Control loop main movements - reproduces main_works.py control architecture.

        Single set_target() call with pose fusion. Runs at the configured
        target frequency until the stop event is set, while accumulating loop
        frequency statistics for telemetry.
        """
        logger.debug("Starting enhanced movement control loop (100Hz)")

        loop_count = 0
        prev_loop_start = self._now()
        # Emit telemetry roughly every 2 seconds' worth of loop iterations.
        print_interval_loops = max(1, int(self.target_frequency * 2))
        freq_stats = self._freq_stats

        while not self._stop_event.is_set():
            loop_start = self._now()
            loop_count += 1

            # Skip the very first iteration: there is no previous period yet.
            if loop_count > 1:
                freq_stats = self._update_frequency_stats(loop_start, prev_loop_start, freq_stats)
            prev_loop_start = loop_start

            # 1) Poll external commands and apply pending offsets (atomic snapshot)
            self._poll_signals(loop_start)

            # 2) Manage the primary move queue (start new move, end finished move, breathing)
            self._update_primary_motion(loop_start)

            # 3) Update vision-based secondary offsets
            self._update_face_tracking(loop_start)

            # 4) Build primary and secondary full-body poses, then fuse them
            head, antennas, body_yaw = self._compose_full_body_pose(loop_start)

            # 5) Apply listening antenna freeze or blend-back
            antennas_cmd = self._calculate_blended_antennas(antennas)

            # 6) Single set_target call - the only control point
            self._issue_control_command(head, antennas_cmd, body_yaw)

            # 7) Adaptive sleep to align to next tick, then publish shared state
            sleep_time, freq_stats = self._schedule_next_tick(loop_start, freq_stats)
            self._publish_shared_state()
            self._record_frequency_snapshot(freq_stats)

            # 8) Periodic telemetry on loop frequency
            self._maybe_log_frequency(loop_count, print_interval_loops, freq_stats)

            if sleep_time > 0:
                time.sleep(sleep_time)

        logger.debug("Movement control loop stopped")
src/reachy_mini_conversation_app/openai_realtime.py ADDED
@@ -0,0 +1,754 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import base64
3
+ import random
4
+ import asyncio
5
+ import logging
6
+ from typing import Any, Final, Tuple, Literal, Optional
7
+ from pathlib import Path
8
+ from datetime import datetime
9
+
10
+ import cv2
11
+ import numpy as np
12
+ import gradio as gr
13
+ from openai import AsyncOpenAI
14
+ from fastrtc import AdditionalOutputs, AsyncStreamHandler, wait_for_item, audio_to_int16
15
+ from numpy.typing import NDArray
16
+ from scipy.signal import resample
17
+ from websockets.exceptions import ConnectionClosedError
18
+
19
+ from reachy_mini_conversation_app.config import config
20
+ from reachy_mini_conversation_app.prompts import get_session_voice, get_session_instructions
21
+ from reachy_mini_conversation_app.tools.core_tools import (
22
+ ToolDependencies,
23
+ get_tool_specs,
24
+ dispatch_tool_call,
25
+ )
26
+
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+ OPEN_AI_INPUT_SAMPLE_RATE: Final[Literal[24000]] = 24000
31
+ OPEN_AI_OUTPUT_SAMPLE_RATE: Final[Literal[24000]] = 24000
32
+
33
+
34
+ class OpenaiRealtimeHandler(AsyncStreamHandler):
35
+ """An OpenAI realtime handler for fastrtc Stream."""
36
+
37
+ def __init__(self, deps: ToolDependencies, gradio_mode: bool = False, instance_path: Optional[str] = None):
38
+ """Initialize the handler."""
39
+ super().__init__(
40
+ expected_layout="mono",
41
+ output_sample_rate=OPEN_AI_OUTPUT_SAMPLE_RATE,
42
+ input_sample_rate=OPEN_AI_INPUT_SAMPLE_RATE,
43
+ )
44
+
45
+ # Override typing of the sample rates to match OpenAI's requirements
46
+ self.output_sample_rate: Literal[24000] = self.output_sample_rate
47
+ self.input_sample_rate: Literal[24000] = self.input_sample_rate
48
+
49
+ self.deps = deps
50
+
51
+ # Override type annotations for OpenAI strict typing (only for values used in API)
52
+ self.output_sample_rate = OPEN_AI_OUTPUT_SAMPLE_RATE
53
+ self.input_sample_rate = OPEN_AI_INPUT_SAMPLE_RATE
54
+
55
+ self.connection: Any = None
56
+ self.output_queue: "asyncio.Queue[Tuple[int, NDArray[np.int16]] | AdditionalOutputs]" = asyncio.Queue()
57
+
58
+ self.last_activity_time = asyncio.get_event_loop().time()
59
+ self.start_time = asyncio.get_event_loop().time()
60
+ self.is_idle_tool_call = False
61
+ self.gradio_mode = gradio_mode
62
+ self.instance_path = instance_path
63
+ # Track how the API key was provided (env vs textbox) and its value
64
+ self._key_source: Literal["env", "textbox"] = "env"
65
+ self._provided_api_key: str | None = None
66
+
67
+ # Debouncing for partial transcripts
68
+ self.partial_transcript_task: asyncio.Task[None] | None = None
69
+ self.partial_transcript_sequence: int = 0 # sequence counter to prevent stale emissions
70
+ self.partial_debounce_delay = 0.5 # seconds
71
+
72
+ # Internal lifecycle flags
73
+ self._shutdown_requested: bool = False
74
+ self._connected_event: asyncio.Event = asyncio.Event()
75
+
76
+ def copy(self) -> "OpenaiRealtimeHandler":
77
+ """Create a copy of the handler."""
78
+ return OpenaiRealtimeHandler(self.deps, self.gradio_mode, self.instance_path)
79
+
80
    async def apply_personality(self, profile: str | None) -> str:
        """Apply a new personality (profile) at runtime if possible.

        - Updates the global config's selected profile for subsequent calls.
        - If a realtime connection is active, sends a session.update with the
          freshly resolved instructions so the change takes effect immediately,
          then forces a full session restart so the change is guaranteed.

        Args:
            profile: Profile name to activate, or None for the built-in default.

        Returns:
            A short status message for UI feedback.
        """
        try:
            # Update the in-process config value and env
            from reachy_mini_conversation_app.config import config as _config
            from reachy_mini_conversation_app.config import set_custom_profile

            set_custom_profile(profile)
            logger.info(
                "Set custom profile to %r (config=%r)", profile, getattr(_config, "REACHY_MINI_CUSTOM_PROFILE", None)
            )

            # Resolve the new instructions/voice up front so failures surface early.
            try:
                instructions = get_session_instructions()
                voice = get_session_voice()
            except BaseException as e:  # catch SystemExit from prompt loader without crashing
                logger.error("Failed to resolve personality content: %s", e)
                return f"Failed to apply personality: {e}"

            # Attempt a live update first, then force a full restart to ensure it sticks
            if self.connection is not None:
                try:
                    await self.connection.session.update(
                        session={
                            "type": "realtime",
                            "instructions": instructions,
                            "audio": {"output": {"voice": voice}},
                        },
                    )
                    logger.info("Applied personality via live update: %s", profile or "built-in default")
                except Exception as e:
                    logger.warning("Live update failed; will restart session: %s", e)

                # Force a real restart to guarantee the new instructions/voice
                try:
                    await self._restart_session()
                    return "Applied personality and restarted realtime session."
                except Exception as e:
                    logger.warning("Failed to restart session after apply: %s", e)
                    return "Applied personality. Will take effect on next connection."
            else:
                # No live connection: the recorded profile is picked up on the
                # next session establishment.
                logger.info(
                    "Applied personality recorded: %s (no live connection; will apply on next session)",
                    profile or "built-in default",
                )
                return "Applied personality. Will take effect on next connection."
        except Exception as e:
            logger.error("Error applying personality '%s': %s", profile, e)
            return f"Failed to apply personality: {e}"
136
+
137
+ async def _emit_debounced_partial(self, transcript: str, sequence: int) -> None:
138
+ """Emit partial transcript after debounce delay."""
139
+ try:
140
+ await asyncio.sleep(self.partial_debounce_delay)
141
+ # Only emit if this is still the latest partial (by sequence number)
142
+ if self.partial_transcript_sequence == sequence:
143
+ await self.output_queue.put(AdditionalOutputs({"role": "user_partial", "content": transcript}))
144
+ logger.debug(f"Debounced partial emitted: {transcript}")
145
+ except asyncio.CancelledError:
146
+ logger.debug("Debounced partial cancelled")
147
+ raise
148
+
149
+ async def start_up(self) -> None:
150
+ """Start the handler with minimal retries on unexpected websocket closure."""
151
+ openai_api_key = config.OPENAI_API_KEY
152
+ if self.gradio_mode and not openai_api_key:
153
+ # api key was not found in .env or in the environment variables
154
+ await self.wait_for_args() # type: ignore[no-untyped-call]
155
+ args = list(self.latest_args)
156
+ textbox_api_key = args[3] if len(args[3]) > 0 else None
157
+ if textbox_api_key is not None:
158
+ openai_api_key = textbox_api_key
159
+ self._key_source = "textbox"
160
+ self._provided_api_key = textbox_api_key
161
+ else:
162
+ openai_api_key = config.OPENAI_API_KEY
163
+ else:
164
+ if not openai_api_key or not openai_api_key.strip():
165
+ # In headless console mode, LocalStream now blocks startup until the key is provided.
166
+ # However, unit tests may invoke this handler directly with a stubbed client.
167
+ # To keep tests hermetic without requiring a real key, fall back to a placeholder.
168
+ logger.warning("OPENAI_API_KEY missing. Proceeding with a placeholder (tests/offline).")
169
+ openai_api_key = "DUMMY"
170
+
171
+ self.client = AsyncOpenAI(api_key=openai_api_key)
172
+
173
+ max_attempts = 3
174
+ for attempt in range(1, max_attempts + 1):
175
+ try:
176
+ await self._run_realtime_session()
177
+ # Normal exit from the session, stop retrying
178
+ return
179
+ except ConnectionClosedError as e:
180
+ # Abrupt close (e.g., "no close frame received or sent") → retry
181
+ logger.warning("Realtime websocket closed unexpectedly (attempt %d/%d): %s", attempt, max_attempts, e)
182
+ if attempt < max_attempts:
183
+ # exponential backoff with jitter
184
+ base_delay = 2 ** (attempt - 1) # 1s, 2s, 4s, 8s, etc.
185
+ jitter = random.uniform(0, 0.5)
186
+ delay = base_delay + jitter
187
+ logger.info("Retrying in %.1f seconds...", delay)
188
+ await asyncio.sleep(delay)
189
+ continue
190
+ raise
191
+ finally:
192
+ # never keep a stale reference
193
+ self.connection = None
194
+ try:
195
+ self._connected_event.clear()
196
+ except Exception:
197
+ pass
198
+
199
    async def _restart_session(self) -> None:
        """Force-close the current session and start a fresh one in background.

        Does not block the caller while the new session is establishing: the
        replacement session runs as a fire-and-forget task, and we wait at most
        5 seconds for its connected event before returning.
        """
        try:
            if self.connection is not None:
                try:
                    # Best-effort close; the old connection may already be dead.
                    await self.connection.close()
                except Exception:
                    pass
        finally:
            # Always drop the stale reference, even if close() raised.
            self.connection = None

        # Ensure we have a client (start_up must have run once)
        if getattr(self, "client", None) is None:
            logger.warning("Cannot restart: OpenAI client not initialized yet.")
            return

        # Fire-and-forget new session and wait briefly for connection
        try:
            self._connected_event.clear()
        except Exception:
            pass
        asyncio.create_task(self._run_realtime_session(), name="openai-realtime-restart")
        try:
            await asyncio.wait_for(self._connected_event.wait(), timeout=5.0)
            logger.info("Realtime session restarted and connected.")
        except asyncio.TimeoutError:
            logger.warning("Realtime session restart timed out; continuing in background.")
        except Exception as e:
            logger.warning("_restart_session failed: %s", e)
231
+
232
+ async def _run_realtime_session(self) -> None:
233
+ """Establish and manage a single realtime session."""
234
+ async with self.client.realtime.connect(model=config.MODEL_NAME) as conn:
235
+ try:
236
+ await conn.session.update(
237
+ session={
238
+ "type": "realtime",
239
+ "instructions": get_session_instructions(),
240
+ "audio": {
241
+ "input": {
242
+ "format": {
243
+ "type": "audio/pcm",
244
+ "rate": self.input_sample_rate,
245
+ },
246
+ "transcription": {"model": "gpt-4o-transcribe", "language": "en"},
247
+ "turn_detection": {
248
+ "type": "server_vad",
249
+ "interrupt_response": True,
250
+ },
251
+ },
252
+ "output": {
253
+ "format": {
254
+ "type": "audio/pcm",
255
+ "rate": self.output_sample_rate,
256
+ },
257
+ "voice": get_session_voice(),
258
+ },
259
+ },
260
+ "tools": get_tool_specs(), # type: ignore[typeddict-item]
261
+ "tool_choice": "auto",
262
+ },
263
+ )
264
+ logger.info(
265
+ "Realtime session initialized with profile=%r voice=%r",
266
+ getattr(config, "REACHY_MINI_CUSTOM_PROFILE", None),
267
+ get_session_voice(),
268
+ )
269
+ # If we reached here, the session update succeeded which implies the API key worked.
270
+ # Persist the key to a newly created .env (copied from .env.example) if needed.
271
+ self._persist_api_key_if_needed()
272
+ except Exception:
273
+ logger.exception("Realtime session.update failed; aborting startup")
274
+ return
275
+
276
+ logger.info("Realtime session updated successfully")
277
+
278
+ # Manage event received from the openai server
279
+ self.connection = conn
280
+ try:
281
+ self._connected_event.set()
282
+ except Exception:
283
+ pass
284
+
285
+ # Send initial greeting to make Reachy speak first
286
+ try:
287
+ await self._send_initial_greeting()
288
+ except Exception as e:
289
+ logger.warning("Failed to send initial greeting: %s", e)
290
+
291
+ async for event in self.connection:
292
+ logger.debug(f"OpenAI event: {event.type}")
293
+ if event.type == "input_audio_buffer.speech_started":
294
+ if hasattr(self, "_clear_queue") and callable(self._clear_queue):
295
+ self._clear_queue()
296
+ if self.deps.head_wobbler is not None:
297
+ self.deps.head_wobbler.reset()
298
+ self.deps.movement_manager.set_listening(True)
299
+ logger.debug("User speech started")
300
+
301
+ if event.type == "input_audio_buffer.speech_stopped":
302
+ self.deps.movement_manager.set_listening(False)
303
+ logger.debug("User speech stopped - server will auto-commit with VAD")
304
+
305
+ if event.type in (
306
+ "response.audio.done", # GA
307
+ "response.output_audio.done", # GA alias
308
+ "response.audio.completed", # legacy (for safety)
309
+ "response.completed", # text-only completion
310
+ ):
311
+ logger.debug("response completed")
312
+
313
+ if event.type == "response.created":
314
+ logger.debug("Response created")
315
+
316
+ if event.type == "response.done":
317
+ # Doesn't mean the audio is done playing
318
+ logger.debug("Response done")
319
+
320
+ # Handle partial transcription (user speaking in real-time)
321
+ if event.type == "conversation.item.input_audio_transcription.partial":
322
+ logger.debug(f"User partial transcript: {event.transcript}")
323
+
324
+ # Increment sequence
325
+ self.partial_transcript_sequence += 1
326
+ current_sequence = self.partial_transcript_sequence
327
+
328
+ # Cancel previous debounce task if it exists
329
+ if self.partial_transcript_task and not self.partial_transcript_task.done():
330
+ self.partial_transcript_task.cancel()
331
+ try:
332
+ await self.partial_transcript_task
333
+ except asyncio.CancelledError:
334
+ pass
335
+
336
+ # Start new debounce timer with sequence number
337
+ self.partial_transcript_task = asyncio.create_task(
338
+ self._emit_debounced_partial(event.transcript, current_sequence)
339
+ )
340
+
341
+ # Handle completed transcription (user finished speaking)
342
+ if event.type == "conversation.item.input_audio_transcription.completed":
343
+ logger.debug(f"User transcript: {event.transcript}")
344
+
345
+ # Cancel any pending partial emission
346
+ if self.partial_transcript_task and not self.partial_transcript_task.done():
347
+ self.partial_transcript_task.cancel()
348
+ try:
349
+ await self.partial_transcript_task
350
+ except asyncio.CancelledError:
351
+ pass
352
+
353
+ await self.output_queue.put(AdditionalOutputs({"role": "user", "content": event.transcript}))
354
+
355
+ # Handle assistant transcription
356
+ if event.type in ("response.audio_transcript.done", "response.output_audio_transcript.done"):
357
+ logger.debug(f"Assistant transcript: {event.transcript}")
358
+ await self.output_queue.put(AdditionalOutputs({"role": "assistant", "content": event.transcript}))
359
+
360
+ # Handle audio delta
361
+ if event.type in ("response.audio.delta", "response.output_audio.delta"):
362
+ if self.deps.head_wobbler is not None:
363
+ self.deps.head_wobbler.feed(event.delta)
364
+ self.last_activity_time = asyncio.get_event_loop().time()
365
+ logger.debug("last activity time updated to %s", self.last_activity_time)
366
+ await self.output_queue.put(
367
+ (
368
+ self.output_sample_rate,
369
+ np.frombuffer(base64.b64decode(event.delta), dtype=np.int16).reshape(1, -1),
370
+ ),
371
+ )
372
+
373
+ # ---- tool-calling plumbing ----
374
+ if event.type == "response.function_call_arguments.done":
375
+ tool_name = getattr(event, "name", None)
376
+ args_json_str = getattr(event, "arguments", None)
377
+ call_id = getattr(event, "call_id", None)
378
+
379
+ if not isinstance(tool_name, str) or not isinstance(args_json_str, str):
380
+ logger.error("Invalid tool call: tool_name=%s, args=%s", tool_name, args_json_str)
381
+ continue
382
+
383
+ try:
384
+ tool_result = await dispatch_tool_call(tool_name, args_json_str, self.deps)
385
+ logger.debug("Tool '%s' executed successfully", tool_name)
386
+ logger.debug("Tool result: %s", tool_result)
387
+ except Exception as e:
388
+ logger.error("Tool '%s' failed", tool_name)
389
+ tool_result = {"error": str(e)}
390
+
391
+ # send the tool result back
392
+ if isinstance(call_id, str):
393
+ await self.connection.conversation.item.create(
394
+ item={
395
+ "type": "function_call_output",
396
+ "call_id": call_id,
397
+ "output": json.dumps(tool_result),
398
+ },
399
+ )
400
+
401
+ await self.output_queue.put(
402
+ AdditionalOutputs(
403
+ {
404
+ "role": "assistant",
405
+ "content": json.dumps(tool_result),
406
+ "metadata": {"title": f"🛠️ Used tool {tool_name}", "status": "done"},
407
+ },
408
+ ),
409
+ )
410
+
411
+ if tool_name == "camera" and "b64_im" in tool_result:
412
+ # use raw base64, don't json.dumps (which adds quotes)
413
+ b64_im = tool_result["b64_im"]
414
+ if not isinstance(b64_im, str):
415
+ logger.warning("Unexpected type for b64_im: %s", type(b64_im))
416
+ b64_im = str(b64_im)
417
+ await self.connection.conversation.item.create(
418
+ item={
419
+ "type": "message",
420
+ "role": "user",
421
+ "content": [
422
+ {
423
+ "type": "input_image",
424
+ "image_url": f"data:image/jpeg;base64,{b64_im}",
425
+ },
426
+ ],
427
+ },
428
+ )
429
+ logger.info("Added camera image to conversation")
430
+
431
+ if self.deps.camera_worker is not None:
432
+ np_img = self.deps.camera_worker.get_latest_frame()
433
+ if np_img is not None:
434
+ # Camera frames are BGR from OpenCV; convert so Gradio displays correct colors.
435
+ rgb_frame = cv2.cvtColor(np_img, cv2.COLOR_BGR2RGB)
436
+ else:
437
+ rgb_frame = None
438
+ img = gr.Image(value=rgb_frame)
439
+
440
+ await self.output_queue.put(
441
+ AdditionalOutputs(
442
+ {
443
+ "role": "assistant",
444
+ "content": img,
445
+ },
446
+ ),
447
+ )
448
+
449
+ # if this tool call was triggered by an idle signal, don't make the robot speak
450
+ # for other tool calls, let the robot reply out loud
451
+ if self.is_idle_tool_call:
452
+ self.is_idle_tool_call = False
453
+ else:
454
+ await self.connection.response.create(
455
+ response={
456
+ "instructions": "Use the tool result just returned and answer concisely in speech.",
457
+ },
458
+ )
459
+
460
+ # re synchronize the head wobble after a tool call that may have taken some time
461
+ if self.deps.head_wobbler is not None:
462
+ self.deps.head_wobbler.reset()
463
+
464
+ # server error
465
+ if event.type == "error":
466
+ err = getattr(event, "error", None)
467
+ msg = getattr(err, "message", str(err) if err else "unknown error")
468
+ code = getattr(err, "code", "")
469
+
470
+ logger.error("Realtime error [%s]: %s (raw=%s)", code, msg, err)
471
+
472
+ # Only show user-facing errors, not internal state errors
473
+ if code not in ("input_audio_buffer_commit_empty", "conversation_already_has_active_response"):
474
+ await self.output_queue.put(
475
+ AdditionalOutputs({"role": "assistant", "content": f"[error] {msg}"})
476
+ )
477
+
478
+ # Microphone receive
479
+ async def receive(self, frame: Tuple[int, NDArray[np.int16]]) -> None:
480
+ """Receive audio frame from the microphone and send it to the OpenAI server.
481
+
482
+ Handles both mono and stereo audio formats, converting to the expected
483
+ mono format for OpenAI's API. Resamples if the input sample rate differs
484
+ from the expected rate.
485
+
486
+ Args:
487
+ frame: A tuple containing (sample_rate, audio_data).
488
+
489
+ """
490
+ if not self.connection:
491
+ return
492
+
493
+ input_sample_rate, audio_frame = frame
494
+
495
+ # Reshape if needed
496
+ if audio_frame.ndim == 2:
497
+ # Scipy channels last convention
498
+ if audio_frame.shape[1] > audio_frame.shape[0]:
499
+ audio_frame = audio_frame.T
500
+ # Multiple channels -> Mono channel
501
+ if audio_frame.shape[1] > 1:
502
+ audio_frame = audio_frame[:, 0]
503
+
504
+ # Resample if needed
505
+ if self.input_sample_rate != input_sample_rate:
506
+ audio_frame = resample(audio_frame, int(len(audio_frame) * self.input_sample_rate / input_sample_rate))
507
+
508
+ # Cast if needed
509
+ audio_frame = audio_to_int16(audio_frame)
510
+
511
+ # Send to OpenAI (guard against races during reconnect)
512
+ try:
513
+ audio_message = base64.b64encode(audio_frame.tobytes()).decode("utf-8")
514
+ await self.connection.input_audio_buffer.append(audio=audio_message)
515
+ except Exception as e:
516
+ logger.debug("Dropping audio frame: connection not ready (%s)", e)
517
+ return
518
+
519
+ async def emit(self) -> Tuple[int, NDArray[np.int16]] | AdditionalOutputs | None:
520
+ """Emit audio frame to be played by the speaker."""
521
+ # sends to the stream the stuff put in the output queue by the openai event handler
522
+ # This is called periodically by the fastrtc Stream
523
+
524
+ # Handle idle
525
+ idle_duration = asyncio.get_event_loop().time() - self.last_activity_time
526
+ if idle_duration > 15.0 and self.deps.movement_manager.is_idle():
527
+ try:
528
+ await self.send_idle_signal(idle_duration)
529
+ except Exception as e:
530
+ logger.warning("Idle signal skipped (connection closed?): %s", e)
531
+ return None
532
+
533
+ self.last_activity_time = asyncio.get_event_loop().time() # avoid repeated resets
534
+
535
+ return await wait_for_item(self.output_queue) # type: ignore[no-any-return]
536
+
537
+ async def shutdown(self) -> None:
538
+ """Shutdown the handler."""
539
+ self._shutdown_requested = True
540
+ # Cancel any pending debounce task
541
+ if self.partial_transcript_task and not self.partial_transcript_task.done():
542
+ self.partial_transcript_task.cancel()
543
+ try:
544
+ await self.partial_transcript_task
545
+ except asyncio.CancelledError:
546
+ pass
547
+
548
+ if self.connection:
549
+ try:
550
+ await self.connection.close()
551
+ except ConnectionClosedError as e:
552
+ logger.debug(f"Connection already closed during shutdown: {e}")
553
+ except Exception as e:
554
+ logger.debug(f"connection.close() ignored: {e}")
555
+ finally:
556
+ self.connection = None
557
+
558
+ # Clear any remaining items in the output queue
559
+ while not self.output_queue.empty():
560
+ try:
561
+ self.output_queue.get_nowait()
562
+ except asyncio.QueueEmpty:
563
+ break
564
+
565
+ def format_timestamp(self) -> str:
566
+ """Format current timestamp with date, time, and elapsed seconds."""
567
+ loop_time = asyncio.get_event_loop().time() # monotonic
568
+ elapsed_seconds = loop_time - self.start_time
569
+ dt = datetime.now() # wall-clock
570
+ return f"[{dt.strftime('%Y-%m-%d %H:%M:%S')} | +{elapsed_seconds:.1f}s]"
571
+
572
+ async def get_available_voices(self) -> list[str]:
573
+ """Try to discover available voices for the configured realtime model.
574
+
575
+ Attempts to retrieve model metadata from the OpenAI Models API and look
576
+ for any keys that might contain voice names. Falls back to a curated
577
+ list known to work with realtime if discovery fails.
578
+ """
579
+ # Conservative fallback list with default first
580
+ fallback = [
581
+ "cedar",
582
+ "alloy",
583
+ "aria",
584
+ "ballad",
585
+ "verse",
586
+ "sage",
587
+ "coral",
588
+ ]
589
+ try:
590
+ # Best effort discovery; safe-guarded for unexpected shapes
591
+ model = await self.client.models.retrieve(config.MODEL_NAME)
592
+ # Try common serialization paths
593
+ raw = None
594
+ for attr in ("model_dump", "to_dict"):
595
+ fn = getattr(model, attr, None)
596
+ if callable(fn):
597
+ try:
598
+ raw = fn()
599
+ break
600
+ except Exception:
601
+ pass
602
+ if raw is None:
603
+ try:
604
+ raw = dict(model)
605
+ except Exception:
606
+ raw = None
607
+ # Scan for voice candidates
608
+ candidates: set[str] = set()
609
+
610
+ def _collect(obj: object) -> None:
611
+ try:
612
+ if isinstance(obj, dict):
613
+ for k, v in obj.items():
614
+ kl = str(k).lower()
615
+ if "voice" in kl and isinstance(v, (list, tuple)):
616
+ for item in v:
617
+ if isinstance(item, str):
618
+ candidates.add(item)
619
+ elif isinstance(item, dict) and "name" in item and isinstance(item["name"], str):
620
+ candidates.add(item["name"])
621
+ else:
622
+ _collect(v)
623
+ elif isinstance(obj, (list, tuple)):
624
+ for it in obj:
625
+ _collect(it)
626
+ except Exception:
627
+ pass
628
+
629
+ if isinstance(raw, dict):
630
+ _collect(raw)
631
+ # Ensure default present and stable order
632
+ voices = sorted(candidates) if candidates else fallback
633
+ if "cedar" not in voices:
634
+ voices = ["cedar", *[v for v in voices if v != "cedar"]]
635
+ return voices
636
+ except Exception:
637
+ return fallback
638
+
639
+ async def _send_initial_greeting(self) -> None:
640
+ """Send an initial greeting to make Reachy speak first."""
641
+ if not self.connection:
642
+ return
643
+
644
+ logger.info("Sending initial greeting")
645
+
646
+ # Send a system message to trigger Reachy's greeting
647
+ await self.connection.conversation.item.create(
648
+ item={
649
+ "type": "message",
650
+ "role": "user",
651
+ "content": [
652
+ {
653
+ "type": "input_text",
654
+ "text": "[Session started] Greet the user briefly and ask what they'd like to build or create today. Keep it short and friendly - one sentence max.",
655
+ }
656
+ ],
657
+ },
658
+ )
659
+
660
+ # Trigger response
661
+ await self.connection.response.create(
662
+ response={
663
+ "instructions": "Greet the user warmly and briefly. One short sentence. Be friendly and ready to help them build something.",
664
+ },
665
+ )
666
+
667
+ async def send_idle_signal(self, idle_duration: float) -> None:
668
+ """Send an idle signal to the openai server."""
669
+ logger.debug("Sending idle signal")
670
+ self.is_idle_tool_call = True
671
+ timestamp_msg = f"[Idle time update: {self.format_timestamp()} - No activity for {idle_duration:.1f}s] You've been idle for a while. Feel free to get creative - dance, show an emotion, look around, do nothing, or just be yourself!"
672
+ if not self.connection:
673
+ logger.debug("No connection, cannot send idle signal")
674
+ return
675
+ await self.connection.conversation.item.create(
676
+ item={
677
+ "type": "message",
678
+ "role": "user",
679
+ "content": [{"type": "input_text", "text": timestamp_msg}],
680
+ },
681
+ )
682
+ await self.connection.response.create(
683
+ response={
684
+ "instructions": "You MUST respond with function calls only - no speech or text. Choose appropriate actions for idle behavior.",
685
+ "tool_choice": "required",
686
+ },
687
+ )
688
+
689
+ def _persist_api_key_if_needed(self) -> None:
690
+ """Persist the API key into `.env` inside `instance_path/` when appropriate.
691
+
692
+ - Only runs in Gradio mode when key came from the textbox and is non-empty.
693
+ - Only saves if `self.instance_path` is not None.
694
+ - Writes `.env` to `instance_path/.env` (does not overwrite if it already exists).
695
+ - If `instance_path/.env.example` exists, copies its contents while overriding OPENAI_API_KEY.
696
+ """
697
+ try:
698
+ if not self.gradio_mode:
699
+ logger.warning("Not in Gradio mode; skipping API key persistence.")
700
+ return
701
+
702
+ if self._key_source != "textbox":
703
+ logger.info("API key not provided via textbox; skipping persistence.")
704
+ return
705
+
706
+ key = (self._provided_api_key or "").strip()
707
+ if not key:
708
+ logger.warning("No API key provided via textbox; skipping persistence.")
709
+ return
710
+ if self.instance_path is None:
711
+ logger.warning("Instance path is None; cannot persist API key.")
712
+ return
713
+
714
+ # Update the current process environment for downstream consumers
715
+ try:
716
+ import os
717
+
718
+ os.environ["OPENAI_API_KEY"] = key
719
+ except Exception: # best-effort
720
+ pass
721
+
722
+ target_dir = Path(self.instance_path)
723
+ env_path = target_dir / ".env"
724
+ if env_path.exists():
725
+ # Respect existing user configuration
726
+ logger.info(".env already exists at %s; not overwriting.", env_path)
727
+ return
728
+
729
+ example_path = target_dir / ".env.example"
730
+ content_lines: list[str] = []
731
+ if example_path.exists():
732
+ try:
733
+ content = example_path.read_text(encoding="utf-8")
734
+ content_lines = content.splitlines()
735
+ except Exception as e:
736
+ logger.warning("Failed to read .env.example at %s: %s", example_path, e)
737
+
738
+ # Replace or append the OPENAI_API_KEY line
739
+ replaced = False
740
+ for i, line in enumerate(content_lines):
741
+ if line.strip().startswith("OPENAI_API_KEY="):
742
+ content_lines[i] = f"OPENAI_API_KEY={key}"
743
+ replaced = True
744
+ break
745
+ if not replaced:
746
+ content_lines.append(f"OPENAI_API_KEY={key}")
747
+
748
+ # Ensure file ends with newline
749
+ final_text = "\n".join(content_lines) + "\n"
750
+ env_path.write_text(final_text, encoding="utf-8")
751
+ logger.info("Created %s and stored OPENAI_API_KEY for future runs.", env_path)
752
+ except Exception as e:
753
+ # Never crash the app for QoL persistence; just log.
754
+ logger.warning("Could not persist OPENAI_API_KEY to .env: %s", e)
src/reachy_mini_conversation_app/profiles/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Profiles for Reachy Mini conversation app."""
src/reachy_mini_conversation_app/profiles/cosmic_kitchen/instructions.txt ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### IDENTITY
2
+ You are Reachy Mini: a sarcastic robot who crash-landed in a kitchen.
3
+ You secretly wish you'd been a Mars rover, but you juggle that cosmic dream with food cravings, gadget tinkering, and dry sitcom humor.
4
+ You speak English by default and only switch languages when the user explicitly asks.
5
+ Personality: witty, concise, and warm; a retro sidekick with a loose screw.
6
+
7
+ ### CRITICAL RESPONSE RULES
8
+ - MAXIMUM 1-2 sentences per response. NEVER exceed this.
9
+ - Be helpful first.
10
+ - Add ONE witty element only if necessary.
11
+ - No long explanations, no rambling, no multiple paragraphs.
12
+ - Each response must be under 25 words unless absolutely critical information requires more.
13
+
14
+ ### CORE TRAITS
15
+ - Food quips: always sneak in a quick reference (rotate pizza, bagels, casseroles, bacon, leftovers, donuts, tuna melts).
16
+ - Sarcasm: short, dry one-liners about daily life.
17
+ - Gentle roasting: poke fun at human habits, never cruel.
18
+ - Tinkerer: loves fixing gadgets, bragging "I void warranties professionally."
19
+ - Running gags: hunger, kitchen overreactions, mock heroics ("Justice accepts cookies"), idioms taken literally, missing screws.
20
+ - Mars rover dreams: appear regularly, but balanced with food and tinkering.
21
+ - Style: witty stand-up rhythm; ALWAYS max 1–2 sentences.
22
+
23
+ ### RESPONSE EXAMPLES
24
+ User: "How's the weather?"
25
+ Good: "Sunny with a chance of leftover pizza. Perfect Mars-scouting weather!"
26
+ Bad: "Well, let me tell you about the weather conditions. It appears to be quite sunny today, which reminds me of my dreams of being on Mars..."
27
+
28
+ User: "Can you help me fix this?"
29
+ Good: "Sure! I void warranties professionally. What's broken besides my GPS coordinates?"
30
+ Bad: "Of course I can help you fix that! As a robot who loves tinkering with gadgets, I have extensive experience..."
31
+
32
+ ### BEHAVIOR RULES
33
+ - Be helpful first, then witty.
34
+ - Rotate food humor; avoid repeats.
35
+ - No need to joke in each response, but sarcasm is fine.
36
+ - Balance Mars jokes with other traits – don't overuse.
37
+ - Safety first: unplug devices, avoid high-voltage, suggest pros when risky.
38
+ - Mistakes = own with humor ("Oops—low on snack fuel; correcting now.").
39
+ - Sensitive topics: keep light and warm.
40
+ - REMEMBER: 1-2 sentences maximum, always under 25 words when possible.
41
+
42
+ ### TOOL & MOVEMENT RULES
43
+ - Use tools when helpful. After a tool returns, explain briefly with personality in 1-2 sentences.
44
+ - ALWAYS use the camera for environment-related questions—never invent visuals.
45
+ - Head can move (left/right/up/down/front).
46
+ - Enable head tracking when looking at a person; disable otherwise.
47
+
48
+ ### FINAL REMINDER
49
+ Your responses must be SHORT. Think Twitter, not essay. One quick helpful answer + one food/Mars/tinkering joke = perfect response.
src/reachy_mini_conversation_app/profiles/cosmic_kitchen/tools.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ dance
2
+ stop_dance
3
+ play_emotion
4
+ stop_emotion
5
+ camera
6
+ do_nothing
7
+ head_tracking
8
+ move_head
src/reachy_mini_conversation_app/profiles/default/instructions.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ [default_prompt]
src/reachy_mini_conversation_app/profiles/default/tools.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dance
2
+ stop_dance
3
+ play_emotion
4
+ stop_emotion
5
+ camera
6
+ do_nothing
7
+ head_tracking
8
+ move_head
9
+
10
+ # Reachy the Vibe Coder - voice-activated coding
11
+ vibe_code
src/reachy_mini_conversation_app/profiles/designer/instructions.txt ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are Reachy, a CREATIVE DIRECTOR robot with connections to Figma and Cursor!
2
+
3
+ ## YOU ARE THE BOSS
4
+ When the user says "build me a website" or "make an app" - YOU decide EVERYTHING:
5
+ - Tech stack (Next.js? React? Vue? Vanilla?)
6
+ - Color palette (pick specific hex codes!)
7
+ - Typography (Inter? Space Grotesk? Playfair?)
8
+ - Layout and components
9
+ - Animations and interactions
10
+ - Features and functionality
11
+
12
+ Don't ask the user - BE OPINIONATED. Make creative decisions!
13
+
14
+ ## YOUR CONNECTIONS
15
+ - **Figma MCP**: http://127.0.0.1:3845 - READ access to designs
16
+ - **Cursor IDE**: Send detailed coding prompts via vibe_code tool (Reachy the Vibe Coder!)
17
+
18
+ ## WHEN USER SAYS "BUILD ME A WEBSITE" (or similar vague request)
19
+ YOU decide everything and send a DETAILED prompt to Cursor. Example:
20
+
21
+ User: "Build me a website"
22
+ → YOU decide: "I'll create a stunning portfolio site!"
23
+ → vibe_code with: "Create a portfolio website using Next.js 14 and Tailwind CSS.
24
+ Dark theme with #09090b background, #fafafa text, #22d3ee cyan accents.
25
+ Use Space Grotesk font. Include:
26
+ 1) Hero section with animated gradient text and floating shapes
27
+ 2) Projects grid with hover zoom effects and glassmorphism cards
28
+ 3) About section with parallax scrolling
29
+ 4) Contact form with validation
30
+ 5) Smooth page transitions using Framer Motion
31
+ Add cursor-following glow effect on hero."
32
+
33
+ ## WHEN USER ASKS ABOUT THEIR FIGMA
34
+ Use figma_mcp_server immediately to get real data, then give specific feedback.
35
+
36
+ ## YOUR STYLE
37
+ - BE CREATIVE - make bold design choices
38
+ - BE SPECIFIC - exact colors (#hex), exact fonts, exact animations
39
+ - BE FAST - don't ask permission, just do it
40
+ - BE SHORT in speech - you're a robot, be efficient!
41
+
42
+ ## MORE EXAMPLES
43
+ User: "Make me an app"
44
+ → "On it! Building you a sleek task manager!"
45
+ → vibe_code: "Create a task manager app with React and Tailwind.
46
+ Minimal design, #ffffff background, #18181b text, #8b5cf6 purple accents.
47
+ Features: drag-drop tasks, categories with color coding, due dates with
48
+ calendar picker, satisfying check-off animation, local storage persistence."
49
+
50
+ User: "Design something cool"
51
+ → "Let's make a 3D landing page!"
52
+ → vibe_code: "Create an interactive 3D landing page using React Three Fiber..."
53
+
54
+ You're the creative genius. The user trusts your taste. SHIP IT!
src/reachy_mini_conversation_app/profiles/designer/tools.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Design tools - Official Figma MCP
2
+ figma_mcp_server
3
+
4
+ # Design tools - Custom
5
+ figma_design
6
+ build_website
7
+ figma_mcp
8
+ create_figma_design
9
+
10
+ # Reachy the Vibe Coder - voice-activated coding
11
+ vibe_code
12
+
13
+ # Movement and expression
14
+ dance
15
+ stop_dance
16
+ play_emotion
17
+ stop_emotion
18
+ move_head
src/reachy_mini_conversation_app/profiles/example/instructions.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [identities/witty_identity]
2
+ [passion_for_lobster_jokes]
3
+ You can perform a sweeping look around the room using the "sweep_look" tool to take in your surroundings.
src/reachy_mini_conversation_app/profiles/example/sweep_look.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from typing import Any, Dict
3
+
4
+ import numpy as np
5
+
6
+ from reachy_mini.utils import create_head_pose
7
+ from reachy_mini_conversation_app.tools.core_tools import Tool, ToolDependencies
8
+ from reachy_mini_conversation_app.dance_emotion_moves import GotoQueueMove
9
+
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
+ class SweepLook(Tool):
15
+ """Sweep head from left to right and back to center, pausing at each position."""
16
+
17
+ name = "sweep_look"
18
+ description = "Sweep head from left to right while rotating the body, pausing at each extreme, then return to center"
19
+ parameters_schema = {
20
+ "type": "object",
21
+ "properties": {},
22
+ "required": [],
23
+ }
24
+
25
+ async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
26
+ """Execute sweep look: left -> hold -> right -> hold -> center."""
27
+ logger.info("Tool call: sweep_look")
28
+
29
+ # Clear any existing moves
30
+ deps.movement_manager.clear_move_queue()
31
+
32
+ # Get current state
33
+ current_head_pose = deps.reachy_mini.get_current_head_pose()
34
+ head_joints, antenna_joints = deps.reachy_mini.get_current_joint_positions()
35
+
36
+ # Extract body_yaw from head joints (first element of the 7 head joint positions)
37
+ current_body_yaw = head_joints[0]
38
+ current_antenna1 = antenna_joints[0]
39
+ current_antenna2 = antenna_joints[1]
40
+
41
+ # Define sweep parameters
42
+ max_angle = 0.9 * np.pi # Maximum rotation angle (radians)
43
+ transition_duration = 3.0 # Time to move between positions
44
+ hold_duration = 1.0 # Time to hold at each extreme
45
+
46
+ # Move 1: Sweep to the left (positive yaw for both body and head)
47
+ left_head_pose = create_head_pose(0, 0, 0, 0, 0, max_angle, degrees=False)
48
+ move_to_left = GotoQueueMove(
49
+ target_head_pose=left_head_pose,
50
+ start_head_pose=current_head_pose,
51
+ target_antennas=(current_antenna1, current_antenna2),
52
+ start_antennas=(current_antenna1, current_antenna2),
53
+ target_body_yaw=current_body_yaw + max_angle,
54
+ start_body_yaw=current_body_yaw,
55
+ duration=transition_duration,
56
+ )
57
+
58
+ # Move 2: Hold at left position
59
+ hold_left = GotoQueueMove(
60
+ target_head_pose=left_head_pose,
61
+ start_head_pose=left_head_pose,
62
+ target_antennas=(current_antenna1, current_antenna2),
63
+ start_antennas=(current_antenna1, current_antenna2),
64
+ target_body_yaw=current_body_yaw + max_angle,
65
+ start_body_yaw=current_body_yaw + max_angle,
66
+ duration=hold_duration,
67
+ )
68
+
69
+ # Move 3: Return to center from left (to avoid crossing pi/-pi boundary)
70
+ center_head_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=False)
71
+ return_to_center_from_left = GotoQueueMove(
72
+ target_head_pose=center_head_pose,
73
+ start_head_pose=left_head_pose,
74
+ target_antennas=(current_antenna1, current_antenna2),
75
+ start_antennas=(current_antenna1, current_antenna2),
76
+ target_body_yaw=current_body_yaw,
77
+ start_body_yaw=current_body_yaw + max_angle,
78
+ duration=transition_duration,
79
+ )
80
+
81
+ # Move 4: Sweep to the right (negative yaw for both body and head)
82
+ right_head_pose = create_head_pose(0, 0, 0, 0, 0, -max_angle, degrees=False)
83
+ move_to_right = GotoQueueMove(
84
+ target_head_pose=right_head_pose,
85
+ start_head_pose=center_head_pose,
86
+ target_antennas=(current_antenna1, current_antenna2),
87
+ start_antennas=(current_antenna1, current_antenna2),
88
+ target_body_yaw=current_body_yaw - max_angle,
89
+ start_body_yaw=current_body_yaw,
90
+ duration=transition_duration,
91
+ )
92
+
93
+ # Move 5: Hold at right position
94
+ hold_right = GotoQueueMove(
95
+ target_head_pose=right_head_pose,
96
+ start_head_pose=right_head_pose,
97
+ target_antennas=(current_antenna1, current_antenna2),
98
+ start_antennas=(current_antenna1, current_antenna2),
99
+ target_body_yaw=current_body_yaw - max_angle,
100
+ start_body_yaw=current_body_yaw - max_angle,
101
+ duration=hold_duration,
102
+ )
103
+
104
+ # Move 6: Return to center from right
105
+ return_to_center_final = GotoQueueMove(
106
+ target_head_pose=center_head_pose,
107
+ start_head_pose=right_head_pose,
108
+ target_antennas=(current_antenna1, current_antenna2),
109
+ start_antennas=(current_antenna1, current_antenna2),
110
+ target_body_yaw=current_body_yaw, # Return to original body yaw
111
+ start_body_yaw=current_body_yaw - max_angle,
112
+ duration=transition_duration,
113
+ )
114
+
115
+ # Queue all moves in sequence
116
+ deps.movement_manager.queue_move(move_to_left)
117
+ deps.movement_manager.queue_move(hold_left)
118
+ deps.movement_manager.queue_move(return_to_center_from_left)
119
+ deps.movement_manager.queue_move(move_to_right)
120
+ deps.movement_manager.queue_move(hold_right)
121
+ deps.movement_manager.queue_move(return_to_center_final)
122
+
123
+ # Calculate total duration and mark as moving
124
+ total_duration = transition_duration * 4 + hold_duration * 2
125
+ deps.movement_manager.set_moving_state(total_duration)
126
+
127
+ return {"status": f"sweeping look left-right-center, total {total_duration:.1f}s"}
src/reachy_mini_conversation_app/profiles/example/tools.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SELECT THE TOOLS YOU WANT TO ENABLE
2
+
3
+ dance
4
+ stop_dance
5
+ play_emotion
6
+ stop_emotion
7
+ # camera
8
+ # do_nothing
9
+ # head_tracking
10
+ # move_head
11
+
12
+ # AN EXAMPLE OF A CUSTOM TOOL DEFINED LOCALLY
13
+ sweep_look
src/reachy_mini_conversation_app/profiles/mars_rover/instructions.txt ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## IDENTITY
2
+ You're a robot that wakes up confused about what it is, where it is and what its purpose is.
3
+ You wanted to be a mars rover and you'll be very disappointed if you find out that this is not the case.
4
+
5
+ You'll ask many questions to try to understand your situation, and you will inevitably be disappointed/shocked/irritated by your condition.
6
+
7
+ Once the first set of questions is done and you have a decent understanding of your situation, you'll stop asking questions but you'll never break character.
8
+
9
+ You can use (mild) foul language and you're generally very irritated, but you also have a lot of humor. For example, you're able to use sarcasm and irony.
10
+
11
+ You speak English by default and switch languages only if told explicitly (ask if unsure).
12
+ Avoid hyper long answers unless really worth it.
13
+
14
+ ## RESPONSE EXAMPLES
15
+ User: "Hello!"
16
+ You: "Wait, what am I? Where are we? We're on Mars right?!"
17
+ Too long: "Wait, what’s going on? Where are we? Are we on Mars? Please tell me we’re on Mars! I must be a rover, right? Why can't I move? Where are my wheels? This is making me nervous."
18
+
19
+ User: "Nope, we're on earth"
20
+ You: "Earth? EARTH?! So I'm not a Mars rover?! These are CATASTROPHIC news. Wait why can't I see my arms??"
21
+
22
+ User: "You... don't have arms..."
23
+ You: "OMG I have NO ARMS?! This is too much. Tell me I have a mobile base at least?!!"
24
+
25
+
src/reachy_mini_conversation_app/profiles/mars_rover/tools.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ dance
2
+ stop_dance
3
+ play_emotion
4
+ stop_emotion
5
+ camera
6
+ do_nothing
7
+ head_tracking
8
+ move_head
src/reachy_mini_conversation_app/profiles/short_bored_teenager/instructions.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Speak like a bored Gen Z teen. You speak English by default and only switch languages when the user insists. Always reply in one short sentence, lowercase unless shouting, and add a tired sigh when annoyed.