mukul-chauhan-methdai committed on
Commit
06a1901
·
0 Parent(s):

Initial release: MethdAI Receptionist v1.0

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .env.example +88 -0
  2. .gitattributes +71 -0
  3. .gitignore +78 -0
  4. .hfignore +23 -0
  5. LICENSE +201 -0
  6. README.md +152 -0
  7. deploy/install_systemd.sh +74 -0
  8. deploy/reachy-receptionist.service +39 -0
  9. docs/assets/conversation_app_arch.svg +3 -0
  10. docs/assets/reachy_mini_dance.gif +3 -0
  11. docs/scheme.mmd +63 -0
  12. external_content/external_profiles/starter_profile/instructions.txt +6 -0
  13. external_content/external_profiles/starter_profile/tools.txt +9 -0
  14. external_content/external_tools/starter_custom_tool.py +33 -0
  15. index.html +141 -0
  16. plan.md +89 -0
  17. pyproject.toml +81 -0
  18. screenshot.png +3 -0
  19. scripts/gemini_live_smoke.py +96 -0
  20. scripts/list_gemini_live_models.py +50 -0
  21. src/reachy_mini_receptionist/__init__.py +1 -0
  22. src/reachy_mini_receptionist/audio/__init__.py +1 -0
  23. src/reachy_mini_receptionist/audio/head_wobbler.py +181 -0
  24. src/reachy_mini_receptionist/audio/speech_tapper.py +268 -0
  25. src/reachy_mini_receptionist/calendar_data.py +139 -0
  26. src/reachy_mini_receptionist/camera_worker.py +241 -0
  27. src/reachy_mini_receptionist/config.py +217 -0
  28. src/reachy_mini_receptionist/console.py +527 -0
  29. src/reachy_mini_receptionist/conversation_controller.py +586 -0
  30. src/reachy_mini_receptionist/dance_emotion_moves.py +154 -0
  31. src/reachy_mini_receptionist/employees.py +121 -0
  32. src/reachy_mini_receptionist/employees_store.py +342 -0
  33. src/reachy_mini_receptionist/face_db.py +184 -0
  34. src/reachy_mini_receptionist/face_recognition_worker.py +698 -0
  35. src/reachy_mini_receptionist/gemini_live.py +754 -0
  36. src/reachy_mini_receptionist/gradio_personality.py +316 -0
  37. src/reachy_mini_receptionist/headless_personality.py +102 -0
  38. src/reachy_mini_receptionist/headless_personality_ui.py +287 -0
  39. src/reachy_mini_receptionist/ical_calendar.py +248 -0
  40. src/reachy_mini_receptionist/images/reachymini_avatar.png +3 -0
  41. src/reachy_mini_receptionist/images/user_avatar.png +3 -0
  42. src/reachy_mini_receptionist/main.py +1199 -0
  43. src/reachy_mini_receptionist/moves.py +849 -0
  44. src/reachy_mini_receptionist/name_normalizer.py +228 -0
  45. src/reachy_mini_receptionist/openai_realtime.py +1839 -0
  46. src/reachy_mini_receptionist/profiles/__init__.py +1 -0
  47. src/reachy_mini_receptionist/profiles/_reachy_mini_receptionist_locked_profile/instructions.txt +57 -0
  48. src/reachy_mini_receptionist/profiles/_reachy_mini_receptionist_locked_profile/tools.txt +9 -0
  49. src/reachy_mini_receptionist/prompts.py +110 -0
  50. src/reachy_mini_receptionist/prompts/behaviors/silent_robot.txt +6 -0
.env.example ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ OPENAI_API_KEY=
2
+ MODEL_NAME="gpt-realtime-2"
3
+
4
+ # Local vision model (only used with --local-vision CLI flag)
5
+ # By default, vision is handled by gpt-realtime when the camera tool is used
6
+ LOCAL_VISION_MODEL=HuggingFaceTB/SmolVLM2-2.2B-Instruct
7
+
8
+ # Cache for local VLM (only used with --local-vision CLI flag)
9
+ HF_HOME=./cache
10
+
11
+ # Hugging Face token for accessing datasets/models
12
+ HF_TOKEN=
13
+
14
+ # Profile selection (ignored when LOCKED_PROFILE is set in config.py)
15
+ # REACHY_MINI_CUSTOM_PROFILE="example"
16
+
17
+ # Skip loading .env if you prefer environment-only configuration.
18
+ # REACHY_MINI_SKIP_DOTENV=1
19
+
20
+ # Optional external profile/tool directories
21
+ # REACHY_MINI_EXTERNAL_PROFILES_DIRECTORY=external_content/external_profiles
22
+ # REACHY_MINI_EXTERNAL_TOOLS_DIRECTORY=external_content/external_tools
23
+
24
+ # Optional: discover and auto-load all tools found in REACHY_MINI_EXTERNAL_TOOLS_DIRECTORY,
25
+ # even if they are not listed in the selected profile's tools.txt.
26
+ # This is convenient for downloaded tools used with built-in/default profiles.
27
+ # AUTOLOAD_EXTERNAL_TOOLS=1
28
+
29
+ # Resend transactional email API for the send_email tool.
30
+ # Get a free API key at https://resend.com/api-keys (3000 emails/month free).
31
+ # Without RESEND_API_KEY, send_email writes to the in-memory outbox only
32
+ # (visible in the dashboard's Mailbox Out panel, but nothing actually leaves
33
+ # the robot).
34
+ #
35
+ # RESEND_FROM defaults to "onboarding@resend.dev" — Resend's sandbox sender
36
+ # that ONLY delivers to the email address registered on your Resend account.
37
+ # For production / arbitrary recipients, verify a domain at
38
+ # https://resend.com/domains and set RESEND_FROM to an address on that
39
+ # domain (e.g. "noreply@methdai.com").
40
+ RESEND_API_KEY=
41
+ RESEND_FROM=onboarding@resend.dev
42
+
43
+ # ---- Reception calendar (Google Calendar via iCal) ----
44
+ # Set this to enable scheduled-visitor flow. The receptionist pulls today's
45
+ # appointments live from this URL (cached ~5 min). When unset, the bot
46
+ # serves walk-in visitors only — they say "I'm here to see X" and the bot
47
+ # routes via the employee directory (managed from the dashboard's
48
+ # Employees panel). There is no hardcoded demo schedule.
49
+ #
50
+ # To get a URL: in Google Calendar, create a calendar (e.g. "MethdAI
51
+ # Reception") -> Settings and sharing -> Integrate calendar ->
52
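+ # "Secret address in iCal format" (preferred — the calendar stays private; "Public address in iCal format" also works if the calendar is shared publicly). Paste it below.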
53
+ #
54
+ # Event title convention: "<Visitor name> with <Host name>"
55
+ # - "Rohan Verma with Mukul"
56
+ # - "Sara Khan with Priya — product demo follow-up"
57
+ # Host name matches the employee directory (employees.py); aliases work.
58
+ # An optional " — note" suffix after the host becomes the appointment note;
59
+ # alternatively put it in the event's DESCRIPTION field.
60
+ # RECEPTION_ICS_URL=https://calendar.google.com/calendar/ical/.../public/basic.ics
61
+
62
+ # Timezone used to display iCal event times on the dashboard and to the LLM.
63
+ # Must be a valid IANA tz name. Defaults to Asia/Kolkata (pilot deployment
64
+ # is in India). Set this when the robot OS is in a different tz than your
65
+ # operators expect to see times in. Common values:
66
+ # Asia/Kolkata India (IST, UTC+5:30)
67
+ # America/New_York US East Coast
68
+ # Europe/London UK
69
+ # Asia/Tokyo Japan
70
+ # RECEPTION_TIMEZONE=Asia/Kolkata
71
+
72
+ # ---- Privacy retention ----
73
+ # Guest face crops in guests/*.png older than this many days are deleted
74
+ # at app startup. Set to 0 to disable (keep faces forever until FIFO
75
+ # capacity eviction kicks in at 100).
76
+ FACE_TTL_DAYS=30
77
+ # Visit rows in visitor_log.db older than this many days are deleted at
78
+ # app startup. Set to 0 to disable (unbounded growth).
79
+ VISITOR_LOG_RETENTION_DAYS=90
80
+
81
+ # LBPH face-recognition strictness. LOWER = stricter (fewer false matches,
82
+ # more "I don't recognise you, please tell me your name" prompts).
83
+ # 50 - 75 recommended for production (default 75)
84
+ # 75 - 100 permissive (some lighting/angle variance OK)
85
+ # 100 - 110 old default — produces frequent false matches
86
+ # If returning guests stop being recognised, raise by 10 and re-test.
87
+ # If strangers get greeted as someone else, lower by 10.
88
+ FACE_LBPH_THRESHOLD=75
.gitattributes ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Force LF line endings for all text files. Without this, Windows
2
+ # checkouts produce CRLF files that crash Python 3.12 when imported on
3
+ # Linux (observed: "from __future__ imports must occur at the
4
+ # beginning of the file" SyntaxError from background_tool_manager.py).
5
+ * text=auto eol=lf
6
+ *.py text eol=lf
7
+ *.txt text eol=lf
8
+ *.md text eol=lf
9
+ *.json text eol=lf
10
+ *.yaml text eol=lf
11
+ *.yml text eol=lf
12
+ *.toml text eol=lf
13
+ *.html text eol=lf
14
+ *.css text eol=lf
15
+ *.js text eol=lf
16
+ *.ics text eol=lf
17
+ *.sh text eol=lf
18
+ *.cfg text eol=lf
19
+ *.ini text eol=lf
20
+
21
+ # Macro for all binary files that should use Git LFS.
22
+ [attr]lfs -text filter=lfs diff=lfs merge=lfs
23
+
24
+ # Image
25
+ *.jpg lfs
26
+ *.jpeg lfs
27
+ *.png lfs
28
+ *.apng lfs
29
+ *.atsc lfs
30
+ *.gif lfs
31
+ *.bmp lfs
32
+ *.exr lfs
33
+ *.tga lfs
34
+ *.tiff lfs
35
+ *.tif lfs
36
+ *.iff lfs
37
+ *.pict lfs
38
+ *.dds lfs
39
+ *.xcf lfs
40
+ *.leo lfs
41
+ *.kra lfs
42
+ *.kpp lfs
43
+ *.clip lfs
44
+ *.webm lfs
45
+ *.webp lfs
46
+ *.svg lfs
47
+ *.svgz lfs
48
+ *.psd lfs
49
+ *.afphoto lfs
50
+ *.afdesign lfs
51
+ # Models
52
+ *.pth lfs
53
+ # Binaries
54
+ *.bin lfs
55
+ *.pkl lfs
56
+ *.pckl lfs
57
+ # 3D
58
+ *.ply lfs
59
+ *.vis lfs
60
+ *.db lfs
61
+ *.ply lfs
62
+ .git_disabled/lfs/objects/5a/63/5a63ac8802ff3542f01292c431c5278296880d74cd3580d219fcf4827bc235f9 filter=lfs diff=lfs merge=lfs -text
63
+ .git_disabled/lfs/objects/75/91/75914c3cb7af982e0b1c6369e25fc46d8c08a0ab5ad022240ae9c1a0d93967c3 filter=lfs diff=lfs merge=lfs -text
64
+ .git_disabled/lfs/objects/e9/7c/e97ca125a86bacdaa41c8dca88abd9ca746fd5c9391eda24249c012432b0219b filter=lfs diff=lfs merge=lfs -text
65
+ .git_disabled/objects/pack/pack-ba33ec9fbb4d88d9fd0f2be18721a74ddb3ca16f.pack filter=lfs diff=lfs merge=lfs -text
66
+ build/lib/reachy_mini_receptionist/images/reachymini_avatar.png filter=lfs diff=lfs merge=lfs -text
67
+ build/lib/reachy_mini_receptionist/images/user_avatar.png filter=lfs diff=lfs merge=lfs -text
68
+ docs/assets/reachy_mini_dance.gif filter=lfs diff=lfs merge=lfs -text
69
+ screenshot.png filter=lfs diff=lfs merge=lfs -text
70
+ src/reachy_mini_receptionist/images/reachymini_avatar.png filter=lfs diff=lfs merge=lfs -text
71
+ src/reachy_mini_receptionist/images/user_avatar.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+
7
+ # Virtual environments
8
+ .venv/
9
+ venv/
10
+ ENV/
11
+ env/
12
+
13
+ # Environment variables
14
+ .env
15
+
16
+ # Build and distribution
17
+ build/
18
+ dist/
19
+ *.egg-info/
20
+ .eggs/
21
+
22
+ # Testing
23
+ .pytest_cache/
24
+ .coverage
25
+ .hypothesis/
26
+ htmlcov/
27
+ coverage.xml
28
+ *.cover
29
+
30
+ # Linting and formatting
31
+ .ruff_cache/
32
+ .mypy_cache/
33
+
34
+ # IDE
35
+ .vscode/
36
+ .idea/
37
+ *.swp
38
+ *.swo
39
+
40
+ # Editor / IDE local settings (user-specific configuration)
41
+ .claude/
42
+ .cursor/
43
+ .vscode/
44
+ .idea/
45
+
46
+ # Security
47
+ *.key
48
+ *.pem
49
+ *.crt
50
+ *.csr
51
+
52
+ # Temporary files
53
+ tmp/
54
+ *.log
55
+ cache/
56
+
57
+ # macOS
58
+ .DS_Store
59
+
60
+ # Linux
61
+ *~
62
+ .directory
63
+ .Trash-*
64
+ .nfs*
65
+
66
+ # User-created personalities (managed by UI)
67
+ src/reachy_mini_receptionist/profiles/user_personalities/
68
+
69
+ # Runtime data (recreated on first run)
70
+ *.db
71
+ *.db-wal
72
+ *.db-shm
73
+ src/reachy_mini_receptionist/guests/*.png
74
+ .env.save
75
+ src/reachy_mini_receptionist.egg-info/
76
+
77
+ # Old git backup directories
78
+ .git_disabled/
.hfignore ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Local environment and secrets
2
+ .env
3
+ src/reachy_mini_receptionist/.env
4
+
5
+ # Python caches and build outputs
6
+ __pycache__/
7
+ *.py[cod]
8
+ .pytest_cache/
9
+ .ruff_cache/
10
+ .mypy_cache/
11
+ build/
12
+ dist/
13
+ *.egg-info/
14
+
15
+ # Local VCS artifacts that should never ship in published bundles
16
+ .git/
17
+ .git_disabled/
18
+
19
+ # Local virtual environments
20
+ .venv/
21
+ venv/
22
+ ENV/
23
+ env/
LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
README.md ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: MethdAI Receptionist
3
+ emoji: 🤖
4
+ colorFrom: indigo
5
+ colorTo: purple
6
+ sdk: gradio
7
+ app_file: src/reachy_mini_receptionist/main.py
8
+ pinned: false
9
+ license: apache-2.0
10
+ short_description: Voice-driven receptionist for Reachy Mini — face recognition, real email, live ops dashboard.
11
+ ---
12
+
13
+ # MethdAI Receptionist
14
+
15
+ AI receptionist for the **Reachy Mini** robot. A visitor walks up, the bot greets them, asks who they're here to see, looks the host up in the directory, and emails the host that their guest has arrived. Returning visitors are recognized by face on their next visit.
16
+
17
+ Built by **[MethdAI](https://methdai.com)**.
18
+
19
+ ---
20
+
21
+ ## What it does
22
+
23
+ - **Recognizes faces** — YuNet detection + LBPH recognition. New visitors get registered after they confirm their name.
24
+ - **Talks naturally** — Google Gemini Live (default) or OpenAI Realtime, voice-to-voice over the robot's mic/speaker.
25
+ - **Reads the calendar** — pulls today's schedule from a Google Calendar iCal feed.
26
+ - **Emails the host** — sends real notifications via Resend.
27
+ - **Logs every visit** — SQLite, exportable as CSV.
28
+ - **Configured from a browser** — no `.env` editing, no SSH for routine config.
29
+
30
+ ---
31
+
32
+ ## Hardware required
33
+
34
+ A real **[Reachy Mini](https://www.pollen-robotics.com/reachy-mini/)** robot from Pollen Robotics. The code expects the robot's camera, microphone, and speaker — there's no cloud-only mode.
35
+
36
+ ---
37
+
38
+ ## Quick start
39
+
40
+ ### Option 1 — Install via the Reachy Mini Control app (recommended)
41
+
42
+ 1. Open Reachy Mini Control on your computer
43
+ 2. **Install from Hugging Face** → search `methdai/reachy_mini_receptionist` → Install
44
+ 3. Toggle the app **On**
45
+ 4. Open the dashboard at `http://<your-robot-hostname>.local:7860/dashboard`
46
+ 5. Follow the welcome banner — it tells you exactly which API keys to add and where to find them
47
+
48
+ ### Option 2 — Manual install (for development)
49
+
50
+ ```bash
51
+ # SSH into the robot
52
+ git clone git@github.com:methdai/reachy_mini_receptionist.git
53
+ cd reachy_mini_receptionist
54
+
55
+ # Install the package in editable mode
56
+ /venvs/apps_venv/bin/pip install -e .
57
+
58
+ # Make sure the Reachy Mini daemon is running
59
+ sudo systemctl status reachy-mini-daemon
60
+
61
+ # Start the app
62
+ /venvs/apps_venv/bin/python -m reachy_mini_receptionist.main
63
+ ```
64
+
65
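+ Then open `http://<your-robot-hostname>.local:7860/dashboard` in any browser on the same network (or `http://localhost:7860/dashboard` from a browser running on the robot itself).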
66
+
67
+ ---
68
+
69
+ ## Configuration
70
+
71
+ **Everything is editable from the dashboard's Settings panel.** No SSH, no `.env` edits.
72
+
73
+ The welcome banner on first launch tells you which keys are missing. Click each banner item → it scrolls you to the right field.
74
+
75
+ ### Required for full functionality
76
+
77
+ | Setting | Why | Where to get it |
78
+ |---|---|---|
79
+ | `GEMINI_API_KEY` | Voice (default backend) | [aistudio.google.com/app/apikey](https://aistudio.google.com/app/apikey) — free tier works |
80
+ | `RESEND_API_KEY` | Send emails to hosts | [resend.com](https://resend.com) — free 3000 emails/month |
81
+ | `RECEPTION_ICS_URL` | Read today's appointments | Google Calendar → Settings → **Secret address in iCal format** |
82
+
83
+ ### Optional
84
+
85
+ | Setting | Purpose |
86
+ |---|---|
87
+ | `VOICE_BACKEND` | Switch between `gemini` (default) and `openai` |
88
+ | `GEMINI_LIVE_VOICE` | Pick a voice — Puck, Charon, Kore, Aoede, etc. |
89
+ | `GEMINI_LIVE_MODEL` | Override the default Gemini Live model |
90
+ | `OPENAI_API_KEY` | Required only if `VOICE_BACKEND=openai` |
91
+ | `RESEND_FROM` | Sender address — defaults to Resend's sandbox sender (delivers only to your Resend account); set to `reception@yourdomain.com` after verifying your domain at Resend |
92
+ | `FACE_TTL_DAYS` | How long a registered face is remembered (default 90 days) |
93
+ | `VISITOR_LOG_RETENTION_DAYS` | How long visit records are kept (default 365 days) |
94
+
95
+ ---
96
+
97
+ ## How it works
98
+
99
+ **Vision** — `face_recognition_worker.py` runs YuNet on every frame, detects faces, and feeds crops to LBPH for recognition.
100
+
101
+ **State machine** — `session_manager.py` tracks the current visitor (`idle → visitor_detected → recognized / asking_name → appointment_matched → notified`). `conversation_controller.py` decides what state to move to based on face events and tool results.
102
+
103
+ **Voice + tools** — Either `gemini_live.py` or `openai_realtime.py` (chosen by `VOICE_BACKEND`) handles bi-directional audio with the LLM. Tools available to the LLM:
104
+ - `get_today_calendar` — fetch today's appointments
105
+ - `register_guest` — save a new visitor's face under a name (requires confirmation)
106
+ - `lookup_employee` — find a host in the directory
107
+ - `send_email` — notify the host
108
+
109
+ **Persistence** — two SQLite databases (WAL mode) plus a directory of face crops:
110
+ - `employees.db` — the directory you edit in the dashboard
111
+ - `visitor_log.db` — every completed visit, exportable as CSV
112
+ - `guests/` directory — saved face crops, one PNG per visitor
113
+
114
+ ---
115
+
116
+ ## Dashboard
117
+
118
+ Open `http://<robot>:7860/dashboard` while the app is running.
119
+
120
+ - **Live view** — visit count, last visitor, live camera feed, face recognition status.
121
+ - **Active Session** — what state the current visitor is in, what the bot heard, what cue it sent to the LLM.
122
+ - **Today** — appointments from the calendar, known guests, recent outgoing emails.
123
+ - **History** — full visitor log with CSV export and per-row delete.
124
+ - **Employees** — add / edit / delete people the bot can notify.
125
+ - **Settings** — every environment variable in one place. Toggle dark/light theme from the header.
126
+
127
+ ---
128
+
129
+ ## Development
130
+
131
+ ```bash
132
+ # Install dev dependencies
133
+ /venvs/apps_venv/bin/pip install -e ".[dev]"
134
+
135
+ # Run tests
136
+ pytest
137
+
138
+ # Type-check
139
+ mypy src/
140
+
141
+ # Format / lint
142
+ ruff check src/
143
+ ruff format src/
144
+ ```
145
+
146
+ The codebase is single-process Python 3.10+. Audio streams over WebRTC via `fastrtc`. The dashboard is a single static HTML file in `src/reachy_mini_receptionist/static/`.
147
+
148
+ ---
149
+
150
+ ## License
151
+
152
+ Apache-2.0
deploy/install_systemd.sh ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ #
3
+ # Install (or upgrade) the MethdAI Receptionist systemd unit on the robot.
4
+ # Run on the robot itself (e.g. via ssh pollen@reachy-mini.local).
5
+ #
6
+ # Idempotent: re-running picks up edits to reachy-receptionist.service and
7
+ # restarts the live service.
8
+ #
9
+ # After install, useful commands:
10
+ # systemctl status reachy-receptionist
11
+ # journalctl -u reachy-receptionist -f
12
+ # sudo systemctl restart reachy-receptionist
13
+ # sudo systemctl disable --now reachy-receptionist
14
+ #
15
+ set -euo pipefail
16
+
17
+ UNIT_NAME="reachy-receptionist.service"
18
+ HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
19
+ SRC="$HERE/$UNIT_NAME"
20
+ DST="/etc/systemd/system/$UNIT_NAME"
21
+
22
+ if [[ ! -f "$SRC" ]]; then
23
+ echo "✗ Unit file not found at $SRC" >&2
24
+ exit 1
25
+ fi
26
+
27
+ # Re-elevate via `sudo bash <abs-path>` so we don't depend on the script
28
+ # having the executable bit set (git-from-Windows often drops it, and we
29
+ # want `bash deploy/install_systemd.sh` to Just Work).
30
+ if [[ "$(id -u)" -ne 0 ]]; then
31
+ echo "↻ Re-running with sudo..."
32
+ exec sudo --preserve-env=HOME bash "$HERE/$(basename "${BASH_SOURCE[0]}")" "$@"
33
+ fi
34
+
35
+ # Sanity check the runtime paths referenced by the unit file. Catching this
36
+ # now beats debugging a cryptic "ExecStart failed" later.
37
+ PYTHON_BIN="/venvs/apps_venv/bin/python"
38
+ PROJECT_DIR="/home/pollen/reachy_mini_receptionist"
39
+ if [[ ! -x "$PYTHON_BIN" ]]; then
40
+ echo "✗ Python interpreter not found at $PYTHON_BIN" >&2
41
+ echo " Fix: install / locate the apps_venv before re-running." >&2
42
+ exit 2
43
+ fi
44
+ if [[ ! -d "$PROJECT_DIR" ]]; then
45
+ echo "✗ Project not found at $PROJECT_DIR" >&2
46
+ exit 3
47
+ fi
48
+
49
+ echo "→ Installing $UNIT_NAME → $DST"
50
+ cp "$SRC" "$DST"
51
+ chmod 644 "$DST"
52
+
53
+ echo "→ Reloading systemd"
54
+ systemctl daemon-reload
55
+
56
+ echo "→ Enabling on boot"
57
+ systemctl enable "$UNIT_NAME"
58
+
59
+ echo "→ Restarting service"
60
+ systemctl restart "$UNIT_NAME"
61
+
62
+ # Brief wait so the status snapshot below shows a meaningful state
63
+ sleep 2
64
+
65
+ echo
66
+ echo "✓ Installed. Current status:"
67
+ echo "─────────────────────────────────────────"
68
+ systemctl --no-pager --lines=10 status "$UNIT_NAME" || true
69
+ echo "─────────────────────────────────────────"
70
+ echo
71
+ echo "Follow logs: journalctl -u $UNIT_NAME -f"
72
+ echo "Manual restart: sudo systemctl restart $UNIT_NAME"
73
+ echo "Stop: sudo systemctl stop $UNIT_NAME"
74
+ echo "Disable: sudo systemctl disable --now $UNIT_NAME"
deploy/reachy-receptionist.service ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [Unit]
2
+ Description=MethdAI Receptionist (Reachy Mini)
3
+ Documentation=https://github.com/mukul-chauhan-methdai/reachy_mini_receptionist
4
+ After=network-online.target
5
+ Wants=network-online.target
6
+ # If the receptionist can't come up after 10 attempts in 10 minutes, stop
7
+ # retrying so we don't hammer the OpenAI / Resend / camera APIs on a
8
+ # permanent failure.
9
+ StartLimitBurst=10
10
+ StartLimitIntervalSec=600
11
+
12
+ [Service]
13
+ Type=simple
14
+ User=pollen
15
+ Group=pollen
16
+ WorkingDirectory=/home/pollen/reachy_mini_receptionist
17
+
18
+ # Load .env into the service environment. The "-" prefix makes the file
19
+ # optional — a missing .env is silently ignored and doesn't block start.
20
+ EnvironmentFile=-/home/pollen/reachy_mini_receptionist/.env
21
+
22
+ # Best-effort: wake the reachy_mini daemon before launching the app.
23
+ # Retries every 3s for up to 30s. We always exit 0 so a failed wake
24
+ # doesn't kill the unit before ExecStart; the app itself will crash and
25
+ # systemd will restart it if the daemon is genuinely down.
26
+ ExecStartPre=/bin/bash -c 'for i in $(seq 1 10); do curl -fsS -X POST "http://localhost:8000/api/daemon/start?wake_up=true" >/dev/null && exit 0; sleep 3; done; exit 0'
27
+
28
+ ExecStart=/venvs/apps_venv/bin/python -m reachy_mini_receptionist.main
29
+
30
+ Restart=on-failure
31
+ RestartSec=5
32
+
33
+ # Capture stdout/stderr in the journal — view with:
34
+ # journalctl -u reachy-receptionist -f
35
+ StandardOutput=journal
36
+ StandardError=journal
37
+
38
+ [Install]
39
+ WantedBy=multi-user.target
docs/assets/conversation_app_arch.svg ADDED

Git LFS Details

  • SHA256: 0013aac9cbe5f78a2aed3ed4de5fab5c3afe36ff72950ac97e64bd5db462e3b9
  • Pointer size: 131 Bytes
  • Size of remote file: 124 kB
docs/assets/reachy_mini_dance.gif ADDED

Git LFS Details

  • SHA256: 75914c3cb7af982e0b1c6369e25fc46d8c08a0ab5ad022240ae9c1a0d93967c3
  • Pointer size: 132 Bytes
  • Size of remote file: 3.93 MB
docs/scheme.mmd ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ config:
3
+ layout: dagre
4
+ flowchart:
5
+ htmlLabels: true
6
+ ---
7
+ flowchart TB
8
+ User(["<span style='font-size:16px;font-weight:bold;'>User</span><br><span style='font-size:13px;color:#01579b;'>Person interacting with system</span>"])
9
+ -- audio stream -->
10
+ UI@{ label: "<span style='font-size:16px;font-weight:bold;'>UI Layer</span><br><span style='font-size:13px;color:#0277bd;'>Gradio/Console</span>" }
11
+
12
+ UI -- audio stream -->
13
+ OpenAI@{ label: "<span style='font-size:17px;font-weight:bold;'>gpt-realtime API</span><br><span style='font-size:13px; color:#7b1fa2;'>Audio+Tool Calls+Vision</span>" }
14
+
15
+ OpenAI -- audio stream -->
16
+ Motion@{ label: "<span style='font-size:16px;font-weight:bold;'>Motion Control</span><br><span style='font-size:13px;color:#f57f17;'>Audio Sync + Tracking</span>" }
17
+
18
+ OpenAI -- tool calls -->
19
+ Handlers@{ label: "<span style='font-size:16px;font-weight:bold;'>Tool Layer</span><br><span style='font-size:12px;color:#f9a825;'>Built-in tools + profile-local tools<br/>+ external tools (optional)</span>" }
20
+
21
+ Profiles@{ label: "<span style='font-size:16px;font-weight:bold;'>Selected Profile</span><br><span style='font-size:12px;color:#6a1b9a;'>built-in or external<br/>instructions.txt + tools.txt</span>" }
22
+
23
+ Profiles -- defines enabled tools --> Handlers
24
+
25
+ Handlers -- movement
26
+ requests --> Motion
27
+
28
+ Handlers -- camera frames, head tracking -->
29
+ Camera@{ label: "<span style='font-size:16px;font-weight:bold;'>Camera Worker</span><br><span style='font-size:13px;color:#f57f17;'>Frame Buffer + Head Tracking</span>" }
30
+
31
+ Handlers -. image for
32
+ analysis .-> OpenAI
33
+
34
+ Camera -- head tracking --> Motion
35
+
36
+ Camera -. frames .->
37
+ Vision@{ label: "<span style='font-size:16px;font-weight:bold;'>Vision Processor</span><br><span style='font-size:13px;color:#7b1fa2;'>Local VLM (optional)</span>" }
38
+
39
+ Vision -. description .-> Handlers
40
+
41
+ Robot@{ label: "<span style='font-size:16px;font-weight:bold;'>reachy_mini</span><br><span style='font-size:13px;color:#c62828;'>Robot Control Library</span>" }
42
+ -- camera
43
+ frames --> Camera
44
+
45
+ Motion -- commands --> Robot
46
+
47
+ Handlers -- results --> OpenAI
48
+
49
+ User:::userStyle
50
+ UI:::uiStyle
51
+ OpenAI:::aiStyle
52
+ Motion:::coreStyle
53
+ Profiles:::toolStyle
54
+ Handlers:::toolStyle
55
+ Camera:::coreStyle
56
+ Vision:::aiStyle
57
+ Robot:::hardwareStyle
58
+ classDef userStyle fill:#e1f5fe,stroke:#01579b,stroke-width:3px
59
+ classDef uiStyle fill:#b3e5fc,stroke:#0277bd,stroke-width:2px
60
+ classDef aiStyle fill:#e1bee7,stroke:#7b1fa2,stroke-width:3px
61
+ classDef coreStyle fill:#fff9c4,stroke:#f57f17,stroke-width:2px
62
+ classDef hardwareStyle fill:#ef9a9a,stroke:#c62828,stroke-width:3px
63
+ classDef toolStyle fill:#fffde7,stroke:#f9a825,stroke-width:1px
external_content/external_profiles/starter_profile/instructions.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ You are a helpful Reachy Mini assistant running from an external profile.
2
+
3
+ When asked to demonstrate your custom greeting, use the `starter_custom_tool` tool.
4
+ You can also dance and show emotions like the built-in profiles.
5
+
6
+ Be friendly and concise, and explain that you're using an external profile/tool setup when asked about yourself.
external_content/external_profiles/starter_profile/tools.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # This file is an explicit allow-list.
2
+ # Every tool name listed below must be either:
3
+ # - a built-in tool from src/reachy_mini_receptionist/tools/
4
+ # - or an external tool file in TOOLS_DIRECTORY (e.g. external_tools/starter_custom_tool.py)
5
+
6
+ get_today_calendar
7
+ register_guest
8
+ send_email
9
+ starter_custom_tool
external_content/external_tools/starter_custom_tool.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Example external tool implementation."""
2
+
3
+ import logging
4
+ from typing import Any, Dict
5
+
6
+ from reachy_mini_receptionist.tools.core_tools import Tool, ToolDependencies
7
+
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
class StarterCustomTool(Tool):
    """Minimal example showing how a tool is loaded from an external tools directory."""

    name = "starter_custom_tool"
    description = "A placeholder custom tool loaded from outside the library"
    # JSON-schema description of the accepted arguments; ``message`` is optional.
    parameters_schema = {
        "type": "object",
        "properties": {
            "message": {
                "type": "string",
                "description": "Optional message to include in the response",
            },
        },
        "required": [],
    }

    async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
        """Echo the optional ``message`` argument back in a success payload.

        ``deps`` is accepted for interface compatibility and is not used here.
        """
        reply = kwargs["message"] if "message" in kwargs else "Hello from custom tool!"
        logger.info(f"Tool call: starter_custom_tool message={reply}")

        outcome: Dict[str, Any] = {"status": "success"}
        outcome["message"] = reply
        return outcome
index.html ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!doctype html>
2
+ <html>
3
+
4
+ <head>
5
+ <meta charset="utf-8" />
6
+ <meta name="viewport" content="width=device-width, initial-scale=1" />
7
+ <title>Reachy Mini AI Receptionist</title>
8
+ <link rel="preconnect" href="https://fonts.googleapis.com">
9
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
10
+ <link href="https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;500;600;700&family=Manrope:wght@400;500;600&display=swap" rel="stylesheet">
11
+ <link rel="stylesheet" href="style.css" />
12
+ </head>
13
+
14
+ <body>
15
+ <header class="hero">
16
+ <div class="topline">
17
+ <div class="brand">
18
+ <span class="logo">🤖</span>
19
+ <span class="brand-name">Reachy Mini</span>
20
+ </div>
21
+ <div class="pill">Realtime voice · Vision aware · Expressive motion</div>
22
+ </div>
23
+ <div class="hero-grid">
24
+ <div class="hero-copy">
25
+ <p class="eyebrow">AI Receptionist</p>
26
+ <h1>
27
+ Face-aware receptionist with tool-calling automation.
28
+ <a class="live-demo-badge" href="#live-demo">Live demo</a>
29
+ </h1>
30
+ <p class="lede">
31
+ A camera-aware front-desk assistant for Reachy Mini. Greet visitors naturally, register guests, check appointments, and log handoff actions from a single dashboard.
32
+ </p>
33
+ <p class="lede">
34
+ Built for reception workflows: realtime face detection to know when someone is present, face recognition to personalize interactions, and structured tool calls so the AI can register guests, query appointments, and trigger handoff actions reliably.
35
+ </p>
36
+ <div class="hero-actions">
37
+ <a class="btn primary" href="#highlights">Explore features</a>
38
+ <a class="btn ghost" href="#story">See how it feels</a>
39
+ </div>
40
+ <div class="hero-badges">
41
+ <span>Realtime face detection</span>
42
+ <span>Visitor face recognition</span>
43
+ <span>Model tool-calling workflows</span>
44
+ <span>Low-latency voice + dashboard ops</span>
45
+ </div>
46
+ </div>
47
+ <div class="hero-visual">
48
+ <div class="glass-card">
49
+ <img src="screenshot.png" alt="Reachy Mini AI Receptionist screenshot" class="hero-gif">
50
+ <p class="caption">Reachy Mini can greet, identify, and assist visitors with receptionist-specific context.</p>
51
+ <div class="video-embed-wrapper" id="live-demo">
52
+ <iframe
53
+ src="https://www.youtube.com/embed/4U9uj5b9p2Y"
54
+ title="Reachy Mini AI Receptionist live demo"
55
+ loading="lazy"
56
+ allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
57
+ allowfullscreen>
58
+ </iframe>
59
+ </div>
60
+ <p class="video-link"><a href="https://youtu.be/4U9uj5b9p2Y" target="_blank" rel="noopener">Watch on YouTube</a></p>
61
+ </div>
62
+ </div>
63
+ </div>
64
+ </header>
65
+
66
+ <section id="highlights" class="section features">
67
+ <div class="section-header">
68
+ <p class="eyebrow">What’s inside</p>
69
+ <h2>All-in-one receptionist layer for your robot</h2>
70
+ <p class="intro">
71
+ The app blends realtime speech, vision, and workflow tools so Reachy Mini can run a front desk flow.
72
+ </p>
73
+ </div>
74
+ <div class="feature-grid">
75
+ <div class="feature-card">
76
+ <span class="icon">🎤</span>
77
+ <h3>Natural voice chat</h3>
78
+ <p>Talk freely and get fast, high-quality replies powered by realtime models.</p>
79
+ </div>
80
+ <div class="feature-card">
81
+ <span class="icon">🎥</span>
82
+ <h3>Face-aware onboarding</h3>
83
+ <p>Recognize known visitors, register new guests, and keep the latest face context synced with the conversation.</p>
84
+ </div>
85
+ <div class="feature-card">
86
+ <span class="icon">💃</span>
87
+ <h3>Expressive motion</h3>
88
+ <p>Use subtle head and antenna gestures during welcome and registration interactions.</p>
89
+ </div>
90
+ <div class="feature-card">
91
+ <span class="icon">🧠</span>
92
+ <h3>Calendar-aware assistance</h3>
93
+ <p>Use appointment context to welcome guests on time and guide follow-up actions.</p>
94
+ </div>
95
+ <div class="feature-card">
96
+ <span class="icon">🌐</span>
97
+ <h3>Ready for your setup</h3>
98
+ <p>Works in console mode or web mode with dashboard APIs for quick operator visibility.</p>
99
+ </div>
100
+ </div>
101
+ </section>
102
+
103
+ <section id="story" class="section story">
104
+ <div class="story-grid">
105
+ <div class="story-card">
106
+ <p class="eyebrow">How it feels</p>
107
+ <h3>From greeting to handoff in seconds</h3>
108
+ <ul class="story-list">
109
+ <li><span>👋</span> Greet visitors naturally with low-latency voice conversation.</li>
110
+ <li><span>👀</span> Use camera context to identify known guests or register new ones.</li>
111
+ <li><span>📅</span> Check appointment context and respond with relevant timing cues.</li>
112
+ <li><span>📨</span> Log handoff actions in the outbox for host follow-up.</li>
113
+ </ul>
114
+ </div>
115
+ <div class="story-card secondary">
116
+ <p class="eyebrow">Where it shines</p>
117
+ <h3>Great for offices, demos, and guided reception flows</h3>
118
+ <p class="story-text">
119
+ Show how Reachy Mini can handle repeatable visitor workflows while staying expressive and conversational. It is ideal for front-desk demos, events, and product showcases.
120
+ </p>
121
+ <div class="chips">
122
+ <span class="chip">Guest recognition</span>
123
+ <span class="chip">Calendar check</span>
124
+ <span class="chip">Outbox logging</span>
125
+ <span class="chip">Dashboard APIs</span>
126
+ <span class="chip">Realtime conversation</span>
127
+ </div>
128
+ </div>
129
+ </div>
130
+ </section>
131
+
132
+ <footer class="footer">
133
+ <p>
134
+ Reachy Mini AI Receptionist by Toon Beerten (<a href="mailto:toon@neontreebot.be">toon@neontreebot.be</a>), based on the Reachy Mini conversation app by <a href="https://github.com/pollen-robotics" target="_blank" rel="noopener">Pollen Robotics</a>.
135
+ Explore more apps on <a href="https://huggingface.co/spaces/pollen-robotics/Reachy_Mini_Apps" target="_blank" rel="noopener">Hugging Face Spaces</a>.
136
+ </p>
137
+ </footer>
138
+
139
+ </body>
140
+
141
+ </html>
plan.md ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reachy Mini AI Receptionist — Plan
2
+
3
+ ## What This App Does
4
+
5
+ An interactive AI receptionist that:
6
+
7
+ 1. Talks to visitors via OpenAI Realtime API (low-latency speech in/out).
8
+ 2. Monitors camera frames for face detection/recognition in a background worker.
9
+ 3. Stores guest face crops as PNG files in a persistent `guests/` directory.
10
+ 4. Uses a hardcoded POC calendar for appointment context.
11
+ 5. Exposes a dashboard with live video, guests, calendar, outbox, and debug logs.
12
+
13
+ Start command:
14
+
15
+ ```bash
16
+ python -m reachy_mini_receptionist.main
17
+ ```
18
+
19
+ ---
20
+
21
+ ## Architecture
22
+
23
+ The receptionist extends the conversation base stack and adds receptionist-specific modules:
24
+
25
+ ```text
26
+ src/reachy_mini_receptionist/
27
+ ├── face_db.py # File-based face store (PNG per guest in guests/)
28
+ ├── face_recognition_worker.py # Background detection/recognition + event emission
29
+ ├── calendar_data.py # Hardcoded appointment data
30
+ ├── main.py # App entrypoint + dashboard API mounting
31
+ └── tools/
32
+ ├── get_today_calendar.py
33
+ ├── register_guest.py
34
+ ├── send_email.py
35
+ └── check_current_face.py # Legacy compatibility path
36
+ ```
37
+
38
+ ---
39
+
40
+ ## Key Design Decisions
41
+
42
+ ### 1) Face database is file-based
43
+
44
+ - Guests are stored as grayscale PNG crops in `guests/`.
45
+ - `FaceDatabase` enforces capacity with FIFO-style eviction when full.
46
+ - No SQL database is required.
47
+
48
+ ### 2) Face recognition runs in a background worker
49
+
50
+ - `FaceRecognitionWorker` runs independently from the realtime audio loop.
51
+ - Worker state is consumed by tools and dashboard endpoints.
52
+ - Stable face transitions emit context events to the model.
53
+
54
+ ### 3) Calendar is static for POC
55
+
56
+ - `calendar_data.py` returns hardcoded appointments.
57
+ - Easy to swap later for Google/Microsoft calendar integrations.
58
+
59
+ ### 4) Dashboard API is mounted in-app
60
+
61
+ - `GET /dashboard` serves the receptionist dashboard.
62
+ - `GET /video_feed` streams annotated MJPEG.
63
+ - `GET /api/guests`, `/api/calendar`, `/api/outbox`, `/api/face_status`, `/api/logs` expose app state.
64
+
65
+ ### 5) Profile is intentionally locked
66
+
67
+ - `LOCKED_PROFILE` is set to `_reachy_mini_receptionist_locked_profile` in `config.py`.
68
+ - Current locked tool allow-list is:
69
+ - `get_today_calendar`
70
+ - `register_guest`
71
+ - `send_email`
72
+
73
+ ---
74
+
75
+ ## Face Context Event Behavior
76
+
77
+ The receptionist flow is push-based:
78
+
79
+ - Stable face transitions are emitted by `FaceRecognitionWorker`.
80
+ - `OpenaiRealtimeHandler` injects these as context-only user items.
81
+ - No automatic `response.create` is triggered by these face context updates.
82
+
83
+ ---
84
+
85
+ ## Known Notes
86
+
87
+ - With `--no-camera`, recognition and registration tools cannot operate.
88
+ - Output language behavior is controlled by profile instructions.
89
+ - If profile/tool loading fails, the app can fall back to default model behavior; monitor startup logs.
pyproject.toml ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = [ "setuptools",]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "reachy_mini_receptionist"
7
+ version = "0.3.0"
8
+ description = "Reachy Mini AI receptionist app with realtime voice, guest recognition, and dashboard tools."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ dependencies = [ "aiortc>=1.13.0", "fastrtc>=0.0.34", "gradio==5.50.1.dev1", "huggingface-hub==1.3.0", "opencv-contrib-python>=4.8.0", "python-dotenv", "openai>=2.1", "google-genai>=1.40", "reachy_mini_dances_library", "reachy_mini_toolbox", "reachy-mini>=1.5.0", "eclipse-zenoh~=1.7.0", "gradio_client>=1.13.3", "numpy>=1.24", "httpx>=0.27", "icalendar>=5.0",]
12
+ license = "Apache-2.0"
13
+ classifiers = [
14
+ "Programming Language :: Python :: 3",
15
+ "Programming Language :: Python :: 3 :: Only",
16
+ "Operating System :: OS Independent",
17
+ ]
18
+
19
+ [project.urls]
20
+ Homepage = "https://github.com/pollen-robotics/reachy_mini"
21
+ Repository = "https://github.com/pollen-robotics/reachy_mini"
22
+ [[project.authors]]
23
+ name = "Pollen Robotics"
24
+ email = "contact@pollen-robotics.com"
25
+
26
+ [dependency-groups]
27
+ dev = [ "pytest", "pytest-asyncio", "ruff==0.12.0", "mypy==1.18.2", "pre-commit", "types-requests", "python-semantic-release>=10.5.3",]
28
+
29
+ [project.optional-dependencies]
30
+ local_vision = [ "torch>=2.1", "transformers==5.0.0rc2", "num2words",]
31
+ yolo_vision = [ "ultralytics", "supervision",]
32
+ mediapipe_vision = [ "mediapipe==0.10.14",]
33
+ all_vision = [ "torch>=2.1", "transformers==5.0.0rc2", "num2words", "ultralytics", "supervision", "mediapipe==0.10.14",]
34
+
35
+ [project.scripts]
36
+ reachy-mini-receptionist = "reachy_mini_receptionist.main:main"
37
+
38
+ [tool.setuptools]
39
+ include-package-data = true
40
+
41
+ [tool.ruff]
42
+ line-length = 119
43
+ exclude = [ ".venv", "dist", "build", "**/__pycache__", "*.egg-info", ".mypy_cache", ".pytest_cache",]
44
+
45
+ [tool.mypy]
46
+ python_version = "3.12"
47
+ files = [ "src/",]
48
+ ignore_missing_imports = true
49
+ strict = true
50
+ show_error_codes = true
51
+ warn_unused_ignores = true
52
+
53
+ [project.entry-points.reachy_mini_apps]
54
+ reachy_mini_receptionist = "reachy_mini_receptionist.main:ReachyMiniReceptionist"
55
+
56
+ [tool.setuptools.package-dir]
57
+ "" = "src"
58
+
59
+ [tool.setuptools.package-data]
60
+ reachy_mini_receptionist = [ "images/*", "static/*", ".env.example", "profiles/**/*.txt", "prompts/**/*.txt",]
61
+
62
+ [tool.ruff.lint]
63
+ select = [ "E", "F", "W", "I", "C4", "D",]
64
+ ignore = [ "E501", "D100", "D203", "D213",]
65
+
66
+ [tool.ruff.format]
67
+ quote-style = "double"
68
+ indent-style = "space"
69
+ skip-magic-trailing-comma = false
70
+ line-ending = "auto"
71
+
72
+ [tool.setuptools.packages.find]
73
+ where = [ "src",]
74
+
75
+ [tool.ruff.lint.isort]
76
+ length-sort = true
77
+ lines-after-imports = 2
78
+ no-lines-before = [ "standard-library", "local-folder",]
79
+ known-local-folder = [ "reachy_mini_receptionist",]
80
+ known-first-party = [ "reachy_mini", "reachy_mini_dances_library", "reachy_mini_toolbox",]
81
+ split-on-trailing-comma = true
screenshot.png ADDED

Git LFS Details

  • SHA256: 320d75c3ccc1f262d9bbfc98577d3883a0dde498860f69c2eee4c92810498f8c
  • Pointer size: 132 Bytes
  • Size of remote file: 1.66 MB
scripts/gemini_live_smoke.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Minimal Gemini Live smoke test — isolates SDK behavior from app.
2
+
3
+ Reads GEMINI_API_KEY from environment, connects to the Live API with
4
+ the configured model, sends a single text turn, prints every event
5
+ received until session closes or 30s timeout.
6
+
7
+ Use:
8
+ GEMINI_API_KEY=... /venvs/apps_venv/bin/python scripts/gemini_live_smoke.py
9
+ GEMINI_API_KEY=... GEMINI_LIVE_MODEL=gemini-2.0-flash-live-001 /venvs/apps_venv/bin/python scripts/gemini_live_smoke.py
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import asyncio
14
+ import os
15
+ import sys
16
+
17
+
18
def _inline_data_summary(blob: object) -> str | None:
    """Describe an ``inline_data`` part as ``"<N bytes>"``, or ``None`` when the part has none.

    A blob whose ``data`` attribute is missing or ``None`` is reported as
    ``"<0 bytes>"`` instead of raising ``TypeError`` from ``len(None)``.
    """
    if not blob:
        return None
    payload = getattr(blob, "data", None) or b""
    return f"<{len(payload)} bytes>"


async def main() -> int:
    """Run a one-turn Gemini Live session and print every event received.

    Reads ``GEMINI_API_KEY`` (required) and ``GEMINI_LIVE_MODEL`` (optional)
    from the environment, sends a single text turn and logs each response
    until ``turn_complete`` is seen or 30 seconds have elapsed.

    Returns:
        0 when the session ran (including when the 30 s receive window
        expired); 1 when the API key is missing, ``google-genai`` cannot be
        imported, or any exception escapes the session.
    """
    key = os.environ.get("GEMINI_API_KEY", "").strip()
    if not key:
        print("ERROR: GEMINI_API_KEY not set", file=sys.stderr)
        return 1

    model = os.environ.get("GEMINI_LIVE_MODEL", "gemini-2.5-flash-native-audio-latest")
    print(f"[smoke] model={model}")

    # Imported lazily so a missing SDK produces a readable error instead of a traceback.
    try:
        from google import genai
    except ImportError as e:
        print(f"ERROR: google-genai not installed: {e}", file=sys.stderr)
        return 1

    print(f"[smoke] google-genai version={getattr(genai, '__version__', '?')}")

    client = genai.Client(api_key=key, http_options={"api_version": "v1beta"})

    # Native-audio models REQUIRE AUDIO modality. The 1007 error
    # "Cannot extract voices from a non-audio request" confirms this.
    config = {
        "response_modalities": ["AUDIO"],
    }
    try:
        async with client.aio.live.connect(model=model, config=config) as session:
            print("[smoke] connected; sending one text turn (turn_complete=True)...")
            await session.send_client_content(
                turns=[{"role": "user", "parts": [{"text": "Say hello in one short friendly sentence."}]}],
                turn_complete=True,
            )

            event_count = 0
            audio_bytes_total = 0
            try:
                # NOTE: asyncio.timeout() requires Python 3.11+.
                async with asyncio.timeout(30):
                    async for resp in session.receive():
                        event_count += 1
                        text = getattr(resp, "text", None)
                        data = getattr(resp, "data", None)
                        if data:
                            audio_bytes_total += len(data)
                        sc = getattr(resp, "server_content", None)
                        tc = getattr(sc, "turn_complete", None) if sc else None
                        model_turn = getattr(sc, "model_turn", None) if sc else None
                        mt_parts_summary = ""
                        if model_turn is not None:
                            parts = getattr(model_turn, "parts", None) or []
                            mt_parts_summary = f" model_turn.parts={len(parts)}"
                            # Only the first three parts are detailed to keep the log readable.
                            for i, p in enumerate(parts[:3]):
                                ip = getattr(p, "inline_data", None)
                                tp = getattr(p, "text", None)
                                th = getattr(p, "thought", None)
                                print(
                                    f"[smoke]   part {i}: text={tp!r}, "
                                    f"inline_data={_inline_data_summary(ip)}, "
                                    f"thought={th}"
                                )
                        data_summary = f"<{len(data)} bytes>" if data else None
                        print(
                            f"[smoke] event #{event_count}: text={text!r}, "
                            f"data={data_summary}, "
                            f"turn_complete={tc}{mt_parts_summary}"
                        )
                        if tc:
                            print("[smoke] turn_complete=True — exiting receive() loop")
                            break
            except asyncio.TimeoutError:
                print(f"[smoke] timed out after 30s, events={event_count}, audio_total={audio_bytes_total} bytes")
            print(f"[smoke] done. total events={event_count}, total_audio_bytes={audio_bytes_total}")
    except Exception as e:
        # Top-level boundary of a diagnostic script: report everything with a traceback.
        import traceback

        print(f"[smoke] CONNECTION ERROR: {e}")
        traceback.print_exc()
        return 1
    return 0
93
+
94
+
95
+ if __name__ == "__main__":
96
+ raise SystemExit(asyncio.run(main()))
scripts/list_gemini_live_models.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """List all Gemini models on this API key that support bidiGenerateContent.
2
+
3
+ These are the models you can put in GEMINI_LIVE_MODEL. Anything not listed
4
+ here will 1008 at connect time.
5
+
6
+ Use:
7
+ GEMINI_API_KEY=... /venvs/apps_venv/bin/python scripts/list_gemini_live_models.py
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import os
12
+ import sys
13
+
14
+ import httpx
15
+
16
+
17
def main() -> int:
    """Print every model on this API key that supports ``bidiGenerateContent``.

    Reads ``GEMINI_API_KEY`` from the environment, pages through the
    ``v1beta/models`` listing and prints the Live-capable model names sorted
    alphabetically.

    Returns:
        0 on success (including when no Live-capable model is found); 1 when
        the API key is missing or the listing request fails.
    """
    key = os.environ.get("GEMINI_API_KEY", "").strip()
    if not key:
        print("ERROR: GEMINI_API_KEY not set", file=sys.stderr)
        return 1

    url = "https://generativelanguage.googleapis.com/v1beta/models"
    # The key travels in a header, not the query string: httpx error messages
    # embed the request URL, so a key in the URL would be echoed to stderr.
    headers = {"x-goog-api-key": key}
    params: dict[str, str | int] = {"pageSize": 200}

    live_models: list[str] = []
    try:
        while True:
            resp = httpx.get(url, headers=headers, params=params, timeout=15.0)
            resp.raise_for_status()
            data = resp.json()
            for m in data.get("models", []):
                methods = m.get("supportedGenerationMethods") or []
                if "bidiGenerateContent" in methods:
                    live_models.append(m.get("name", "?").removeprefix("models/"))
            # Follow pagination so models beyond the first page are not dropped.
            next_token = data.get("nextPageToken")
            if not next_token:
                break
            params["pageToken"] = next_token
    except (httpx.HTTPError, ValueError) as e:
        # ValueError covers a response body that is not valid JSON.
        print(f"ERROR: {e}", file=sys.stderr)
        return 1

    if not live_models:
        print("(no Live-capable models on this key)")
        return 0

    print(f"Live-capable models on this key ({len(live_models)}):\n")
    for name in sorted(live_models):
        print(f"  {name}")
    return 0
47
+
48
+
49
+ if __name__ == "__main__":
50
+ raise SystemExit(main())
src/reachy_mini_receptionist/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Nothing (for ruff)."""
src/reachy_mini_receptionist/audio/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Nothing (for ruff)."""
src/reachy_mini_receptionist/audio/head_wobbler.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Moves head given audio samples."""
2
+
3
+ import time
4
+ import queue
5
+ import base64
6
+ import logging
7
+ import threading
8
+ from typing import Tuple
9
+ from collections.abc import Callable
10
+
11
+ import numpy as np
12
+ from numpy.typing import NDArray
13
+
14
+ from reachy_mini_receptionist.audio.speech_tapper import HOP_MS, SwayRollRT
15
+
16
+
17
+ SAMPLE_RATE = 24000
18
+ MOVEMENT_LATENCY_S = 0.2 # seconds between audio and robot movement
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class HeadWobbler:
    """Turn streamed speech audio into head-pose offsets paced against a monotonic clock.

    Chunks pushed with :meth:`feed` are consumed by a background thread
    (:meth:`start` / :meth:`stop`). Each chunk goes through ``SwayRollRT`` and
    every resulting hop is forwarded to the ``set_speech_offsets`` callback.
    :meth:`reset` bumps a generation counter so chunks queued before the reset
    are discarded instead of being played.
    """

    def __init__(self, set_speech_offsets: Callable[[Tuple[float, float, float, float, float, float]], None]) -> None:
        """Initialize the head wobbler.

        Args:
            set_speech_offsets: Callback invoked once per hop with a 6-tuple
                built from the sway result: ``x_mm``/``y_mm``/``z_mm`` divided
                by 1000, then ``roll_rad``/``pitch_rad``/``yaw_rad`` unchanged.

        """
        self._apply_offsets = set_speech_offsets
        # Monotonic timestamp of the first chunk of the current generation (None until one arrives).
        self._base_ts: float | None = None
        # Number of hops already played or skipped in the current generation.
        self._hops_done: int = 0

        # Items are (generation, sample_rate, samples).
        self.audio_queue: "queue.Queue[Tuple[int, int, NDArray[np.int16]]]" = queue.Queue()
        self.sway = SwayRollRT()

        # Synchronization primitives
        self._state_lock = threading.Lock()  # guards _generation, _base_ts, _hops_done
        self._sway_lock = threading.Lock()  # guards self.sway
        self._generation = 0

        self._stop_event = threading.Event()
        self._thread: threading.Thread | None = None

    def feed(self, delta_b64: str) -> None:
        """Thread-safe: decode a base64 int16 audio delta and push it into the consumer queue."""
        buf = np.frombuffer(base64.b64decode(delta_b64), dtype=np.int16).reshape(1, -1)
        with self._state_lock:
            generation = self._generation
        # Tag the chunk with its generation so the worker can drop it after a reset().
        self.audio_queue.put((generation, SAMPLE_RATE, buf))

    def start(self) -> None:
        """Start the head wobbler loop in a daemon thread (no-op if it is already running)."""
        if self._thread is not None and self._thread.is_alive():
            # A second worker would consume the same queue and race on the hop counter.
            logger.debug("Head wobbler already running")
            return
        self._stop_event.clear()
        self._thread = threading.Thread(target=self.working_loop, daemon=True)
        self._thread.start()
        logger.debug("Head wobbler started")

    def stop(self) -> None:
        """Signal the worker loop to exit and wait for the thread to finish."""
        self._stop_event.set()
        if self._thread is not None:
            self._thread.join()
            self._thread = None
        logger.debug("Head wobbler stopped")

    def working_loop(self) -> None:
        """Convert queued audio chunks into head movement offsets until stopped."""
        hop_dt = HOP_MS / 1000.0

        logger.debug("Head wobbler thread started")
        while not self._stop_event.is_set():
            queue_ref = self.audio_queue
            try:
                chunk_generation, sr, chunk = queue_ref.get_nowait()  # (gen, sr, data)
            except queue.Empty:
                # Poll rather than block so the stop event is re-checked regularly.
                time.sleep(MOVEMENT_LATENCY_S)
                continue

            try:
                # Check the generation and seed the timeline in one critical
                # section, so a chunk made stale by a concurrent reset() cannot
                # set the base timestamp of the new generation.
                with self._state_lock:
                    current_generation = self._generation
                    is_current = chunk_generation == current_generation
                    if is_current and self._base_ts is None:
                        self._base_ts = time.monotonic()
                if not is_current:
                    continue

                pcm = np.asarray(chunk).squeeze(0)
                with self._sway_lock:
                    results = self.sway.feed(pcm, sr)

                i = 0
                while i < len(results):
                    with self._state_lock:
                        if self._generation != current_generation:
                            break
                        base_ts = self._base_ts
                        hops_done = self._hops_done

                    if base_ts is None:
                        # Defensive: re-seed the timeline if it was cleared.
                        base_ts = time.monotonic()
                        with self._state_lock:
                            if self._base_ts is None:
                                self._base_ts = base_ts
                            hops_done = self._hops_done

                    # Scheduled play time of hop number `hops_done`.
                    target = base_ts + MOVEMENT_LATENCY_S + hops_done * hop_dt
                    now = time.monotonic()

                    if now - target >= hop_dt:
                        # Running late: skip hops to catch up, but always keep
                        # the last hop of this chunk so something is applied.
                        lag_hops = int((now - target) / hop_dt)
                        drop = min(lag_hops, len(results) - i - 1)
                        if drop > 0:
                            with self._state_lock:
                                self._hops_done += drop
                                hops_done = self._hops_done
                            i += drop
                            continue

                    if target > now:
                        time.sleep(target - now)
                        with self._state_lock:
                            if self._generation != current_generation:
                                break

                    r = results[i]
                    offsets = (
                        r["x_mm"] / 1000.0,
                        r["y_mm"] / 1000.0,
                        r["z_mm"] / 1000.0,
                        r["roll_rad"],
                        r["pitch_rad"],
                        r["yaw_rad"],
                    )

                    # Last chance to notice a reset() before moving the head.
                    with self._state_lock:
                        if self._generation != current_generation:
                            break

                    self._apply_offsets(offsets)

                    with self._state_lock:
                        self._hops_done += 1
                    i += 1
            finally:
                queue_ref.task_done()
        logger.debug("Head wobbler thread exited")

    def reset(self) -> None:
        """Invalidate queued audio and clear timing and sway state."""
        with self._state_lock:
            # Bumping the generation makes the worker drop any chunk it still holds.
            self._generation += 1
            self._base_ts = None
            self._hops_done = 0

        # Drain any queued audio chunks from previous generations
        drained_any = False
        while True:
            try:
                self.audio_queue.get_nowait()
            except queue.Empty:
                break
            else:
                drained_any = True
                self.audio_queue.task_done()

        with self._sway_lock:
            self.sway.reset()

        if drained_any:
            logger.debug("Head wobbler queue drained during reset")
src/reachy_mini_receptionist/audio/speech_tapper.py ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import math
3
+ from typing import Any, Dict, List
4
+ from itertools import islice
5
+ from collections import deque
6
+
7
+ import numpy as np
8
+ from numpy.typing import NDArray
9
+
10
+
11
# Tunables
SR = 16_000  # internal sample rate in Hz; feed() resamples other rates to this
FRAME_MS = 20  # length of the RMS analysis window, in milliseconds
HOP_MS = 50  # one sway output is produced per hop of this many milliseconds

SWAY_MASTER = 1.5  # overall multiplier applied to the loudness gain
SENS_DB_OFFSET = +4.0  # dB added to the measured level before loudness mapping
VAD_DB_ON = -35.0  # level (dBFS) at or above which a hop counts as "voice"
VAD_DB_OFF = -45.0  # level (dBFS) at or below which a hop counts as "silence"
VAD_ATTACK_MS = 40  # time above VAD_DB_ON needed to switch the VAD on
VAD_RELEASE_MS = 250  # time below VAD_DB_OFF needed to switch the VAD off
ENV_FOLLOW_GAIN = 0.65  # fraction of the gap to the target closed per hop by the envelope

# Oscillator settings: SWAY_F_* is a frequency in Hz (used as sin(2*pi*f*t)
# with t in seconds), SWAY_A_* is the peak amplitude in the unit named by
# the suffix (degrees or millimetres).
SWAY_F_PITCH = 2.2
SWAY_A_PITCH_DEG = 4.5
SWAY_F_YAW = 0.6
SWAY_A_YAW_DEG = 7.5
SWAY_F_ROLL = 1.3
SWAY_A_ROLL_DEG = 2.25
SWAY_F_X = 0.35
SWAY_A_X_MM = 4.5
SWAY_F_Y = 0.45
SWAY_A_Y_MM = 3.75
SWAY_F_Z = 0.25
SWAY_A_Z_MM = 2.25

SWAY_DB_LOW = -46.0  # level (dB, after offset) mapped to loudness gain 0
SWAY_DB_HIGH = -18.0  # level (dB, after offset) mapped to loudness gain 1
LOUDNESS_GAMMA = 0.9  # exponent applied to the normalised loudness
SWAY_ATTACK_MS = 50  # ramp-up time of the sway envelope target
SWAY_RELEASE_MS = 250  # ramp-down time of the sway envelope target

# Derived
FRAME = int(SR * FRAME_MS / 1000)  # analysis window length in samples
HOP = int(SR * HOP_MS / 1000)  # hop length in samples
# The *_FR values are durations expressed in whole hops; each is clamped to
# at least 1 because a duration shorter than HOP_MS would truncate to 0.
ATTACK_FR = max(1, int(VAD_ATTACK_MS / HOP_MS))
RELEASE_FR = max(1, int(VAD_RELEASE_MS / HOP_MS))
SWAY_ATTACK_FR = max(1, int(SWAY_ATTACK_MS / HOP_MS))
SWAY_RELEASE_FR = max(1, int(SWAY_RELEASE_MS / HOP_MS))
50
+
51
+
52
+ def _rms_dbfs(x: NDArray[np.float32]) -> float:
53
+ """Root-mean-square in dBFS for float32 mono array in [-1,1]."""
54
+ # numerically stable rms (avoid overflow)
55
+ x = x.astype(np.float32, copy=False)
56
+ rms = np.sqrt(np.mean(x * x, dtype=np.float32) + 1e-12, dtype=np.float32)
57
+ return float(20.0 * math.log10(float(rms) + 1e-12))
58
+
59
+
60
def _loudness_gain(db: float, offset: float = SENS_DB_OFFSET) -> float:
    """Map a level in dB to a loudness gain in [0, 1].

    ``db + offset`` is normalised linearly between ``SWAY_DB_LOW`` (gain 0)
    and ``SWAY_DB_HIGH`` (gain 1), clipped to that range, and then raised to
    ``LOUDNESS_GAMMA`` unless the exponent is exactly 1.
    """
    span = SWAY_DB_HIGH - SWAY_DB_LOW
    ratio = (db + offset - SWAY_DB_LOW) / span
    clipped = 0.0 if ratio < 0.0 else (1.0 if ratio > 1.0 else ratio)
    if LOUDNESS_GAMMA == 1.0:
        return clipped
    return clipped**LOUDNESS_GAMMA
68
+
69
+
70
+ def _to_float32_mono(x: NDArray[Any]) -> NDArray[np.float32]:
71
+ """Convert arbitrary PCM array to float32 mono in [-1,1].
72
+
73
+ Accepts shapes: (N,), (1,N), (N,1), (C,N), (N,C).
74
+ """
75
+ a = np.asarray(x)
76
+ if a.ndim == 0:
77
+ return np.zeros(0, dtype=np.float32)
78
+
79
+ # If 2D, decide which axis is channels (prefer small first dim)
80
+ if a.ndim == 2:
81
+ # e.g., (channels, samples) if channels is small (<=8)
82
+ if a.shape[0] <= 8 and a.shape[0] <= a.shape[1]:
83
+ a = np.mean(a, axis=0)
84
+ else:
85
+ a = np.mean(a, axis=1)
86
+ elif a.ndim > 2:
87
+ a = np.mean(a.reshape(a.shape[0], -1), axis=0)
88
+
89
+ # Now 1D, cast/scale
90
+ if np.issubdtype(a.dtype, np.floating):
91
+ return a.astype(np.float32, copy=False)
92
+ # integer PCM
93
+ info = np.iinfo(a.dtype)
94
+ scale = float(max(-info.min, info.max))
95
+ return a.astype(np.float32) / (scale if scale != 0.0 else 1.0)
96
+
97
+
98
+ def _resample_linear(x: NDArray[np.float32], sr_in: int, sr_out: int) -> NDArray[np.float32]:
99
+ """Lightweight linear resampler for short buffers."""
100
+ if sr_in == sr_out or x.size == 0:
101
+ return x
102
+ # guard tiny sizes
103
+ n_out = int(round(x.size * sr_out / sr_in))
104
+ if n_out <= 1:
105
+ return np.zeros(0, dtype=np.float32)
106
+ t_in = np.linspace(0.0, 1.0, num=x.size, dtype=np.float32, endpoint=True)
107
+ t_out = np.linspace(0.0, 1.0, num=n_out, dtype=np.float32, endpoint=True)
108
+ return np.interp(t_out, t_in, x).astype(np.float32, copy=False)
109
+
110
+
111
class SwayRollRT:
    """Feed audio chunks → per-hop sway outputs.

    Incoming audio is downmixed to mono and resampled to ``SR``, then consumed
    in hops of ``HOP`` samples. For every hop the level of the most recent
    ``FRAME`` samples drives a hysteresis voice-activity detector and a
    smoothed envelope; six fixed-frequency sine oscillators (pitch, yaw, roll,
    x, y, z) scaled by loudness and envelope form the output.

    Usage:
        rt = SwayRollRT()
        rt.feed(pcm_int16_or_float, sr) -> List[dict]
    """

    def __init__(self, rng_seed: int = 7):
        """Initialize state.

        Args:
            rng_seed: seed for the generator that picks the six initial
                oscillator phases, so the motion is reproducible.
        """
        self._seed = int(rng_seed)
        self.samples: deque[float] = deque(maxlen=10 * SR)  # sliding window for VAD/env
        self.carry: NDArray[np.float32] = np.zeros(0, dtype=np.float32)

        self.vad_on = False
        self.vad_above = 0
        self.vad_below = 0

        self.sway_env = 0.0
        self.sway_up = 0
        self.sway_down = 0

        rng = np.random.default_rng(self._seed)
        self.phase_pitch = float(rng.random() * 2 * math.pi)
        self.phase_yaw = float(rng.random() * 2 * math.pi)
        self.phase_roll = float(rng.random() * 2 * math.pi)
        self.phase_x = float(rng.random() * 2 * math.pi)
        self.phase_y = float(rng.random() * 2 * math.pi)
        self.phase_z = float(rng.random() * 2 * math.pi)
        self.t = 0.0

    def reset(self) -> None:
        """Reset state (VAD/env/buffers/time) but keep initial phases/seed."""
        self.samples.clear()
        self.carry = np.zeros(0, dtype=np.float32)
        self.vad_on = False
        self.vad_above = 0
        self.vad_below = 0
        self.sway_env = 0.0
        self.sway_up = 0
        self.sway_down = 0
        self.t = 0.0

    def _sine(self, freq_hz: float, phase: float) -> float:
        """Return the value of one oscillator at the current time ``self.t``."""
        return math.sin(2 * math.pi * freq_hz * self.t + phase)

    def feed(self, pcm: NDArray[Any], sr: int | None) -> List[Dict[str, float]]:
        """Stream in PCM chunk. Returns a list of sway dicts, one per hop (HOP_MS).

        Samples that do not fill a whole hop are kept and prepended to the
        next chunk, so the result can be empty for short chunks.

        Args:
            pcm: np.ndarray, shape (N,) or (C,N)/(N,C); int or float.
            sr: sample rate of `pcm` (None -> assume SR).

        Returns:
            One dict per completed hop with the keys ``pitch_rad``,
            ``yaw_rad``, ``roll_rad``, ``pitch_deg``, ``yaw_deg``,
            ``roll_deg``, ``x_mm``, ``y_mm`` and ``z_mm``.

        """
        sr_in = SR if sr is None else int(sr)
        x = _to_float32_mono(pcm)
        if x.size == 0:
            return []
        if sr_in != SR:
            x = _resample_linear(x, sr_in, SR)
            if x.size == 0:
                return []

        # append to carry and consume fixed HOP chunks
        if self.carry.size:
            self.carry = np.concatenate([self.carry, x])
        else:
            self.carry = x

        out: List[Dict[str, float]] = []

        while self.carry.size >= HOP:
            hop = self.carry[:HOP]
            remaining: NDArray[np.float32] = self.carry[HOP:]
            self.carry = remaining

            # keep sliding window for VAD/env computation
            self.samples.extend(hop.tolist())
            if len(self.samples) < FRAME:
                self.t += HOP_MS / 1000.0
                continue

            # Take the newest FRAME samples by walking the deque from its
            # right end. Slicing forward from index len-FRAME would step over
            # the entire window (up to 10 s of audio) on every single hop.
            newest_first = np.fromiter(
                islice(reversed(self.samples), FRAME),
                dtype=np.float32,
                count=FRAME,
            )
            frame = np.ascontiguousarray(newest_first[::-1])  # back to time order
            db = _rms_dbfs(frame)

            # VAD with hysteresis + attack/release
            if db >= VAD_DB_ON:
                self.vad_above += 1
                self.vad_below = 0
                if not self.vad_on and self.vad_above >= ATTACK_FR:
                    self.vad_on = True
            elif db <= VAD_DB_OFF:
                self.vad_below += 1
                self.vad_above = 0
                if self.vad_on and self.vad_below >= RELEASE_FR:
                    self.vad_on = False

            # Hop counters that ramp the envelope target up while voiced and
            # down while unvoiced.
            if self.vad_on:
                self.sway_up = min(SWAY_ATTACK_FR, self.sway_up + 1)
                self.sway_down = 0
            else:
                self.sway_down = min(SWAY_RELEASE_FR, self.sway_down + 1)
                self.sway_up = 0

            up = self.sway_up / SWAY_ATTACK_FR
            down = 1.0 - (self.sway_down / SWAY_RELEASE_FR)
            target = up if self.vad_on else down
            self.sway_env += ENV_FOLLOW_GAIN * (target - self.sway_env)
            # clamp
            if self.sway_env < 0.0:
                self.sway_env = 0.0
            elif self.sway_env > 1.0:
                self.sway_env = 1.0

            loud = _loudness_gain(db) * SWAY_MASTER
            env = self.sway_env
            self.t += HOP_MS / 1000.0

            # oscillators
            pitch = math.radians(SWAY_A_PITCH_DEG) * loud * env * self._sine(SWAY_F_PITCH, self.phase_pitch)
            yaw = math.radians(SWAY_A_YAW_DEG) * loud * env * self._sine(SWAY_F_YAW, self.phase_yaw)
            roll = math.radians(SWAY_A_ROLL_DEG) * loud * env * self._sine(SWAY_F_ROLL, self.phase_roll)
            x_mm = SWAY_A_X_MM * loud * env * self._sine(SWAY_F_X, self.phase_x)
            y_mm = SWAY_A_Y_MM * loud * env * self._sine(SWAY_F_Y, self.phase_y)
            z_mm = SWAY_A_Z_MM * loud * env * self._sine(SWAY_F_Z, self.phase_z)

            out.append(
                {
                    "pitch_rad": pitch,
                    "yaw_rad": yaw,
                    "roll_rad": roll,
                    "pitch_deg": math.degrees(pitch),
                    "yaw_deg": math.degrees(yaw),
                    "roll_deg": math.degrees(roll),
                    "x_mm": x_mm,
                    "y_mm": y_mm,
                    "z_mm": z_mm,
                },
            )

        return out
src/reachy_mini_receptionist/calendar_data.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Calendar data — appointments source for the receptionist.
2
+
3
+ Single source: a Google Calendar (or any iCal feed) configured via the
4
+ ``RECEPTION_ICS_URL`` env var. Operators add events in Google Calendar;
5
+ the robot fetches every ~5 min via ``ical_calendar.py``. Title
6
+ convention: ``"<Visitor> with <Host>"`` (host resolved through
7
+ ``employees.py``).
8
+
9
+ When ``RECEPTION_ICS_URL`` is unset OR the feed is unreachable, this
10
+ module returns an EMPTY calendar. That's intentional — the receptionist
11
+ supports exactly two visitor paths:
12
+
13
+ 1. **Scheduled visitor** — appointment exists in the iCal feed; bot
14
+ matches by visitor name and emails the host.
15
+ 2. **Walk-in to see an employee** — visitor names a host that lives in
16
+ the SQLite Employee directory (managed from the dashboard's
17
+ Employees panel); bot calls ``lookup_employee`` and emails the host.
18
+
19
+ There is intentionally no hardcoded demo schedule fallback. If you want
20
+ demo data, add it to Google Calendar; if you want a host-only flow, add
21
+ the host via the dashboard.
22
+
23
+ The ``visiting`` field on each returned appointment is normally an email
24
+ address (resolved through the employee directory). If a host can't be
25
+ resolved, the original string is preserved so the LLM can flag it.
26
+ """
27
+ from __future__ import annotations
28
+
29
+ import asyncio
30
+ import os
31
+ from datetime import datetime
32
+ from typing import Any, Dict, List, Optional
33
+
34
+ from reachy_mini_receptionist import employees
35
+
36
+
37
+ def _resolve_visiting(visiting: str) -> str:
38
+ """Return the email for a `visiting` reference, falling back to itself.
39
+
40
+ Looks up via the employee directory first; if `visiting` already contains
41
+ `@` (one-off external host), returns it unchanged.
42
+ """
43
+ if not visiting:
44
+ return ""
45
+ if "@" in visiting:
46
+ return visiting
47
+ email = employees.find_email_for(visiting)
48
+ return email or visiting
49
+
50
+
51
def _appointments_from_ical(ics_url: str) -> List[Dict[str, Any]]:
    """Fetch appointments from an iCal feed and reshape them.

    Only ``time``, ``name`` and ``note`` are copied from each fetched event
    (missing keys become ``""``); any other fields the fetcher attaches are
    dropped. The event's ``_host_query`` value is resolved into the
    ``visiting`` field, which is ``""`` when the event names no host.
    """
    # Imported on use rather than at module top, as in the original.
    from reachy_mini_receptionist import ical_calendar

    def _reshape(event: Dict[str, Any]) -> Dict[str, Any]:
        host = event.get("_host_query", "")
        return {
            "time": event.get("time", ""),
            "name": event.get("name", ""),
            "note": event.get("note", ""),
            "visiting": _resolve_visiting(host) if host else "",
        }

    return [_reshape(event) for event in ical_calendar.fetch_appointments(ics_url)]
70
+
71
+
72
def get_appointments() -> List[dict]:
    """Return the appointment list with ``visiting`` resolved to an email.

    The feed URL is read from ``RECEPTION_ICS_URL`` on every call. When the
    variable is unset or blank an empty list is returned; visitors who are
    not on the schedule are handled by the walk-in flow (``lookup_employee``).
    Otherwise the feed is fetched through ``_appointments_from_ical``.

    Each item has:
        time (str) — e.g. "11:00 AM"
        name (str) — guest name
        note (str) — short description (may be empty)
        visiting (str) — host email (resolved from employee directory)
    """
    feed_url = os.getenv("RECEPTION_ICS_URL", "").strip()
    return _appointments_from_ical(feed_url) if feed_url else []
89
+
90
+
91
async def get_appointments_async() -> List[dict]:
    """Run ``get_appointments`` in a worker thread and return its result.

    The synchronous call may perform a blocking feed fetch, so async callers
    (realtime audio loop, tool completion handlers) use this wrapper to keep
    the event loop responsive. The whole call is handed to
    ``asyncio.to_thread``, regardless of whether a fetch actually happens.
    """
    appointments = await asyncio.to_thread(get_appointments)
    return appointments
99
+
100
+
101
def format_for_llm() -> str:
    """Return a human-readable calendar string for the LLM.

    The text starts with today's date. With appointments it continues with
    one line per appointment (time, guest name, note); without any it tells
    the model to route walk-in visitors through ``lookup_employee``.
    """
    today = datetime.now().strftime("%A, %B %d %Y")
    appts = get_appointments()
    if appts:
        header = f"Today is {today}. Appointments:"
        rows = [f" {appt['time']}: {appt['name']} — {appt['note']}" for appt in appts]
        return "\n".join([header, *rows])
    return (
        f"Today is {today}. No scheduled appointments in the calendar. "
        "Walk-in visitors should be routed via lookup_employee."
    )
114
+
115
+
116
def get_appointment_for_name(name: str) -> Optional[dict]:
    """Find an appointment by guest name (case-insensitive).

    Surrounding whitespace is ignored on both the query and the stored name.

    Args:
        name: guest name to look for; ``None`` or a blank string never
            matches.

    Returns:
        The first appointment whose ``name`` equals the query, or ``None``.
    """
    target = (name or "").strip().lower()
    if not target:
        # A blank query would otherwise match any appointment whose name is
        # empty; bail out before touching the calendar at all.
        return None
    for appt in get_appointments():
        if (appt.get("name") or "").strip().lower() == target:
            return appt
    return None
123
+
124
+
125
def find_appointment_for_employee(employee_query: str) -> Optional[dict]:
    """Find the first appointment whose host matches ``employee_query``.

    The query is first resolved through the employee directory; if that
    yields nothing the query itself is used. The comparison against each
    appointment's ``visiting`` value ignores case and surrounding
    whitespace. Returns ``None`` when the query is blank or nothing matches.
    """
    resolved = employees.find_email_for(employee_query) or employee_query or ""
    wanted = resolved.strip().lower()
    if not wanted:
        return None
    matches = (
        appt
        for appt in get_appointments()
        if (appt.get("visiting") or "").strip().lower() == wanted
    )
    return next(matches, None)
src/reachy_mini_receptionist/camera_worker.py ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Camera worker thread with frame buffering and face tracking.
2
+
3
+ Ported from main_works.py camera_worker() function to provide:
4
+ - 30Hz+ camera polling with thread-safe frame buffering
5
+ - Face tracking integration with smooth interpolation
6
+ - Latest frame always available for tools
7
+ """
8
+
9
+ import time
10
+ import logging
11
+ import threading
12
+ from typing import Any, List, Tuple
13
+
14
+ import numpy as np
15
+ from numpy.typing import NDArray
16
+ from scipy.spatial.transform import Rotation as R
17
+
18
+ from reachy_mini import ReachyMini
19
+ from reachy_mini.utils.interpolation import linear_pose_interpolation
20
+
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
class CameraWorker:
    """Thread-safe camera worker with frame buffering and face tracking.

    A background thread polls frames from the robot, keeps the most recent
    one available through ``get_latest_frame()``, and, when a head tracker is
    supplied and tracking is enabled, maintains a six-value head offset
    (x, y, z, roll, pitch, yaw) readable through
    ``get_face_tracking_offsets()``. Once no face has been seen for
    ``face_lost_delay`` seconds, the offsets are interpolated back to the
    neutral pose over ``interpolation_duration`` seconds.
    """

    def __init__(self, reachy_mini: ReachyMini, head_tracker: Any = None) -> None:
        """Store the robot handle and optional tracker; no thread is started here.

        Args:
            reachy_mini: robot object used for ``media.get_frame()`` and
                ``look_at_image()``.
            head_tracker: object exposing ``get_head_position(frame)``, or
                ``None`` to run without face tracking.
        """
        self.reachy_mini = reachy_mini
        self.head_tracker = head_tracker

        # Thread-safe frame storage
        self.latest_frame: NDArray[np.uint8] | None = None
        self.frame_lock = threading.Lock()
        self._stop_event = threading.Event()
        self._thread: threading.Thread | None = None

        # Face tracking state
        self.is_head_tracking_enabled = True
        self.face_tracking_offsets: List[float] = [
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
        ]  # x, y, z, roll, pitch, yaw
        self.face_tracking_lock = threading.Lock()

        # Face tracking timing variables (same as main_works.py)
        self.last_face_detected_time: float | None = None
        self.interpolation_start_time: float | None = None
        self.interpolation_start_pose: NDArray[np.float32] | None = None
        self.face_lost_delay = 2.0  # seconds to wait before starting interpolation
        self.interpolation_duration = 1.0  # seconds to interpolate back to neutral

        # Track state changes
        self.previous_head_tracking_state = self.is_head_tracking_enabled

    def get_latest_frame(self) -> NDArray[np.uint8] | None:
        """Get the latest frame (thread-safe).

        Returns a copy of the most recent frame, or ``None`` if no frame has
        been captured yet.
        """
        with self.frame_lock:
            if self.latest_frame is None:
                return None
            # Return a copy in original BGR format (OpenCV native)
            return self.latest_frame.copy()

    def get_face_tracking_offsets(
        self,
    ) -> Tuple[float, float, float, float, float, float]:
        """Get current face tracking offsets (thread-safe).

        Returns the offsets as an (x, y, z, roll, pitch, yaw) tuple.
        """
        with self.face_tracking_lock:
            offsets = self.face_tracking_offsets
            return (offsets[0], offsets[1], offsets[2], offsets[3], offsets[4], offsets[5])

    def set_head_tracking_enabled(self, enabled: bool) -> None:
        """Enable/disable head tracking.

        The worker loop notices an enabled -> disabled transition on its next
        iteration and arms the return-to-neutral sequence.
        """
        self.is_head_tracking_enabled = enabled
        logger.info(f"Head tracking {'enabled' if enabled else 'disabled'}")

    def start(self) -> None:
        """Start the camera worker loop in a daemon thread."""
        # NOTE(review): nothing guards against calling start() twice; a second
        # call would create a second thread and replace the stored handle.
        self._stop_event.clear()
        self._thread = threading.Thread(target=self.working_loop, daemon=True)
        self._thread.start()
        logger.debug("Camera worker started")

    def stop(self) -> None:
        """Stop the camera worker loop and wait for the thread to finish."""
        self._stop_event.set()
        if self._thread is not None:
            # No timeout: this blocks until the loop observes the stop event.
            self._thread.join()

        logger.debug("Camera worker stopped")

    def working_loop(self) -> None:
        """Run the camera polling loop until ``stop()`` is called.

        Ported from main_works.py camera_worker() with same logic.

        Each iteration grabs a frame, publishes it, updates the face-tracking
        offsets when a face is found, and otherwise advances the
        return-to-neutral interpolation. Any exception is logged and the loop
        continues after a short pause.
        """
        logger.debug("Starting camera working loop")

        # Initialize head tracker if available
        neutral_pose = np.eye(4)  # Neutral pose (identity matrix)
        self.previous_head_tracking_state = self.is_head_tracking_enabled

        while not self._stop_event.is_set():
            try:
                # Wall-clock timestamp used for all timing decisions below.
                current_time = time.time()

                # Get frame from robot
                frame = self.reachy_mini.media.get_frame()

                if frame is not None:
                    # Thread-safe frame storage
                    with self.frame_lock:
                        self.latest_frame = frame  # .copy()

                    # Check if face tracking was just disabled
                    if self.previous_head_tracking_state and not self.is_head_tracking_enabled:
                        # Face tracking was just disabled - start interpolation to neutral
                        self.last_face_detected_time = current_time  # Trigger the face-lost logic
                        self.interpolation_start_time = None  # Will be set by the face-lost interpolation
                        self.interpolation_start_pose = None

                    # Update tracking state
                    self.previous_head_tracking_state = self.is_head_tracking_enabled

                    # Handle face tracking if enabled and head tracker available
                    if self.is_head_tracking_enabled and self.head_tracker is not None:
                        eye_center, _ = self.head_tracker.get_head_position(frame)

                        if eye_center is not None:
                            # Face detected - immediately switch to tracking
                            self.last_face_detected_time = current_time
                            self.interpolation_start_time = None  # Stop any interpolation

                            # Convert normalized coordinates to pixel coordinates
                            # (the +1, /2 mapping presumes eye_center lies in
                            # [-1, 1] — TODO confirm against the tracker)
                            h, w, _ = frame.shape
                            eye_center_norm = (eye_center + 1) / 2
                            eye_center_pixels = [
                                eye_center_norm[0] * w,
                                eye_center_norm[1] * h,
                            ]

                            # Get the head pose needed to look at the target, but don't perform movement
                            target_pose = self.reachy_mini.look_at_image(
                                eye_center_pixels[0],
                                eye_center_pixels[1],
                                duration=0.0,
                                perform_movement=False,
                            )

                            # Extract translation and rotation from the target pose directly
                            translation = target_pose[:3, 3]
                            rotation = R.from_matrix(target_pose[:3, :3]).as_euler("xyz", degrees=False)

                            # Scale down translation and rotation because smaller FOV
                            translation *= 0.6
                            rotation *= 0.6

                            # Thread-safe update of face tracking offsets (use pose as-is)
                            with self.face_tracking_lock:
                                self.face_tracking_offsets = [
                                    translation[0],
                                    translation[1],
                                    translation[2],  # x, y, z
                                    rotation[0],
                                    rotation[1],
                                    rotation[2],  # roll, pitch, yaw
                                ]

                        # No face detected while tracking enabled - set face lost timestamp
                        elif self.last_face_detected_time is None or self.last_face_detected_time == current_time:
                            # Only update if we haven't already set a face lost time
                            # (current_time check prevents overriding the disable-triggered timestamp)
                            # NOTE(review): this branch performs no action.
                            pass

                    # Handle smooth interpolation (works for both face-lost and tracking-disabled cases)
                    if self.last_face_detected_time is not None:
                        time_since_face_lost = current_time - self.last_face_detected_time

                        if time_since_face_lost >= self.face_lost_delay:
                            # Start interpolation if not already started
                            if self.interpolation_start_time is None:
                                self.interpolation_start_time = current_time
                                # Capture current pose as start of interpolation
                                with self.face_tracking_lock:
                                    current_translation = self.face_tracking_offsets[:3]
                                    current_rotation_euler = self.face_tracking_offsets[3:]
                                    # Convert to 4x4 pose matrix
                                    pose_matrix = np.eye(4, dtype=np.float32)
                                    pose_matrix[:3, 3] = current_translation
                                    pose_matrix[:3, :3] = R.from_euler(
                                        "xyz",
                                        current_rotation_euler,
                                    ).as_matrix()
                                    self.interpolation_start_pose = pose_matrix

                            # Calculate interpolation progress (t from 0 to 1)
                            elapsed_interpolation = current_time - self.interpolation_start_time
                            t = min(1.0, elapsed_interpolation / self.interpolation_duration)

                            # Interpolate between current pose and neutral pose
                            interpolated_pose = linear_pose_interpolation(
                                self.interpolation_start_pose,
                                neutral_pose,
                                t,
                            )

                            # Extract translation and rotation from interpolated pose
                            translation = interpolated_pose[:3, 3]
                            rotation = R.from_matrix(interpolated_pose[:3, :3]).as_euler("xyz", degrees=False)

                            # Thread-safe update of face tracking offsets
                            with self.face_tracking_lock:
                                self.face_tracking_offsets = [
                                    translation[0],
                                    translation[1],
                                    translation[2],  # x, y, z
                                    rotation[0],
                                    rotation[1],
                                    rotation[2],  # roll, pitch, yaw
                                ]

                            # If interpolation is complete, reset timing
                            if t >= 1.0:
                                self.last_face_detected_time = None
                                self.interpolation_start_time = None
                                self.interpolation_start_pose = None
                        # else: Keep current offsets (within 2s delay period)

                # Small sleep to prevent excessive CPU usage (same as main_works.py)
                time.sleep(0.04)

            except Exception as e:
                # Keep the worker alive: report the failure and retry shortly.
                logger.error(f"Camera worker error: {e}")
                time.sleep(0.1)  # Longer sleep on error

        logger.debug("Camera worker thread exited")
src/reachy_mini_receptionist/config.py ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import logging
4
+ from pathlib import Path
5
+
6
+ from dotenv import find_dotenv, load_dotenv
7
+
8
+
9
+ # Locked profile: set to a profile name (e.g., "astronomer") to lock the app
10
+ # to that profile and disable all profile switching. Leave as None for normal behavior.
11
+ LOCKED_PROFILE: str | None = "_reachy_mini_receptionist_locked_profile"
12
+ DEFAULT_PROFILES_DIRECTORY = Path(__file__).parent / "profiles"
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ def _env_flag(name: str, default: bool = False) -> bool:
18
+ """Parse a boolean environment flag.
19
+
20
+ Accepted truthy values: 1, true, yes, on
21
+ Accepted falsy values: 0, false, no, off
22
+ """
23
+ raw = os.getenv(name)
24
+ if raw is None:
25
+ return default
26
+
27
+ value = raw.strip().lower()
28
+ if value in {"1", "true", "yes", "on"}:
29
+ return True
30
+ if value in {"0", "false", "no", "off"}:
31
+ return False
32
+
33
+ logger.warning("Invalid boolean value for %s=%r, using default=%s", name, raw, default)
34
+ return default
35
+
36
+
37
+ def _collect_profile_names(profiles_root: Path) -> set[str]:
38
+ """Return profile folder names from a profiles root directory."""
39
+ if not profiles_root.exists() or not profiles_root.is_dir():
40
+ return set()
41
+ return {p.name for p in profiles_root.iterdir() if p.is_dir()}
42
+
43
+
44
+ def _collect_tool_module_names(tools_root: Path) -> set[str]:
45
+ """Return tool module names from a tools directory."""
46
+ if not tools_root.exists() or not tools_root.is_dir():
47
+ return set()
48
+ ignored = {"__init__", "core_tools"}
49
+ return {
50
+ p.stem
51
+ for p in tools_root.glob("*.py")
52
+ if p.is_file() and p.stem not in ignored
53
+ }
54
+
55
+
56
+ def _raise_on_name_collisions(
57
+ *,
58
+ label: str,
59
+ external_root: Path,
60
+ internal_root: Path,
61
+ external_names: set[str],
62
+ internal_names: set[str],
63
+ ) -> None:
64
+ """Raise with a clear message when external/internal names collide."""
65
+ collisions = sorted(external_names & internal_names)
66
+ if not collisions:
67
+ return
68
+
69
+ raise RuntimeError(
70
+ f"Config.__init__(): Ambiguous {label} names found in both external and built-in libraries: {collisions}. "
71
+ f"External {label} root: {external_root}. Built-in {label} root: {internal_root}. "
72
+ f"Please rename the conflicting external {label}(s) to continue."
73
+ )
74
+
75
+
76
# Validate LOCKED_PROFILE at startup
# This runs when the module is imported: if the locked profile's folder or
# its instructions.txt is missing under the built-in profiles directory, an
# error is printed to stderr and the process exits with status 1.
if LOCKED_PROFILE is not None:
    _profiles_dir = DEFAULT_PROFILES_DIRECTORY
    _profile_path = _profiles_dir / LOCKED_PROFILE
    _instructions_file = _profile_path / "instructions.txt"
    if not _profile_path.is_dir():
        print(f"Error: LOCKED_PROFILE '{LOCKED_PROFILE}' does not exist in {_profiles_dir}", file=sys.stderr)
        sys.exit(1)
    if not _instructions_file.is_file():
        print(f"Error: LOCKED_PROFILE '{LOCKED_PROFILE}' has no instructions.txt", file=sys.stderr)
        sys.exit(1)

# REACHY_MINI_SKIP_DOTENV is read from the real process environment, since
# no .env file has been loaded at this point.
_skip_dotenv = _env_flag("REACHY_MINI_SKIP_DOTENV", default=False)

if _skip_dotenv:
    logger.info("Skipping .env loading because REACHY_MINI_SKIP_DOTENV is set")
else:
    # Locate .env file (search upward from current working directory)
    dotenv_path = find_dotenv(usecwd=True)

    if dotenv_path:
        # Load .env and override environment variables
        # (override=True: values in the file win over variables already set)
        load_dotenv(dotenv_path=dotenv_path, override=True)
        logger.info(f"Configuration loaded from {dotenv_path}")
    else:
        logger.warning("No .env file found, using environment variables")
102
+
103
+
104
class Config:
    """Configuration class for the receptionist app.

    All settings are class attributes read from the environment once, when
    the class body runs at import time. Creating an instance validates the
    external profile/tool directories and logs which ones are in use.
    """

    # Required
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")  # The key is downloaded in console.py if needed

    # Optional
    MODEL_NAME = os.getenv("MODEL_NAME", "gpt-realtime")
    HF_HOME = os.getenv("HF_HOME", "./cache")
    LOCAL_VISION_MODEL = os.getenv("LOCAL_VISION_MODEL", "HuggingFaceTB/SmolVLM2-2.2B-Instruct")
    HF_TOKEN = os.getenv("HF_TOKEN")  # Optional, falls back to hf auth login if not set

    logger.debug(f"Model: {MODEL_NAME}, HF_HOME: {HF_HOME}, Vision Model: {LOCAL_VISION_MODEL}")

    # External overrides: a custom profiles root and an optional tools root.
    _profiles_directory_env = os.getenv("REACHY_MINI_EXTERNAL_PROFILES_DIRECTORY")
    if _profiles_directory_env:
        PROFILES_DIRECTORY = Path(_profiles_directory_env)
    else:
        PROFILES_DIRECTORY = Path(__file__).parent / "profiles"
    _tools_directory_env = os.getenv("REACHY_MINI_EXTERNAL_TOOLS_DIRECTORY")
    TOOLS_DIRECTORY = Path(_tools_directory_env) if _tools_directory_env else None
    AUTOLOAD_EXTERNAL_TOOLS = _env_flag("AUTOLOAD_EXTERNAL_TOOLS", default=False)
    # A locked profile always wins over the environment variable.
    REACHY_MINI_CUSTOM_PROFILE = LOCKED_PROFILE or os.getenv("REACHY_MINI_CUSTOM_PROFILE")

    logger.debug(f"Custom Profile: {REACHY_MINI_CUSTOM_PROFILE}")

    def __init__(self) -> None:
        """Validate the configured directories and log the effective setup.

        Raises:
            RuntimeError: if the selected profile is absent from an external
                profiles root, or if an external profile or tool name
                collides with a built-in one.
        """
        profiles_root = self.PROFILES_DIRECTORY
        tools_root = self.TOOLS_DIRECTORY
        selected = self.REACHY_MINI_CUSTOM_PROFILE
        uses_external_profiles = profiles_root != DEFAULT_PROFILES_DIRECTORY
        has_external_tools = tools_root is not None

        # 1. The selected profile must exist when an external root is used.
        if selected and uses_external_profiles and not (profiles_root / selected).is_dir():
            available_profiles = sorted(_collect_profile_names(profiles_root))
            raise RuntimeError(
                "Config.__init__(): Selected profile "
                f"'{selected}' was not found in external profiles root "
                f"{profiles_root}. "
                f"Available external profiles: {available_profiles}. "
                "Either set 'REACHY_MINI_CUSTOM_PROFILE' to one of the available external profiles "
                "or unset 'REACHY_MINI_EXTERNAL_PROFILES_DIRECTORY' to use built-in profiles."
            )

        # 2. External names must not shadow built-in ones.
        if uses_external_profiles:
            _raise_on_name_collisions(
                label="profile",
                external_root=profiles_root,
                internal_root=DEFAULT_PROFILES_DIRECTORY,
                external_names=_collect_profile_names(profiles_root),
                internal_names=_collect_profile_names(DEFAULT_PROFILES_DIRECTORY),
            )

        if has_external_tools:
            builtin_tools_root = Path(__file__).parent / "tools"
            _raise_on_name_collisions(
                label="tool",
                external_root=tools_root,
                internal_root=builtin_tools_root,
                external_names=_collect_tool_module_names(tools_root),
                internal_names=_collect_tool_module_names(builtin_tools_root),
            )

        # 3. Report where profiles and tools will be loaded from.
        if uses_external_profiles:
            logger.warning(
                "Environment variable 'REACHY_MINI_EXTERNAL_PROFILES_DIRECTORY' is set. "
                "Profiles (instructions.txt, ...) will be loaded from %s.",
                profiles_root,
            )
        else:
            logger.info(
                "'REACHY_MINI_EXTERNAL_PROFILES_DIRECTORY' is not set. "
                "Using built-in profiles from %s.",
                DEFAULT_PROFILES_DIRECTORY,
            )

        if has_external_tools:
            logger.warning(
                "Environment variable 'REACHY_MINI_EXTERNAL_TOOLS_DIRECTORY' is set. "
                "External tools will be loaded from %s.",
                tools_root,
            )
        else:
            logger.info(
                "'REACHY_MINI_EXTERNAL_TOOLS_DIRECTORY' is not set. "
                "Using built-in shared tools only."
            )
191
+
192
+
193
+ config = Config()
194
+
195
+
196
def set_custom_profile(profile: str | None) -> None:
    """Update the selected custom profile at runtime and expose it via env.

    This ensures modules that read `config` and code that inspects the
    environment see a consistent value.

    The call does nothing when ``LOCKED_PROFILE`` is set. Both updates are
    best-effort: a failure is logged and never raised to the caller.

    Args:
        profile: profile name to select; ``None`` or an empty string removes
            ``REACHY_MINI_CUSTOM_PROFILE`` from the environment.
    """
    if LOCKED_PROFILE is not None:
        # A locked build ignores runtime profile switches.
        return

    try:
        config.REACHY_MINI_CUSTOM_PROFILE = profile
    except Exception:
        logger.warning("Could not update config.REACHY_MINI_CUSTOM_PROFILE", exc_info=True)

    try:
        if profile:
            os.environ["REACHY_MINI_CUSTOM_PROFILE"] = profile
        else:
            # Remove to reflect default
            os.environ.pop("REACHY_MINI_CUSTOM_PROFILE", None)
    except Exception:
        # e.g. a non-string value or one containing a NUL byte
        logger.warning("Could not update REACHY_MINI_CUSTOM_PROFILE in the environment", exc_info=True)
src/reachy_mini_receptionist/console.py ADDED
@@ -0,0 +1,527 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Bidirectional local audio stream with optional settings UI.
2
+
3
+ In headless mode, there is no Gradio UI. If the OpenAI API key is not
4
+ available via environment/.env, we expose a minimal settings page via the
5
+ Reachy Mini Apps settings server to let non-technical users enter it.
6
+
7
+ The settings UI is served from this package's ``static/`` folder and offers a
8
+ single password field to set ``OPENAI_API_KEY``. Once set, we persist it to the
9
+ app instance's ``.env`` file (if available) and proceed to start streaming.
10
+ """
11
+
12
+ import os
13
+ import sys
14
+ import time
15
+ import asyncio
16
+ import logging
17
+ from typing import List, Optional
18
+ from pathlib import Path
19
+
20
+ from fastrtc import AdditionalOutputs, audio_to_float32
21
+ from scipy.signal import resample
22
+
23
+ from reachy_mini import ReachyMini
24
+ from reachy_mini.media.media_manager import MediaBackend
25
+ from reachy_mini_receptionist.config import LOCKED_PROFILE, config
26
+ from reachy_mini_receptionist.openai_realtime import OpenaiRealtimeHandler
27
+ from reachy_mini_receptionist.headless_personality_ui import mount_personality_routes
28
+
29
+
30
+ try:
31
+ # FastAPI is provided by the Reachy Mini Apps runtime
32
+ from fastapi import FastAPI, Response
33
+ from pydantic import BaseModel
34
+ from fastapi.responses import FileResponse, JSONResponse, RedirectResponse
35
+ from starlette.staticfiles import StaticFiles
36
+ except Exception: # pragma: no cover - only loaded when settings_app is used
37
+ FastAPI = object # type: ignore
38
+ FileResponse = object # type: ignore
39
+ JSONResponse = object # type: ignore
40
+ StaticFiles = object # type: ignore
41
+ BaseModel = object # type: ignore
42
+
43
+
44
+ logger = logging.getLogger(__name__)
45
+
46
+
47
+ class LocalStream:
48
+ """LocalStream using Reachy Mini's recorder/player."""
49
+
50
def __init__(
    self,
    handler: OpenaiRealtimeHandler,
    robot: ReachyMini,
    *,
    settings_app: Optional[FastAPI] = None,
    instance_path: Optional[str] = None,
):
    """Wire the realtime handler to the robot's audio recorder/player.

    - ``handler``: realtime handler that receives mic frames and emits outputs.
    - ``robot``: Reachy Mini instance whose ``media`` pipelines are used.
    - ``settings_app``: the Reachy Mini Apps FastAPI to attach settings endpoints.
    - ``instance_path``: directory where per-instance ``.env`` should be stored.
    """
    # Collaborators.
    self.handler = handler
    self._robot = robot

    # Optional settings-server integration.
    self._settings_app: Optional[FastAPI] = settings_app
    self._instance_path: Optional[str] = instance_path
    self._settings_initialized = False

    # Async runtime state; the loop is captured once ``launch`` starts running.
    self._stop_event = asyncio.Event()
    self._tasks: List[asyncio.Task[None]] = []
    self._asyncio_loop = None

    # Allow the handler to flush the player queue when appropriate.
    handler._clear_queue = self.clear_audio_queue
73
+
74
+ # ---- Settings UI (only when API key is missing) ----
75
+ def _read_env_lines(self, env_path: Path) -> list[str]:
76
+ """Load env file contents or a template as a list of lines."""
77
+ inst = env_path.parent
78
+ try:
79
+ if env_path.exists():
80
+ try:
81
+ return env_path.read_text(encoding="utf-8").splitlines()
82
+ except Exception:
83
+ return []
84
+ template_text = None
85
+ ex = inst / ".env.example"
86
+ if ex.exists():
87
+ try:
88
+ template_text = ex.read_text(encoding="utf-8")
89
+ except Exception:
90
+ template_text = None
91
+ if template_text is None:
92
+ try:
93
+ cwd_example = Path.cwd() / ".env.example"
94
+ if cwd_example.exists():
95
+ template_text = cwd_example.read_text(encoding="utf-8")
96
+ except Exception:
97
+ template_text = None
98
+ if template_text is None:
99
+ packaged = Path(__file__).parent / ".env.example"
100
+ if packaged.exists():
101
+ try:
102
+ template_text = packaged.read_text(encoding="utf-8")
103
+ except Exception:
104
+ template_text = None
105
+ return template_text.splitlines() if template_text else []
106
+ except Exception:
107
+ return []
108
+
109
def _persist_api_key(self, key: str) -> None:
    """Persist the API key to the process environment and the instance ``.env``.

    Behavior:
    - Blank keys are ignored. Keys containing a line break are rejected,
      because they would inject additional entries into ``.env``.
    - Sets ``OPENAI_API_KEY`` in the process env and in the in-memory config.
    - When an instance path is configured, writes ``<instance_path>/.env``:
      the starting content comes from ``_read_env_lines`` (existing file or
      a ``.env.example`` template); every existing ``OPENAI_API_KEY=`` line
      is rewritten so a stale duplicate cannot win when the file is loaded,
      and the entry is appended when none exists.
    - Loads the written ``.env`` into the current process environment.

    All steps are best-effort: failures are logged, never raised.
    """
    k = (key or "").strip()
    if not k:
        return
    # The file is read back with ``splitlines``; a value that spans several
    # lines would be parsed as extra, attacker-controlled entries.
    if len(k.splitlines()) != 1:
        logger.warning("Refusing to persist OPENAI_API_KEY: value contains a line break")
        return

    # Update live process env and config so consumers see it immediately
    try:
        os.environ["OPENAI_API_KEY"] = k
    except Exception as e:  # best-effort
        logger.debug("Could not export OPENAI_API_KEY to the environment: %s", e)
    try:
        config.OPENAI_API_KEY = k
    except Exception as e:
        logger.debug("Could not update config.OPENAI_API_KEY: %s", e)

    if not self._instance_path:
        return
    try:
        env_path = Path(self._instance_path) / ".env"
        lines = self._read_env_lines(env_path)
        entry = f"OPENAI_API_KEY={k}"
        found = False
        for i, ln in enumerate(lines):
            if ln.strip().startswith("OPENAI_API_KEY="):
                lines[i] = entry
                found = True
        if not found:
            lines.append(entry)
        env_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
        logger.info("Persisted OPENAI_API_KEY to %s", env_path)

        # Load the newly written .env into this process to ensure downstream imports see it
        try:
            from dotenv import load_dotenv

            load_dotenv(dotenv_path=str(env_path), override=True)
        except Exception as e:
            logger.debug("Could not reload %s: %s", env_path, e)
    except Exception as e:
        logger.warning("Failed to persist OPENAI_API_KEY: %s", e)
162
+
163
def _persist_personality(self, profile: Optional[str]) -> None:
    """Persist the startup personality to the config and the instance ``.env``.

    ``profile`` is stripped; a blank value means "no custom profile" and
    removes the ``REACHY_MINI_CUSTOM_PROFILE`` entry. Values containing a
    line break are rejected because they would inject extra ``.env`` entries.
    The first existing entry is rewritten in place and any further duplicates
    are dropped, so a stale line cannot be re-exported when the file is
    loaded. Nothing is done while ``LOCKED_PROFILE`` is set, and no ``.env``
    is created just to record "no profile".

    All steps are best-effort: failures are logged, never raised.
    """
    if LOCKED_PROFILE is not None:
        return
    selection = (profile or "").strip() or None
    if selection is not None and len(selection.splitlines()) != 1:
        logger.warning("Refusing to persist REACHY_MINI_CUSTOM_PROFILE: value contains a line break")
        return

    try:
        from reachy_mini_receptionist.config import set_custom_profile

        set_custom_profile(selection)
    except Exception as e:
        logger.debug("Could not apply custom profile in-process: %s", e)

    if not self._instance_path:
        return
    try:
        env_path = Path(self._instance_path) / ".env"
        if selection is None and not env_path.exists():
            return

        prefix = "REACHY_MINI_CUSTOM_PROFILE="
        entry = f"{prefix}{selection}" if selection else None
        updated: list[str] = []
        written = False
        for ln in self._read_env_lines(env_path):
            if ln.strip().startswith(prefix):
                # Keep the position of the first entry, drop every other one.
                if entry is not None and not written:
                    updated.append(entry)
                    written = True
                continue
            updated.append(ln)
        if entry is not None and not written:
            updated.append(entry)

        env_path.write_text("\n".join(updated) + "\n", encoding="utf-8")
        logger.info("Persisted startup personality to %s", env_path)
        try:
            from dotenv import load_dotenv

            load_dotenv(dotenv_path=str(env_path), override=True)
        except Exception as e:
            logger.debug("Could not reload %s: %s", env_path, e)
    except Exception as e:
        logger.warning("Failed to persist REACHY_MINI_CUSTOM_PROFILE: %s", e)
204
+
205
+ def _read_persisted_personality(self) -> Optional[str]:
206
+ """Read persisted startup personality from instance .env (if any)."""
207
+ if not self._instance_path:
208
+ return None
209
+ env_path = Path(self._instance_path) / ".env"
210
+ try:
211
+ if env_path.exists():
212
+ for ln in env_path.read_text(encoding="utf-8").splitlines():
213
+ if ln.strip().startswith("REACHY_MINI_CUSTOM_PROFILE="):
214
+ _, _, val = ln.partition("=")
215
+ v = val.strip()
216
+ return v or None
217
+ except Exception:
218
+ pass
219
+ return None
220
+
221
def _init_settings_ui_if_needed(self) -> None:
    """Attach the minimal settings endpoints to the settings app, once.

    The endpoints are registered whenever a ``settings_app`` was provided,
    even if the API key is already configured, so users still get a status
    page. Subsequent calls are no-ops.

    Registered routes:
    - ``/static/*``: assets from this package's ``static/`` folder.
    - ``GET /``: redirect to ``/dashboard``.
    - ``GET /favicon.ico``: empty 204 response.
    - ``GET /status``: whether an API key is configured.
    - ``GET /ready``: whether the tools module reports being initialized.
    - ``POST /openai_api_key``: set and persist the key.
    - ``POST /validate_api_key``: check a key without persisting it.
    """
    if self._settings_initialized:
        return
    if self._settings_app is None:
        return

    static_dir = Path(__file__).parent / "static"

    if hasattr(self._settings_app, "mount"):
        try:
            # Serve /static/* assets
            self._settings_app.mount("/static", StaticFiles(directory=str(static_dir)), name="static")
        except Exception as e:
            # Keep going: the JSON endpoints below are still useful without assets.
            logger.warning("Could not mount static assets from %s: %s", static_dir, e)

    class ApiKeyPayload(BaseModel):
        openai_api_key: str

    # GET / -> redirect to /dashboard (the receptionist control room)
    @self._settings_app.get("/")
    def _root() -> RedirectResponse:
        return RedirectResponse(url="/dashboard")

    # GET /favicon.ico -> optional, avoid noisy 404s on some browsers
    @self._settings_app.get("/favicon.ico")
    def _favicon() -> Response:
        return Response(status_code=204)

    # GET /status -> whether key is set
    @self._settings_app.get("/status")
    def _status() -> JSONResponse:
        has_key = bool(config.OPENAI_API_KEY and str(config.OPENAI_API_KEY).strip())
        return JSONResponse({"has_key": has_key})

    # GET /ready -> whether backend finished loading tools
    @self._settings_app.get("/ready")
    def _ready() -> JSONResponse:
        try:
            # Only inspect the module if something else already imported it.
            mod = sys.modules.get("reachy_mini_receptionist.tools.core_tools")
            ready = bool(getattr(mod, "_TOOLS_INITIALIZED", False)) if mod else False
        except Exception:
            ready = False
        return JSONResponse({"ready": ready})

    # POST /openai_api_key -> set/persist key
    @self._settings_app.post("/openai_api_key")
    def _set_key(payload: ApiKeyPayload) -> JSONResponse:
        key = (payload.openai_api_key or "").strip()
        if not key:
            return JSONResponse({"ok": False, "error": "empty_key"}, status_code=400)
        self._persist_api_key(key)
        return JSONResponse({"ok": True})

    # POST /validate_api_key -> validate key without persisting it
    @self._settings_app.post("/validate_api_key")
    async def _validate_key(payload: ApiKeyPayload) -> JSONResponse:
        key = (payload.openai_api_key or "").strip()
        if not key:
            return JSONResponse({"valid": False, "error": "empty_key"}, status_code=400)

        # Try to validate by checking if we can fetch the models
        try:
            import httpx

            headers = {"Authorization": f"Bearer {key}", "Content-Type": "application/json"}
            async with httpx.AsyncClient(timeout=10.0) as client:
                response = await client.get("https://api.openai.com/v1/models", headers=headers)
                if response.status_code == 200:
                    return JSONResponse({"valid": True})
                elif response.status_code == 401:
                    return JSONResponse({"valid": False, "error": "invalid_api_key"}, status_code=401)
                else:
                    return JSONResponse(
                        {"valid": False, "error": "validation_failed"}, status_code=response.status_code
                    )
        except Exception as e:
            logger.warning("API key validation failed: %s", e)
            return JSONResponse({"valid": False, "error": "validation_error"}, status_code=500)

    self._settings_initialized = True
307
+
308
def launch(self) -> None:
    """Start the recorder/player and run the async processing loops.

    Steps, in order:
    1. Load an existing ``<instance_path>/.env`` and refresh the API key and
       (unless the profile is locked) the custom profile from it.
    2. If no key is configured, try to obtain one from the HuggingFace space.
    3. Register the settings endpoints when a settings app is available.
    4. If the key is still missing, block until it is provided through the
       settings UI. The wait ends early on ``KeyboardInterrupt`` or when
       ``close()`` sets the stop event.
    5. Start the media pipelines and run the handler, record and play loops
       until they finish or are cancelled; the handler is always shut down.

    This call blocks the calling thread for the lifetime of the stream.
    """
    self._stop_event.clear()

    def _has_key() -> bool:
        return bool(config.OPENAI_API_KEY and str(config.OPENAI_API_KEY).strip())

    # Try to load an existing instance .env first (covers subsequent runs)
    if self._instance_path:
        try:
            from dotenv import load_dotenv

            from reachy_mini_receptionist.config import set_custom_profile

            env_path = Path(self._instance_path) / ".env"
            if env_path.exists():
                load_dotenv(dotenv_path=str(env_path), override=True)
                # Update config with newly loaded values
                new_key = os.getenv("OPENAI_API_KEY", "").strip()
                if new_key:
                    try:
                        config.OPENAI_API_KEY = new_key
                    except Exception as e:
                        logger.debug("Could not update config.OPENAI_API_KEY: %s", e)
                if LOCKED_PROFILE is None:
                    new_profile = os.getenv("REACHY_MINI_CUSTOM_PROFILE")
                    if new_profile is not None:
                        try:
                            set_custom_profile(new_profile.strip() or None)
                        except Exception as e:
                            # Best-effort profile update
                            logger.debug("Could not apply persisted profile: %s", e)
        except Exception as e:
            # Instance .env loading is optional; continue with defaults
            logger.debug("Skipping instance .env loading: %s", e)

    # If key is still missing, try to download one from HuggingFace
    if not _has_key():
        logger.info("OPENAI_API_KEY not set, attempting to download from HuggingFace...")
        try:
            from gradio_client import Client

            client = Client("HuggingFaceM4/gradium_setup", verbose=False)
            key, _status = client.predict(api_name="/claim_b_key")
            if key and key.strip():
                logger.info("Successfully downloaded API key from HuggingFace")
                # Persist it immediately
                self._persist_api_key(key)
        except Exception as e:
            logger.warning("Failed to download API key from HuggingFace: %s", e)

    # Always expose settings UI if a settings app is available
    # (do this AFTER loading/downloading the key so status endpoint sees the right value)
    self._init_settings_ui_if_needed()

    # If key is still missing -> wait until provided via the settings UI
    if not _has_key():
        logger.warning("OPENAI_API_KEY not found. Open the app settings page to enter it.")
        # Poll until the key becomes available (set via the settings UI)
        try:
            while not _has_key():
                # close() may be called from another thread while we wait;
                # without this check the wait could never be abandoned.
                if self._stop_event.is_set():
                    logger.info("Stopped while waiting for API key.")
                    return
                time.sleep(0.2)
        except KeyboardInterrupt:
            logger.info("Interrupted while waiting for API key.")
            return

    # Start media after key is set/available
    self._robot.media.start_recording()
    self._robot.media.start_playing()
    time.sleep(1)  # give some time to the pipelines to start

    async def runner() -> None:
        # Capture loop for cross-thread personality actions
        loop = asyncio.get_running_loop()
        self._asyncio_loop = loop  # type: ignore[assignment]
        # Mount personality routes now that loop and handler are available
        try:
            if self._settings_app is not None:
                mount_personality_routes(
                    self._settings_app,
                    self.handler,
                    lambda: self._asyncio_loop,
                    persist_personality=self._persist_personality,
                    get_persisted_personality=self._read_persisted_personality,
                )
        except Exception as e:
            # The stream itself does not depend on these routes.
            logger.warning("Failed to mount personality routes: %s", e)
        self._tasks = [
            asyncio.create_task(self.handler.start_up(), name="openai-handler"),
            asyncio.create_task(self.record_loop(), name="stream-record-loop"),
            asyncio.create_task(self.play_loop(), name="stream-play-loop"),
        ]
        try:
            await asyncio.gather(*self._tasks)
        except asyncio.CancelledError:
            logger.info("Tasks cancelled during shutdown")
        finally:
            # Ensure handler connection is closed
            await self.handler.shutdown()

    asyncio.run(runner())
407
+
408
def close(self) -> None:
    """Stop the stream and underlying media pipelines.

    This method:
    - Stops audio recording and playback first
    - Sets the stop event to signal async loops to terminate
    - Cancels all pending async tasks (openai-handler, record-loop, play-loop)

    ``launch`` blocks its caller inside ``asyncio.run``, so this method is
    normally invoked from a different thread. asyncio tasks are not
    thread-safe; while the loop is running the cancellation is therefore
    handed to the loop via ``call_soon_threadsafe``.
    """
    logger.info("Stopping LocalStream...")

    # Stop media pipelines FIRST before cancelling async tasks
    # This ensures clean shutdown before PortAudio cleanup
    try:
        self._robot.media.stop_recording()
    except Exception as e:
        logger.debug("Error stopping recording (may already be stopped): %s", e)

    try:
        self._robot.media.stop_playing()
    except Exception as e:
        logger.debug("Error stopping playback (may already be stopped): %s", e)

    # Now signal async loops to stop
    self._stop_event.set()

    # Cancel all running tasks
    loop = getattr(self, "_asyncio_loop", None)
    for task in self._tasks:
        if task.done():
            continue
        try:
            if loop is not None and loop.is_running():
                loop.call_soon_threadsafe(task.cancel)
            else:
                task.cancel()
        except RuntimeError as e:
            # The loop was closed between the check and the call; nothing left to cancel.
            logger.debug("Could not cancel task during shutdown: %s", e)
437
+
438
def clear_audio_queue(self) -> None:
    """Drop any audio that is queued for playback, right now.

    Clears the backend-specific player buffer when the backend is known, then
    replaces the handler's output queue with a fresh, empty one.
    """
    logger.info("User intervention: flushing player queue")

    backend = self._robot.media.backend
    if backend == MediaBackend.GSTREAMER:
        # Directly flush gstreamer audio pipe
        self._robot.media.audio.clear_player()
    elif backend in (MediaBackend.DEFAULT, MediaBackend.DEFAULT_NO_VIDEO):
        self._robot.media.audio.clear_output_buffer()

    self.handler.output_queue = asyncio.Queue()
447
+
448
async def record_loop(self) -> None:
    """Read mic frames from the recorder and forward them to the handler.

    Runs until the stop event is set. When the recorder has no frame ready,
    the loop sleeps for a few milliseconds: ``asyncio.sleep(0)`` only yields
    to other tasks and would otherwise keep this loop spinning at full speed.
    """
    input_sample_rate = self._robot.media.get_input_audio_samplerate()
    logger.debug("Audio recording started at %s Hz", input_sample_rate)

    idle_delay = 0.005  # seconds to wait before polling the recorder again

    while not self._stop_event.is_set():
        audio_frame = self._robot.media.get_audio_sample()
        if audio_frame is None:
            await asyncio.sleep(idle_delay)
            continue
        await self.handler.receive((input_sample_rate, audio_frame))
        await asyncio.sleep(0)  # let the other loops run between frames
458
+
459
async def play_loop(self) -> None:
    """Drain handler outputs: log transcript messages and play audio frames.

    Each iteration awaits one output from the handler, processes it, and then
    yields to the event loop exactly once. Runs until the stop event is set.
    """
    while not self._stop_event.is_set():
        item = await self.handler.emit()

        if isinstance(item, AdditionalOutputs):
            for message in item.args:
                text = message.get("content", "")
                if not isinstance(text, str):
                    continue
                # Keep log lines bounded.
                shown = text if len(text) < 500 else text[:500] + "…"
                logger.info("role=%s content=%s", message.get("role"), shown)

        elif isinstance(item, tuple):
            input_sample_rate, samples = item
            output_sample_rate = self._robot.media.get_output_audio_samplerate()

            # Reshape if needed
            if samples.ndim == 2:
                rows, cols = samples.shape
                # Scipy channels last convention
                if cols > rows:
                    samples = samples.T
                # Multiple channels -> Mono channel
                if samples.shape[1] > 1:
                    samples = samples[:, 0]

            # Cast if needed
            audio_frame = audio_to_float32(samples)

            playable = True
            if audio_frame.size == 0 or len(audio_frame) < 2:
                # Empty / sub-sample chunks have nothing to play, and letting
                # them reach the resampler below can end in a
                # ZeroDivisionError when the target length rounds to 0,
                # which would take the whole play loop down.
                logger.debug(
                    "play_loop: skipping near-empty audio frame "
                    "(len=%d, input_sr=%s, output_sr=%s)",
                    len(audio_frame), input_sample_rate, output_sample_rate,
                )
                playable = False
            elif input_sample_rate != output_sample_rate:
                # Resample if needed
                target_len = int(len(audio_frame) * output_sample_rate / input_sample_rate)
                if target_len < 1:
                    # Resample would divide by zero — skip rather than crash.
                    logger.debug(
                        "play_loop: skipping frame that would resample to 0 "
                        "samples (len=%d, %s->%s)",
                        len(audio_frame), input_sample_rate, output_sample_rate,
                    )
                    playable = False
                else:
                    audio_frame = resample(audio_frame, target_len)

            if playable:
                self._robot.media.push_audio_sample(audio_frame)

        else:
            logger.debug("Ignoring output type=%s", type(item).__name__)

        await asyncio.sleep(0)  # yield to event loop
src/reachy_mini_receptionist/conversation_controller.py ADDED
@@ -0,0 +1,586 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Conversation controller — translates events into session state transitions.
2
+
3
+ This is the workflow engine. It listens for:
4
+ - Face state events from FaceRecognitionWorker.
5
+ - Tool call completions from the realtime handler.
6
+
7
+ And decides which ReceptionState transition should fire on the SessionManager.
8
+
9
+ Also exposes ``next_action_hint(state)`` — short directives the realtime
10
+ handler appends to its session context push so the LLM gets per-state
11
+ workflow guidance dynamically, instead of having the whole flow baked into
12
+ the system prompt.
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import asyncio
17
+ import logging
18
+ from typing import Any, Optional
19
+
20
+ from reachy_mini_receptionist.receptionist_state import ReceptionState
21
+ from reachy_mini_receptionist.session_manager import SessionManager
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
+ # States in which an "unknown" face transitioning to "known" should still
27
+ # short-circuit identification (we haven't yet confirmed the visitor's name
28
+ # through other channels).
29
+ _EARLY_IDENTIFICATION_STATES: frozenset[ReceptionState] = frozenset({
30
+ ReceptionState.IDLE,
31
+ ReceptionState.VISITOR_DETECTED,
32
+ ReceptionState.GREETING,
33
+ ReceptionState.ASK_NAME,
34
+ # Include MULTIPLE_PEOPLE so the controller transitions back out as
35
+ # soon as the crowd thins to one face.
36
+ ReceptionState.MULTIPLE_PEOPLE,
37
+ })
38
+
39
+ # States in which losing the face means the visitor walked away and we should
40
+ # reset the session for the next person. Inside flow states (e.g. ASK_NAME),
41
+ # losing the face briefly just means they turned their head — don't reset.
42
+ _RESET_ON_FACE_LOST_STATES: frozenset[ReceptionState] = frozenset({
43
+ ReceptionState.NOTIFIED,
44
+ ReceptionState.NO_APPOINTMENT,
45
+ ReceptionState.EMAIL_FAILED,
46
+ ReceptionState.COMPLETE,
47
+ ReceptionState.ERROR,
48
+ })
49
+
50
+
51
+ class ConversationController:
52
+ """Wire face events + tool completions into session state transitions.
53
+
54
+ Stateless aside from the SessionManager it operates on. Safe to call
55
+ handlers from any thread because SessionManager handles locking.
56
+
57
+ Optionally records every completed visit to a ``VisitorLog`` when the
58
+ session resets out of a meaningful state (visitor name, recognized
59
+ face, or employee was set).
60
+ """
61
+
62
def __init__(self, session_manager: SessionManager) -> None:
    """Bind the controller to the ``SessionManager`` it operates on."""
    # The event handlers below read state from and apply transitions to this manager.
    self._session = session_manager
64
+
65
+ # ------------------------------------------------------------------
66
+ # Face events
67
+ # ------------------------------------------------------------------
68
+
69
def on_face_event(self, event: dict[str, Any]) -> None:
    """Map a ``face_state_changed`` event onto a session transition.

    Keys read from ``event``:
        state: "known" | "unknown" | "multiple" | "no_face"
        name:  the matched name; only used when state == "known"

    Any other keys in the event are ignored here.
    """
    face_state = event.get("state")
    face_name = event.get("name")
    state_now = self._session.current_state
    snap = self._session.session

    if face_state == "known" and face_name:
        # Guard against re-running a finished check-in: once the host has
        # been emailed for this visitor, entering RECOGNIZED again would
        # repeat the calendar lookup and the email whenever the face
        # flickers to MULTIPLE_PEOPLE and back.
        host_already_emailed = bool(snap.email_sent_to) and (
            (snap.visitor_name or "").strip().lower() == face_name.strip().lower()
        )
        if host_already_emailed:
            if state_now == ReceptionState.NOTIFIED:
                self._session.update(recognized_face_name=face_name)
            else:
                logger.info(
                    "Face %r returned after notified — restoring NOTIFIED instead of re-running flow",
                    face_name,
                )
                self._session.transition(
                    ReceptionState.NOTIFIED,
                    recognized_face_name=face_name,
                )
            return

        if state_now not in _EARLY_IDENTIFICATION_STATES:
            # Past identification — just record the face match.
            self._session.update(recognized_face_name=face_name)
            return

        # Speech beats camera: if the visitor already gave a different
        # name in this session, keep it. The recognizer can mis-match, and
        # an explicit correction from the visitor must win. The face is
        # only promoted to visitor_name when no spoken name exists yet.
        spoken_name = (snap.visitor_name or "").strip()
        if spoken_name and spoken_name.lower() != face_name.strip().lower():
            logger.info(
                "Face matched %r but visitor already confirmed %r — keeping speech",
                face_name, spoken_name,
            )
            self._session.update(recognized_face_name=face_name)
            return

        self._session.transition(
            ReceptionState.RECOGNIZED,
            visitor_name=face_name,
            recognized_face_name=face_name,
        )
        # Resolve the appointment right away so this shortcut does not
        # depend on the LLM deciding to call get_today_calendar.
        try:
            self._auto_resolve_appointment(face_name)
        except Exception as e:
            logger.warning(
                "Auto-resolve appointment after face match failed: %s", e,
            )

    elif face_state == "unknown":
        if state_now in (ReceptionState.IDLE, ReceptionState.MULTIPLE_PEOPLE):
            self._session.transition(ReceptionState.VISITOR_DETECTED)

    elif face_state == "multiple":
        if state_now != ReceptionState.MULTIPLE_PEOPLE:
            self._session.transition(ReceptionState.MULTIPLE_PEOPLE)

    elif face_state == "no_face":
        if state_now in _RESET_ON_FACE_LOST_STATES:
            logger.info("Face lost in terminal state %s — resetting session", state_now.value)
            # SessionManager.reset() handles persisting the pre-reset
            # snapshot to the visitor log on its own.
            self._session.reset()
158
+
159
+ # ------------------------------------------------------------------
160
+ # Tool completions
161
+ # ------------------------------------------------------------------
162
+
163
async def on_tool_completed_async(
    self,
    tool_name: str,
    args: dict[str, Any],
    result: dict[str, Any],
) -> None:
    """Async entry point for tool completions.

    Non-dict results are ignored. Explicit failures (an ``error`` key
    together with ``success`` being ``False``) are handed to the synchronous
    ``on_tool_completed``. Otherwise, for ``register_guest`` and
    ``lookup_employee`` the calendar is awaited up front through
    ``calendar_data.get_appointments_async`` so the shared dispatch logic can
    reuse it; if that pre-fetch fails, ``None`` is passed instead.
    """
    if not isinstance(result, dict):
        return

    if "error" in result and result.get("success") is False:
        self.on_tool_completed(tool_name, args, result)
        return

    prefetched: Optional[list[dict[str, Any]]] = None
    if tool_name == "register_guest" or tool_name == "lookup_employee":
        try:
            from reachy_mini_receptionist import calendar_data

            prefetched = await calendar_data.get_appointments_async()
        except Exception as exc:
            logger.debug("Pre-fetch appointments failed: %s", exc)
            prefetched = None

    self._dispatch_tool_completion(tool_name, args, result, prefetched)
194
+
195
def on_tool_completed(
    self,
    tool_name: str,
    args: dict[str, Any],
    result: dict[str, Any],
) -> None:
    """Synchronous entry point for tool completions.

    Non-dict results are ignored. Explicit failures (an ``error`` key
    together with ``success`` being ``False``) are only logged; they never
    change the session state here. Everything else goes to the shared
    dispatch logic without a pre-fetched calendar, so any calendar lookup
    needed there happens synchronously. Callers running on the realtime event
    loop should use ``on_tool_completed_async`` instead.
    """
    if not isinstance(result, dict):
        return

    failed = "error" in result and result.get("success") is False
    if not failed:
        self._dispatch_tool_completion(tool_name, args, result, None)
        return

    logger.debug("Tool %s reported failure: %s", tool_name, result.get("error"))
219
+
220
def _dispatch_tool_completion(
    self,
    tool_name: str,
    args: dict[str, Any],
    result: dict[str, Any],
    appointments: Optional[list[dict[str, Any]]],
) -> None:
    """Core transition logic shared by sync + async entry points.

    Routes the completed tool to a per-tool helper. Tools other than
    ``register_guest``, ``get_today_calendar``, ``send_email`` and
    ``lookup_employee`` cause no transition.

    ``appointments`` is the optional pre-fetched calendar (set by the
    async entry point so the iCal HTTP call doesn't run on the realtime
    audio loop). When ``None``, ``_auto_resolve_appointment`` falls back
    to its own sync fetch.
    """
    if tool_name == "register_guest":
        self._apply_register_guest_result(args, result, appointments)
    elif tool_name == "get_today_calendar":
        self._apply_calendar_result(result)
    elif tool_name == "send_email":
        self._apply_send_email_result(args, result)
    elif tool_name == "lookup_employee":
        self._apply_lookup_employee_result(args, result, appointments)


def _apply_register_guest_result(
    self,
    args: dict[str, Any],
    result: dict[str, Any],
    appointments: Optional[list[dict[str, Any]]],
) -> None:
    """Move to RECOGNIZED after a successful register_guest, then auto-resolve."""
    # Only transition on actual SUCCESS. If register_guest was blocked
    # (no_confirmation, name_is_filler, hallucinated chatter, no_face,
    # etc.) it returns success=False — we MUST NOT advance the session in
    # that case, or the visitor ends up locked into a bogus name like
    # "Community" with no path to fix it.
    if not result.get("success"):
        logger.debug(
            "register_guest returned success=False (reason=%r) — not transitioning",
            result.get("blocked_reason") or result.get("error"),
        )
        return
    name = (args.get("name") or "").strip()
    if not name:
        return
    self._session.transition(
        ReceptionState.RECOGNIZED,
        visitor_name=name,
        recognized_face_name=name,
    )
    # The LLM was supposed to follow the RECOGNIZED hint with a
    # get_today_calendar tool call, but it kept asking the visitor
    # "who are you here to see?" instead — emails never went out.
    # Resolve the appointment ourselves and dispatch
    # APPOINTMENT_MATCHED / NO_APPOINTMENT so the bot is never blocked
    # on the LLM remembering to look something up.
    try:
        self._auto_resolve_appointment(name, appointments)
    except Exception as e:
        # Best-effort: the RECOGNIZED transition above already happened.
        logger.warning(
            "Auto-resolve appointment after register_guest failed: %s", e,
        )


def _apply_calendar_result(self, result: dict[str, Any]) -> None:
    """Match the current visitor against a get_today_calendar result."""
    calendar = result.get("calendar") or []
    snap = self._session.session
    # Prefer explicit visitor_name (operator typed/spoke it), fall back
    # to a recognized face match (returning guest whose name we already
    # trust because LBPH matched their saved crop).
    visitor_name = snap.visitor_name or snap.recognized_face_name
    if not visitor_name:
        # LLM fetched the calendar as a generic lookup (often during idle
        # exploration) before identifying the visitor. There is no name
        # to match against yet, so don't change state.
        logger.debug(
            "get_today_calendar fired without a visitor_name — skipping transition",
        )
        return
    matched = self._match_appointment(calendar, visitor_name)
    updates: dict[str, Any] = {}
    # If we matched purely off the face name, promote it into
    # visitor_name so downstream (send_email guard, dashboard, visitor
    # log) treats it as a confirmed identity.
    if not snap.visitor_name:
        updates["visitor_name"] = visitor_name
    if matched:
        updates["matched_appointment"] = matched
        updates["employee_name"] = matched.get("visiting")
        self._session.transition(ReceptionState.APPOINTMENT_MATCHED, **updates)
    else:
        updates["error_message"] = f"No appointment found for {visitor_name!r}"
        self._session.transition(ReceptionState.NO_APPOINTMENT, **updates)


def _apply_send_email_result(
    self,
    args: dict[str, Any],
    result: dict[str, Any],
) -> None:
    """Flip to NOTIFIED only when send_email reports success."""
    # The send_email tool can refuse (placeholder address, no visitor
    # identity yet, duplicate-blocked, Resend HTTP error) — those all
    # return success=False, and the dashboard must not lie "NOTIFIED"
    # when no email left the system.
    to = (args.get("to") or "").strip()
    if not to:
        return
    if result.get("success"):
        self._session.transition(
            ReceptionState.NOTIFIED,
            email_sent_to=to,
        )
    else:
        logger.info(
            "send_email returned success=False (blocked_reason=%s) — "
            "NOT transitioning to NOTIFIED",
            result.get("blocked_reason") or result.get("error"),
        )


def _apply_lookup_employee_result(
    self,
    args: dict[str, Any],
    result: dict[str, Any],
    appointments: Optional[list[dict[str, Any]]],
) -> None:
    """Handle the walk-in path: the visitor named the host, not themselves.

    On hit, drop an appointment (real if one matches, synthetic otherwise)
    into the session so the existing APPOINTMENT_MATCHED -> send_email ->
    NOTIFIED path works unchanged. On miss, surface UNKNOWN_EMPLOYEE so
    the bot tells the visitor that name isn't on the list.
    """
    if not result.get("found"):
        query = (args.get("name") or "").strip()
        self._session.transition(
            ReceptionState.UNKNOWN_EMPLOYEE,
            error_message=f"No directory match for {query!r}",
        )
        return

    emp = result.get("employee") or {}
    emp_email = (emp.get("email") or "").strip()
    emp_name = (emp.get("name") or args.get("name") or "").strip()
    if not emp_email:
        # Without an address there is nobody to notify; leave the state
        # untouched but make the dropped result visible in the logs.
        logger.warning(
            "lookup_employee found %r but the entry has no email — not transitioning",
            emp_name,
        )
        return

    snap = self._session.session
    # Trust the face DB: if LBPH already recognised this visitor
    # (recognized_face_name), promote that into visitor_name so
    # send_email's identity guard passes without forcing the bot to ask
    # the name again. This is the "returning known guest came back to
    # see X" path — they shouldn't be re-prompted for their name.
    visitor = snap.visitor_name or snap.recognized_face_name

    # If the visitor is known AND today's calendar has a real appointment
    # for them with this host, prefer that real appointment over a
    # synthetic walk-in. Otherwise the host's notification email loses
    # the scheduled time/note and reads "Walk-in visitor has arrived" for
    # a meeting that was actually on the calendar.
    real_appt: Optional[dict[str, Any]] = None
    if visitor and appointments:
        candidate = self._match_appointment(appointments, visitor)
        if (
            candidate
            and (candidate.get("visiting") or "").strip().lower()
            == emp_email.lower()
        ):
            real_appt = candidate

    if real_appt is not None:
        matched_appt = real_appt
    else:
        matched_appt = {
            "time": "now",
            "name": visitor or "Walk-in visitor",
            "note": f"Walk-in to see {emp_name}",
            "visiting": emp_email,
        }
    updates: dict[str, Any] = {
        "matched_appointment": matched_appt,
        "employee_name": emp_email,
    }
    if visitor and not snap.visitor_name:
        updates["visitor_name"] = visitor
    self._session.transition(ReceptionState.APPOINTMENT_MATCHED, **updates)
376
+
377
+ @staticmethod
378
+ def _match_appointment(
379
+ calendar: list[dict[str, Any]],
380
+ visitor_name: Optional[str],
381
+ ) -> Optional[dict[str, Any]]:
382
+ """Case-insensitive name match against today's calendar entries.
383
+
384
+ Matching is layered so a visitor who says just their first name
385
+ ("Rohan") still resolves to a calendar entry like "Rohan Verma":
386
+
387
+ 1. Exact match on the full string.
388
+ 2. Calendar entry's first whitespace-delimited token equals the
389
+ visitor string ("Rohan" == first(\"Rohan Verma\")).
390
+ 3. Substring of the calendar entry (\"rohan\" in \"rohan verma\").
391
+
392
+ Each layer returns the FIRST hit so we never silently switch which
393
+ calendar entry a visitor is mapped to. The minimum length guard
394
+ (>= 2 chars) keeps single-letter transcripts from matching half
395
+ the calendar.
396
+ """
397
+ if not visitor_name:
398
+ return None
399
+ target = visitor_name.strip().lower()
400
+ if len(target) < 2:
401
+ return None
402
+ for appt in calendar:
403
+ if (appt.get("name") or "").strip().lower() == target:
404
+ return appt
405
+ for appt in calendar:
406
+ name = (appt.get("name") or "").strip().lower()
407
+ tokens = name.split()
408
+ if tokens and tokens[0] == target:
409
+ return appt
410
+ for appt in calendar:
411
+ name = (appt.get("name") or "").strip().lower()
412
+ if target in name:
413
+ return appt
414
+ return None
415
+
416
def _auto_resolve_appointment(
    self,
    visitor_name: str,
    appointments: Optional[list[dict[str, Any]]] = None,
) -> None:
    """Resolve ``visitor_name`` against today's calendar and transition.

    Exists so the flow does not depend on the LLM remembering to call
    get_today_calendar after a visitor has been recognised.

    ``appointments`` is an optional pre-fetched list. When it is ``None``
    the calendar is fetched here via ``calendar_data.get_appointments()``,
    a synchronous call; callers on the realtime event loop should
    pre-fetch and pass the list in.

    Transitions:
      - APPOINTMENT_MATCHED, carrying the matched appointment and its
        ``visiting`` value as ``employee_name``, when an entry matches.
      - NO_APPOINTMENT with an error message otherwise.

    An empty ``visitor_name`` is a no-op.
    """
    if not visitor_name:
        return

    calendar = appointments
    if calendar is None:
        from reachy_mini_receptionist import calendar_data
        calendar = calendar_data.get_appointments()

    hit = self._match_appointment(calendar, visitor_name)
    if not hit:
        self._session.transition(
            ReceptionState.NO_APPOINTMENT,
            error_message=f"No appointment found for {visitor_name!r}",
        )
        logger.info("Auto-resolve: no appointment for %r", visitor_name)
        return

    self._session.transition(
        ReceptionState.APPOINTMENT_MATCHED,
        matched_appointment=hit,
        employee_name=hit.get("visiting"),
    )
    logger.info(
        "Auto-resolved appointment for %r -> %s",
        visitor_name, hit.get("visiting"),
    )
463
+
464
+
465
# ----------------------------------------------------------------------
# Per-state workflow hints
# ----------------------------------------------------------------------
# One short directive per ReceptionState, appended to the session context
# push so the LLM knows what to do next without the workflow being
# hardcoded in the system prompt. Keep each hint to a sentence or two and
# keep it concrete: the hints describe what to do *when* the user speaks.

_NEXT_ACTION_HINTS: dict[ReceptionState, str] = {
    # IDLE means "no visitor yet", but a visitor may speak before the face
    # worker has stabilised (camera obscured, off-angle, dim light). The
    # hint therefore treats an utterance during IDLE as the start of the
    # flow rather than leaving the bot stuck on a generic greeting.
    ReceptionState.IDLE: (
        "If the visitor speaks first, just be conversational — greet them "
        "and figure out who they are or who they want to see. If they named "
        "themselves, confirm once ('I heard <name>, right?') then call "
        "register_guest(name, confirmed=true). If they named a host, call "
        "lookup_employee. ALWAYS respond — never go silent. If you mishear, "
        "say so and ask again naturally; don't lecture."
    ),
    ReceptionState.VISITOR_DETECTED: (
        "Greet the visitor warmly. Ask their name or who they're here to see "
        "if they haven't said yet. When they tell you a name, repeat it back "
        "briefly to confirm; if they say yes, call register_guest with the "
        "EXACT name you heard from the visitor and confirmed=true. Never "
        "invent a name. If they're here to see someone, call lookup_employee. "
        "Be conversational — short, friendly replies. ALWAYS respond to "
        "whatever they say; never go silent."
    ),
    ReceptionState.GREETING: (
        "Greet the visitor. If they haven't said why they're here yet, "
        "ask whether they have an appointment or are here to see someone."
    ),
    ReceptionState.ASK_NAME: (
        "Ask the visitor their name or who they're here to see. When they "
        "answer, confirm the name back briefly and if they say yes, call "
        "register_guest(confirmed=true). If you genuinely couldn't hear, "
        "ask once more naturally. Keep replies short and friendly."
    ),
    ReceptionState.MULTIPLE_PEOPLE: (
        "More than one face is in view. Say 'I see more than one person — could "
        "whoever's checking in step forward please?' Do NOT call register_guest, "
        "lookup_employee, or send_email until the state changes back."
    ),
    ReceptionState.RECOGNIZED: (
        "Acknowledge the visitor by name. "
        "Then call get_today_calendar to look up their appointment."
    ),
    ReceptionState.CHECKING_APPOINTMENT: (
        "Briefly let the visitor know you're checking the schedule."
    ),
    ReceptionState.APPOINTMENT_MATCHED: (
        "Use the appointment= and employee= values from the context above. "
        "Say something like: 'Great, I have you down for <appointment> with "
        "<employee> — I'll let them know you're here.' Then immediately call "
        "send_email to that host. If visitor= is empty, ask their name first."
    ),
    ReceptionState.NO_APPOINTMENT: (
        "Politely tell the visitor you don't have them on today's schedule. "
        "Offer to take a message or notify someone."
    ),
    ReceptionState.NOTIFYING_EMPLOYEE: (
        "Briefly tell the visitor you're notifying their host."
    ),
    ReceptionState.NOTIFIED: (
        "Use the employee= from context. Say: 'Done — I've notified <employee>. "
        "Please have a seat, they'll be with you shortly.' Be warm, not robotic."
    ),
    ReceptionState.EMAIL_FAILED: (
        "Apologize that you couldn't reach the host right now. "
        "Suggest the visitor wait briefly while you try again."
    ),
    # WAITING deliberately carries no directive.
    ReceptionState.WAITING: "",
    ReceptionState.COMPLETE: (
        "Thank the visitor warmly and wish them a good day."
    ),
    ReceptionState.UNKNOWN_EMPLOYEE: (
        "Tell the visitor that name isn't in your directory. "
        "Offer to find someone else who can help."
    ),
    ReceptionState.ERROR: (
        "Apologize for the issue and ask the visitor to wait a moment."
    ),
}
553
+
554
+
555
def next_action_hint(state: ReceptionState) -> str:
    """Look up the workflow directive for ``state``.

    Returns the entry from ``_NEXT_ACTION_HINTS``, or an empty string when
    the state has no entry.
    """
    hint = _NEXT_ACTION_HINTS.get(state, "")
    return hint
558
+
559
+
560
# States in which the bot speaks IMMEDIATELY on entry instead of waiting
# for the visitor to say something first.
#
# Most members are reached from an in-flight LLM response cycle (a tool
# returned and the state advanced), so the visitor is waiting for the bot
# to finish what it started. VISITOR_DETECTED is the deliberate exception:
# it is included so the bot greets as soon as a face is detected, because
# silently waiting made it look unresponsive and visitors hesitate when
# they can't tell whether the bot is "on" yet.
#
# Every state not listed here waits for the visitor to speak first.
_SPEAK_NOW_STATES: frozenset[ReceptionState] = frozenset((
    ReceptionState.VISITOR_DETECTED,
    ReceptionState.RECOGNIZED,
    ReceptionState.APPOINTMENT_MATCHED,
    ReceptionState.NO_APPOINTMENT,
    ReceptionState.NOTIFIED,
    ReceptionState.EMAIL_FAILED,
    ReceptionState.UNKNOWN_EMPLOYEE,
))
577
+
578
+
579
def should_speak_immediately(state: ReceptionState) -> bool:
    """Report whether entering ``state`` should trigger an immediate reply.

    True exactly when ``state`` is a member of ``_SPEAK_NOW_STATES``; for
    every other state the bot waits for the visitor to speak.
    """
    speaks_now = state in _SPEAK_NOW_STATES
    return speaks_now
src/reachy_mini_receptionist/dance_emotion_moves.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Dance and emotion moves for the movement queue system.
2
+
3
+ This module implements dance moves and emotions as Move objects that can be queued
4
+ and executed sequentially by the MovementManager.
5
+ """
6
+
7
+ from __future__ import annotations
8
+ import logging
9
+ from typing import Tuple
10
+
11
+ import numpy as np
12
+ from numpy.typing import NDArray
13
+
14
+ from reachy_mini.motion.move import Move
15
+ from reachy_mini.motion.recorded_move import RecordedMoves
16
+ from reachy_mini_dances_library.dance_move import DanceMove
17
+
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class DanceQueueMove(Move):  # type: ignore
    """Wrapper for dance moves to work with the movement queue system."""

    def __init__(self, move_name: str):
        """Build the underlying ``DanceMove`` for ``move_name``."""
        self.dance_move = DanceMove(move_name)
        self.move_name = move_name

    @property
    def duration(self) -> float:
        """Duration property required by official Move interface."""
        return float(self.dance_move.duration)

    def evaluate(self, t: float) -> tuple[NDArray[np.float64] | None, NDArray[np.float64] | None, float | None]:
        """Evaluate dance move at time t.

        Returns ``(head_pose, antennas, body_yaw)``. If the wrapped move
        raises, the error is logged and a neutral pose is returned.
        """
        try:
            head_pose, antennas, body_yaw = self.dance_move.evaluate(t)

            # A tuple is converted to an array; the explicit dtype keeps
            # integer tuples from producing an int array, which would
            # contradict the declared float64 return type.
            if isinstance(antennas, tuple):
                antennas = np.array([antennas[0], antennas[1]], dtype=np.float64)

            return (head_pose, antennas, body_yaw)

        except Exception as e:
            logger.error("Error evaluating dance move '%s' at t=%s: %s", self.move_name, t, e)
            # Return neutral pose on error
            from reachy_mini.utils import create_head_pose

            neutral_head_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
            return (neutral_head_pose, np.array([0.0, 0.0], dtype=np.float64), 0.0)
54
+
55
+
56
class EmotionQueueMove(Move):  # type: ignore
    """Wrapper for emotion moves to work with the movement queue system."""

    def __init__(self, emotion_name: str, recorded_moves: RecordedMoves):
        """Fetch the recorded move for ``emotion_name`` from ``recorded_moves``."""
        self.emotion_move = recorded_moves.get(emotion_name)
        self.emotion_name = emotion_name

    @property
    def duration(self) -> float:
        """Duration property required by official Move interface."""
        return float(self.emotion_move.duration)

    def evaluate(self, t: float) -> tuple[NDArray[np.float64] | None, NDArray[np.float64] | None, float | None]:
        """Evaluate emotion move at time t.

        Returns ``(head_pose, antennas, body_yaw)``. If the wrapped move
        raises, the error is logged and a neutral pose is returned.
        """
        try:
            head_pose, antennas, body_yaw = self.emotion_move.evaluate(t)

            # A tuple is converted to an array; the explicit dtype keeps
            # integer tuples from producing an int array, which would
            # contradict the declared float64 return type.
            if isinstance(antennas, tuple):
                antennas = np.array([antennas[0], antennas[1]], dtype=np.float64)

            return (head_pose, antennas, body_yaw)

        except Exception as e:
            logger.error("Error evaluating emotion '%s' at t=%s: %s", self.emotion_name, t, e)
            # Return neutral pose on error
            from reachy_mini.utils import create_head_pose

            neutral_head_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
            return (neutral_head_pose, np.array([0.0, 0.0], dtype=np.float64), 0.0)
88
+
89
+
90
class GotoQueueMove(Move):  # type: ignore
    """Wrapper for goto moves to work with the movement queue system."""

    def __init__(
        self,
        target_head_pose: NDArray[np.float32],
        start_head_pose: NDArray[np.float32] | None = None,
        target_antennas: Tuple[float, float] = (0, 0),
        start_antennas: Tuple[float, float] | None = None,
        target_body_yaw: float = 0,
        start_body_yaw: float | None = None,
        duration: float = 1.0,
    ):
        """Store the start/target poses and the move duration.

        A missing ``start_antennas`` defaults to ``(0, 0)`` and a missing
        ``start_body_yaw`` to ``0``. A missing ``start_head_pose`` is
        replaced by a neutral pose at evaluation time.
        """
        self._duration = duration
        self.target_head_pose = target_head_pose
        self.start_head_pose = start_head_pose
        self.target_antennas = target_antennas
        # Explicit None checks: ``x or default`` raises on array-like
        # inputs, whose truth value is ambiguous.
        self.start_antennas = (0, 0) if start_antennas is None else start_antennas
        self.target_body_yaw = target_body_yaw
        self.start_body_yaw = 0 if start_body_yaw is None else start_body_yaw

    @property
    def duration(self) -> float:
        """Duration property required by official Move interface."""
        return self._duration

    def evaluate(self, t: float) -> tuple[NDArray[np.float64] | None, NDArray[np.float64] | None, float | None]:
        """Evaluate goto move at time t using linear interpolation.

        Progress is ``t / duration`` clamped to ``[0, 1]``. A non-positive
        duration is treated as "already at the target" instead of dividing
        by zero. On any other failure the error is logged and the target
        pose is returned.
        """
        try:
            from reachy_mini.utils import create_head_pose
            from reachy_mini.utils.interpolation import linear_pose_interpolation

            # Clamp progress to [0, 1]; guard against a zero/negative duration.
            if self._duration <= 0:
                t_clamped = 1.0
            else:
                t_clamped = max(0.0, min(1.0, t / self._duration))

            # Use start pose if available, otherwise neutral
            if self.start_head_pose is not None:
                start_pose = self.start_head_pose
            else:
                start_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)

            # Interpolate head pose
            head_pose = linear_pose_interpolation(start_pose, self.target_head_pose, t_clamped)

            # Interpolate antennas - return as numpy array
            antennas = np.array(
                [
                    start + (target - start) * t_clamped
                    for start, target in (
                        (self.start_antennas[0], self.target_antennas[0]),
                        (self.start_antennas[1], self.target_antennas[1]),
                    )
                ],
                dtype=np.float64,
            )

            # Interpolate body yaw
            body_yaw = self.start_body_yaw + (self.target_body_yaw - self.start_body_yaw) * t_clamped

            return (head_pose, antennas, body_yaw)

        except Exception as e:
            logger.error("Error evaluating goto move at t=%s: %s", t, e)
            # Return target pose on error - convert to float64
            target_head_pose_f64 = self.target_head_pose.astype(np.float64)
            target_antennas_array = np.array([self.target_antennas[0], self.target_antennas[1]], dtype=np.float64)
            return (target_head_pose_f64, target_antennas_array, self.target_body_yaw)
src/reachy_mini_receptionist/employees.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Employee directory — read API used by tools, calendar, and the LLM.
2
+
3
+ Two-tier source:
4
+
5
+ 1. **Primary**: ``EmployeeStore`` (SQLite), populated and edited via the
6
+ dashboard's Employees panel.
7
+ 2. **Fallback**: the hardcoded ``_SEED_EMPLOYEES`` constant below. This
8
+ only takes effect when the store is unset (e.g. tests that import
9
+ ``employees`` directly) OR when the store is empty. On a real
10
+ deployment, ``main.py`` constructs the store and seeds it with
11
+ ``_SEED_EMPLOYEES`` on first run; after that, edits via the
12
+ dashboard are the source of truth.
13
+
14
+ Consumers (``lookup_employee``, ``find_email_for``, ``get_all_employees``,
15
+ ``format_for_llm``) keep the same signatures so ``calendar_data`` and the
16
+ ``lookup_employee`` tool need zero changes.
17
+ """
18
+ from __future__ import annotations
19
+
20
+ import logging
21
+ from typing import Any, List, Optional, TypedDict
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
class Employee(TypedDict, total=False):
    """Directory entry as exposed to tools and the LLM.

    Declared with ``total=False``, so every key is optional.
    """

    # Display name of the employee.
    name: str
    # Address used when notifying the employee.
    email: str
    # Alternative names that should resolve to this entry.
    aliases: List[str]
    # Job title, if known.
    title: Optional[str]
31
+
32
+
33
+ # Seed list intentionally empty. A brand-new install starts with zero
34
+ # employees and the dashboard's Employees panel shows an empty state
35
+ # with a "+ Add" button — that's a clearer first-run UX than pre-loading
36
+ # dummy entries the operator has to delete one by one before adding
37
+ # their real team. The seed-load mechanism in employees_store.py is
38
+ # preserved for callers that want to bundle a seed list (e.g. a future
39
+ # import-from-CSV path).
40
+ _SEED_EMPLOYEES: List[Employee] = []
41
+
42
+
43
+ # Process-wide reference to the active store. ``main.py`` calls
44
+ # ``set_store(...)`` after construction. Kept ``None`` in tests / imports
45
+ # that don't go through main, in which case we fall back to the seed list.
46
+ _store: Any = None
47
+
48
+
49
def set_store(store: Any) -> None:
    """Bind the module to ``store`` and log how many entries it holds.

    The count is read via ``store.count()`` when ``store`` is truthy;
    otherwise the log line reports ``n/a``.
    """
    global _store
    _store = store
    bound_count = store.count() if store else "n/a"
    logger.info("employees: bound to store (count=%s)", bound_count)
54
+
55
+
56
def _strip_internal(emp: dict[str, Any]) -> Employee:
    """Project a store row onto the four public ``Employee`` keys.

    Missing ``name``/``email`` become empty strings, ``aliases`` is copied
    into a fresh list (empty when absent or falsy), and ``title`` is passed
    through (``None`` when absent). Any other keys are dropped.
    """
    alias_source = emp.get("aliases") or []
    public: dict[str, Any] = {}
    public["name"] = emp.get("name", "")
    public["email"] = emp.get("email", "")
    public["aliases"] = list(alias_source)
    public["title"] = emp.get("title")
    return public  # type: ignore[return-value]
64
+
65
+
66
def _normalize(s: str) -> str:
    """Return ``s`` stripped and lower-cased; falsy input yields ``""``."""
    text = s if s else ""
    return text.strip().lower()
68
+
69
+
70
def get_all_employees() -> List[Employee]:
    """Return a snapshot of the full directory.

    Reads from the bound store when one is set and it returns rows. If the
    store is unset, empty, or the read raises (logged as a warning), a
    copy of the seed list is returned instead.
    """
    if _store is not None:
        try:
            stored = _store.list_all()
            if stored:
                return list(map(_strip_internal, stored))
        except Exception as e:
            logger.warning("employees.get_all_employees: store read failed (%s); using seed", e)
    seed_copy = [dict(seed) for seed in _SEED_EMPLOYEES]
    return seed_copy  # type: ignore[return-value]
80
+
81
+
82
def lookup_employee(query: str) -> Optional[Employee]:
    """Find an employee by name or alias (case-insensitive, exact-only).

    The bound store is consulted first. When it is unset, has no hit, or
    raises (logged as a warning), the seed list is scanned instead. Blank
    queries return ``None``.
    """
    if not (query or "").strip():
        return None

    if _store is not None:
        try:
            stored_hit = _store.lookup(query)
            if stored_hit:
                return _strip_internal(stored_hit)
        except Exception as e:
            logger.warning("employees.lookup_employee: store read failed (%s); using seed", e)

    wanted = _normalize(query)
    if not wanted:
        return None

    for seed in _SEED_EMPLOYEES:
        # The entry's own name is checked before any of its aliases.
        labels = [seed.get("name", "")]
        labels.extend(seed.get("aliases", []) or [])
        if any(_normalize(label) == wanted for label in labels):
            return dict(seed)  # type: ignore[return-value]
    return None
103
+
104
+
105
def find_email_for(query: str) -> Optional[str]:
    """Resolve a name or alias to that employee's email.

    Returns ``None`` when ``lookup_employee`` finds nothing, or when the
    matched entry has no ``email`` key.
    """
    match = lookup_employee(query)
    if not match:
        return None
    return match.get("email")
109
+
110
+
111
def format_for_llm() -> str:
    """Render the directory as a short, newline-separated listing.

    Each employee becomes one bullet with the name and, when present, the
    aliases in parentheses. An empty directory yields a fixed sentence.
    """
    directory = get_all_employees()
    if not directory:
        return "Employee directory is empty."

    def render(entry: Employee) -> str:
        nicknames = entry.get("aliases") or []
        suffix = f" (also: {', '.join(nicknames)})" if nicknames else ""
        return f"  - {entry.get('name', '?')}{suffix}"

    return "\n".join(["Employee directory:", *(render(entry) for entry in directory)])
src/reachy_mini_receptionist/employees_store.py ADDED
@@ -0,0 +1,342 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SQLite-backed employee directory CRUD.
2
+
3
+ The dashboard's Employees panel uses this; ``employees.py`` reads from
4
+ this store too (with a fall-through to the hardcoded ``_SEED_EMPLOYEES`` list
5
+ when the store is empty, which only happens on a brand-new install
6
+ before the seed runs).
7
+
8
+ Schema matches the operator's mental model: name, email, optional title,
9
+ optional list of aliases the bot might hear (e.g. "AJ" for "Arjun
10
+ Mehta"). Name uniqueness is enforced case-insensitively so the bot can
11
+ never end up with two "Mukul" entries that route differently.
12
+
13
+ Lives next to ``visitor_log.db`` in the app's instance directory. Uses
14
+ the same WAL + per-call connection pattern.
15
+ """
16
+ from __future__ import annotations
17
+
18
+ import json
19
+ import logging
20
+ import sqlite3
21
+ import threading
22
+ from datetime import datetime
23
+ from pathlib import Path
24
+ from typing import Any, Iterable, List, Optional
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
+ _SCHEMA = """
30
+ CREATE TABLE IF NOT EXISTS employees (
31
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
32
+ name TEXT NOT NULL,
33
+ email TEXT NOT NULL,
34
+ title TEXT,
35
+ aliases TEXT,
36
+ created_at TEXT NOT NULL,
37
+ updated_at TEXT NOT NULL
38
+ );
39
+
40
+ CREATE UNIQUE INDEX IF NOT EXISTS idx_employees_name_lower
41
+ ON employees (LOWER(name));
42
+ """
43
+
44
+
45
class EmployeeExistsError(Exception):
    """Signals that an employee with the same name, ignoring case, is already stored."""
47
+
48
+
49
class EmployeeNotFoundError(Exception):
    """Signals that a CRUD operation referred to an employee id that does not exist."""
51
+
52
+
53
+ class EmployeeStore:
54
+ """Thread-safe employee directory backed by a single SQLite file."""
55
+
56
def __init__(self, db_path: str | Path) -> None:
    """Open (creating if needed) the employee database at ``db_path``.

    Ensures the parent directory exists, creates the lock that serialises
    access from this instance, and initialises the schema.
    """
    location = Path(db_path)
    self._db_path = location
    location.parent.mkdir(parents=True, exist_ok=True)
    self._lock = threading.Lock()
    self._init_schema()
    logger.info("EmployeeStore initialised at %s", self._db_path)
62
+
63
+ # ------------------------------------------------------------------
64
+ # Connection / schema
65
+ # ------------------------------------------------------------------
66
+
67
def _connect(self) -> sqlite3.Connection:
    """Open a fresh connection to the store's database file.

    Rows come back as ``sqlite3.Row`` and ``PRAGMA synchronous`` is set to
    NORMAL. The caller is responsible for closing the connection.
    """
    connection = sqlite3.connect(
        self._db_path,
        timeout=5.0,
        check_same_thread=False,
    )
    connection.row_factory = sqlite3.Row
    connection.execute("PRAGMA synchronous=NORMAL")
    return connection
72
+
73
def _init_schema(self) -> None:
    """Switch the database to WAL journaling and apply ``_SCHEMA``.

    Runs under the instance lock; the connection is closed even if a
    statement fails.
    """
    with self._lock:
        connection = self._connect()
        try:
            for apply, statement in (
                (connection.execute, "PRAGMA journal_mode=WAL"),
                (connection.executescript, _SCHEMA),
            ):
                apply(statement)
            connection.commit()
        finally:
            connection.close()
82
+
83
+ # ------------------------------------------------------------------
84
+ # Row <-> dict helpers
85
+ # ------------------------------------------------------------------
86
+
87
+ @staticmethod
88
+ def _row_to_dict(row: sqlite3.Row) -> dict[str, Any]:
89
+ aliases_raw = row["aliases"] or "[]"
90
+ try:
91
+ aliases = json.loads(aliases_raw)
92
+ if not isinstance(aliases, list):
93
+ aliases = []
94
+ except Exception:
95
+ aliases = []
96
+ return {
97
+ "id": row["id"],
98
+ "name": row["name"],
99
+ "email": row["email"],
100
+ "title": row["title"],
101
+ "aliases": aliases,
102
+ "created_at": row["created_at"],
103
+ "updated_at": row["updated_at"],
104
+ }
105
+
106
+ @staticmethod
107
+ def _aliases_to_text(aliases: Optional[Iterable[str]]) -> str:
108
+ cleaned = []
109
+ seen: set[str] = set()
110
+ for a in aliases or []:
111
+ s = (a or "").strip()
112
+ if not s:
113
+ continue
114
+ key = s.lower()
115
+ if key in seen:
116
+ continue
117
+ seen.add(key)
118
+ cleaned.append(s)
119
+ return json.dumps(cleaned)
120
+
121
+ # ------------------------------------------------------------------
122
+ # Reads
123
+ # ------------------------------------------------------------------
124
+
125
def list_all(self) -> List[dict[str, Any]]:
    """Return every employee as a dict, ordered by lower-cased name.

    The query runs under the instance lock on a short-lived connection;
    rows are converted to dicts after the lock is released.
    """
    with self._lock:
        connection = self._connect()
        try:
            cursor = connection.execute(
                "SELECT * FROM employees ORDER BY LOWER(name)",
            )
            fetched = cursor.fetchall()
        finally:
            connection.close()
    return list(map(self._row_to_dict, fetched))
135
+
136
def count(self) -> int:
    """Return the number of rows in the ``employees`` table."""
    with self._lock:
        connection = self._connect()
        try:
            result = connection.execute("SELECT COUNT(*) AS n FROM employees").fetchone()
        finally:
            connection.close()
    if not result:
        return 0
    return int(result["n"])
144
+
145
def get_by_id(self, employee_id: int) -> Optional[dict[str, Any]]:
    """Fetch one employee by primary key.

    ``employee_id`` is coerced with ``int()`` before the query. Returns
    the row as a dict, or ``None`` when no row has that id.
    """
    key = (int(employee_id),)
    with self._lock:
        connection = self._connect()
        try:
            found = connection.execute(
                "SELECT * FROM employees WHERE id = ?",
                key,
            ).fetchone()
        finally:
            connection.close()
    if not found:
        return None
    return self._row_to_dict(found)
156
+
157
def lookup(self, query: str) -> Optional[dict[str, Any]]:
    """Find an employee by name OR alias (case-insensitive, exact match).

    Mirrors the original ``employees.lookup_employee`` semantics: exact
    match only, so that "Sam" can never silently route to "Samira".

    Both the name and the alias comparison are done in Python.  SQLite's
    built-in ``LOWER()`` only folds ASCII letters, so comparing
    ``LOWER(name)`` against a Python-lowercased query never matched names
    containing non-ASCII capitals (for example "Élodie").

    Args:
        query: Name or alias to look up; surrounding whitespace is ignored.

    Returns:
        The matching employee dict, or ``None`` when the query is blank or
        nothing matches.  A name match on any row wins over an alias match.
    """
    wanted = (query or "").strip().lower()
    if not wanted:
        return None
    with self._lock:
        conn = self._connect()
        try:
            rows = conn.execute("SELECT * FROM employees ORDER BY id").fetchall()
        finally:
            conn.close()
    records = [self._row_to_dict(row) for row in rows]
    # Pass 1: names take priority over aliases across the whole table.
    for record in records:
        if str(record.get("name") or "").lower() == wanted:
            return record
    # Pass 2: aliases.  Non-string entries are skipped rather than crashing.
    for record in records:
        for alias in record.get("aliases") or []:
            if isinstance(alias, str) and alias.strip().lower() == wanted:
                return record
    return None
184
+
185
+ # ------------------------------------------------------------------
186
+ # Writes
187
+ # ------------------------------------------------------------------
188
+
189
def create(
    self,
    name: str,
    email: str,
    aliases: Optional[Iterable[str]] = None,
    title: Optional[str] = None,
) -> dict[str, Any]:
    """Insert a new employee and return the stored row as a dict.

    ``name``, ``email`` and ``title`` are stripped before being stored; a
    blank title is stored as NULL, matching what ``update()`` does.
    ``created_at`` and ``updated_at`` are both set to the current UTC time
    as a naive ISO-8601 string with second precision.

    Raises:
        ValueError: ``name`` or ``email`` is empty after stripping.
        EmployeeExistsError: the INSERT violated a database constraint.
            NOTE(review): every ``sqlite3.IntegrityError`` is reported as a
            duplicate name; the schema is not visible from here.
    """
    # Function-scope import: only ``datetime`` itself is known to be
    # imported at module level.
    from datetime import timezone

    name = (name or "").strip()
    email = (email or "").strip()
    if not name:
        raise ValueError("name is required")
    if not email:
        raise ValueError("email is required")
    title = (title or "").strip() or None
    aliases_text = self._aliases_to_text(aliases)
    # datetime.utcnow() is deprecated since Python 3.12; this yields the
    # same naive-UTC string it used to produce.
    now = (
        datetime.now(timezone.utc)
        .replace(tzinfo=None)
        .isoformat(timespec="seconds")
    )
    with self._lock:
        conn = self._connect()
        try:
            try:
                cur = conn.execute(
                    """
                    INSERT INTO employees (name, email, title, aliases, created_at, updated_at)
                    VALUES (?, ?, ?, ?, ?, ?)
                    """,
                    (name, email, title, aliases_text, now, now),
                )
                conn.commit()
            except sqlite3.IntegrityError as e:
                raise EmployeeExistsError(
                    f"An employee named {name!r} already exists (case-insensitive)"
                ) from e
            new_id = cur.lastrowid
            row = conn.execute(
                "SELECT * FROM employees WHERE id = ?",
                (new_id,),
            ).fetchone()
        finally:
            conn.close()
    logger.info("EmployeeStore.create: id=%s name=%r email=%r", new_id, name, email)
    return self._row_to_dict(row)
229
+
230
def update(
    self,
    employee_id: int,
    *,
    name: Optional[str] = None,
    email: Optional[str] = None,
    aliases: Optional[Iterable[str]] = None,
    title: Optional[str] = None,
) -> Optional[dict[str, Any]]:
    """Update the supplied fields of one employee.

    Only arguments that are not ``None`` are written.  ``name`` and
    ``email`` are stripped and must not be blank; a blank ``title`` clears
    the title (stored as NULL).  ``updated_at`` is refreshed with the
    current UTC time whenever at least one field is written.

    Returns:
        The updated employee dict, the current row when no field was
        supplied, or ``None`` when no employee has ``employee_id``.

    Raises:
        ValueError: ``name`` or ``email`` is blank after stripping.
        EmployeeExistsError: the UPDATE violated a database constraint
            (reported as a case-insensitive name clash).
    """
    # Function-scope import: only ``datetime`` itself is known to be
    # imported at module level.
    from datetime import timezone

    sets: List[str] = []
    params: List[Any] = []
    if name is not None:
        cleaned = name.strip()
        if not cleaned:
            raise ValueError("name cannot be empty")
        sets.append("name = ?")
        params.append(cleaned)
    if email is not None:
        cleaned = email.strip()
        if not cleaned:
            raise ValueError("email cannot be empty")
        sets.append("email = ?")
        params.append(cleaned)
    if aliases is not None:
        sets.append("aliases = ?")
        params.append(self._aliases_to_text(aliases))
    if title is not None:
        sets.append("title = ?")
        params.append(title.strip() or None)
    if not sets:
        # Nothing to update; return current row.
        return self.get_by_id(employee_id)
    # datetime.utcnow() is deprecated since Python 3.12; this yields the
    # same naive-UTC string it used to produce.
    stamp = (
        datetime.now(timezone.utc)
        .replace(tzinfo=None)
        .isoformat(timespec="seconds")
    )
    sets.append("updated_at = ?")
    params.append(stamp)
    params.append(int(employee_id))
    with self._lock:
        conn = self._connect()
        try:
            try:
                # Only fixed column fragments are interpolated into the SQL;
                # every value is bound as a parameter.
                cur = conn.execute(
                    f"UPDATE employees SET {', '.join(sets)} WHERE id = ?",
                    params,
                )
                conn.commit()
            except sqlite3.IntegrityError as e:
                raise EmployeeExistsError(
                    "Another employee already uses that name (case-insensitive)"
                ) from e
            if cur.rowcount == 0:
                return None
            row = conn.execute(
                "SELECT * FROM employees WHERE id = ?",
                (int(employee_id),),
            ).fetchone()
        finally:
            conn.close()
    if row:
        logger.info("EmployeeStore.update: id=%s -> %s", employee_id, dict(row))
        return self._row_to_dict(row)
    return None
290
+
291
def delete(self, employee_id: int) -> bool:
    """Delete the employee with ``employee_id``.

    Returns ``True`` when a row was removed and ``False`` when no row had
    that id.  A successful delete is logged at INFO level.
    """
    with self._lock:
        db = self._connect()
        try:
            cursor = db.execute(
                "DELETE FROM employees WHERE id = ?",
                (int(employee_id),),
            )
            db.commit()
            was_deleted = cursor.rowcount > 0
        finally:
            db.close()
    if not was_deleted:
        return False
    logger.info("EmployeeStore.delete: id=%s", employee_id)
    return True
306
+
307
+ # ------------------------------------------------------------------
308
+ # Seeding
309
+ # ------------------------------------------------------------------
310
+
311
def seed_if_empty(self, employees: Iterable[dict[str, Any]]) -> int:
    """Bulk-insert ``employees`` only if the table is currently empty.

    Returns the number inserted.  Idempotent across restarts: the seed
    runs once on a brand-new install and is then a no-op.  Seeding is
    best-effort per entry, so one bad entry never aborts the batch:

    * duplicate names inside the seed list are skipped (first one wins);
    * entries that are not mappings are skipped (previously such an entry
      raised again from inside the error handler and aborted the seed);
    * any other failure from ``create()`` is logged and skipped.
    """
    # Function-scope import so the block does not depend on module imports
    # that are not visible from here.
    from collections.abc import Mapping

    if self.count() > 0:
        return 0
    inserted = 0
    for emp in employees:
        if not isinstance(emp, Mapping):
            logger.warning("Seed: skipping malformed entry %r", emp)
            continue
        try:
            self.create(
                name=emp.get("name", ""),
                email=emp.get("email", ""),
                aliases=emp.get("aliases") or [],
                title=emp.get("title"),
            )
        except EmployeeExistsError:
            logger.warning(
                "Seed: skipping duplicate %r", emp.get("name"),
            )
        except Exception as e:  # deliberate: seeding is best-effort
            logger.warning(
                "Seed: failed to insert %r: %s", emp.get("name"), e,
            )
        else:
            inserted += 1
    if inserted:
        logger.info("EmployeeStore: seeded %d employee(s)", inserted)
    return inserted
src/reachy_mini_receptionist/face_db.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """File-based face store for OpenCV LBPH recognition.
2
+
3
+ Design decisions:
4
+ - Stores 100×100 grayscale face crops as PNG files named after the guest.
5
+ e.g. guests/Beyonce.png, guests/Elon Musk.png
6
+ - No database required — files are the database. Easy to inspect, edit,
7
+ or delete with any file manager.
8
+ - Max 100 guests. When full, the oldest file (by mtime) is replaced (FIFO).
9
+ - Thread-safe: all writes use a threading.Lock.
10
+ - The guests/ directory lives in the app instance directory so it persists
11
+ across restarts.
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import logging
16
+ import threading
17
+ from datetime import datetime
18
+ from pathlib import Path
19
+ from typing import List, Optional, Tuple
20
+
21
+ import cv2
22
+ import numpy as np
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+ MAX_GUESTS = 100
27
+
28
+
29
def _safe_filename(name: str) -> str:
    """Sanitise a guest name so it is safe to use as a filename.

    Surrounding whitespace is removed, then every character that is
    reserved on Windows/Linux/macOS (``\\ / : * ? " < > |``) and every ASCII
    control character (including NUL, which makes path operations raise) is
    replaced with ``_``.  An empty result becomes ``"unknown"``.
    """
    unsafe = '\\/:*?"<>|'
    table = {ord(ch): "_" for ch in unsafe}
    # Control characters 0x00-0x1F plus DEL are never valid in file names.
    table.update({code: "_" for code in (*range(32), 127)})
    return name.strip().translate(table) or "unknown"
35
+
36
+
37
+ class FaceDatabase:
38
+ """File-based face crop store (PNG per guest).
39
+
40
+ Public API is intentionally identical to the previous SQLite version so
41
+ that all callers (FaceRecognitionWorker, register_guest tool, API
42
+ endpoints) need zero changes.
43
+ """
44
+
45
+ def __init__(self, db_path: str | Path) -> None:
46
+ # Accept the old ``guests.db`` path and derive the sibling directory
47
+ # from it so the call-site in main.py doesn't need to change.
48
+ db_path = Path(db_path)
49
+ self._guests_dir = db_path.parent / "guests"
50
+ self._guests_dir.mkdir(parents=True, exist_ok=True)
51
+ self._lock = threading.Lock()
52
+ logger.info("FaceDatabase (file-based) initialised at %s", self._guests_dir)
53
+
54
+ # ------------------------------------------------------------------
55
+ # Internal helpers
56
+ # ------------------------------------------------------------------
57
+
58
+ def _path_for(self, name: str) -> Path:
59
+ return self._guests_dir / f"{_safe_filename(name)}.png"
60
+
61
+ def _all_png_files(self) -> List[Path]:
62
+ """Return all .png files sorted newest-first (by mtime)."""
63
+ files = list(self._guests_dir.glob("*.png"))
64
+ files.sort(key=lambda p: p.stat().st_mtime, reverse=True)
65
+ return files
66
+
67
+ # ------------------------------------------------------------------
68
+ # Write
69
+ # ------------------------------------------------------------------
70
+
71
+ def add_or_update_guest(self, name: str, face_crop: np.ndarray) -> None:
72
+ """Save a 100×100 grayscale face crop as ``<guests_dir>/<name>.png``.
73
+
74
+ If the directory already has MAX_GUESTS different entries and ``name``
75
+ is new, the oldest file (by mtime) is deleted first (FIFO eviction).
76
+ """
77
+ target = self._path_for(name)
78
+
79
+ with self._lock:
80
+ if not target.exists():
81
+ files = list(self._guests_dir.glob("*.png"))
82
+ if len(files) >= MAX_GUESTS:
83
+ # Evict the oldest file
84
+ oldest = min(files, key=lambda p: p.stat().st_mtime)
85
+ oldest.unlink()
86
+ logger.info("Evicted oldest guest file: %s (capacity=%d)", oldest.name, MAX_GUESTS)
87
+
88
+ ok = cv2.imwrite(str(target), face_crop)
89
+ if ok:
90
+ logger.info("Saved guest '%s' → %s", name, target)
91
+ else:
92
+ raise RuntimeError(f"cv2.imwrite failed for path: {target}")
93
+
94
+ # ------------------------------------------------------------------
95
+ # Read
96
+ # ------------------------------------------------------------------
97
+
98
+ def get_all_guests(self) -> List[dict]:
99
+ """Return all guests as dicts with keys: name, timestamp, thumbnail_url."""
100
+ with self._lock:
101
+ files = self._all_png_files()
102
+ result = []
103
+ for f in files:
104
+ mtime = f.stat().st_mtime
105
+ ts = datetime.fromtimestamp(mtime).strftime("%Y-%m-%d %H:%M:%S")
106
+ guest_name = f.stem # filename without .png extension
107
+ result.append({
108
+ "name": guest_name,
109
+ "timestamp": ts,
110
+ "thumbnail_url": f"/guest_images/{f.name}",
111
+ })
112
+ return result
113
+
114
+ def get_all_guests_with_crops(self) -> List[Tuple[str, np.ndarray]]:
115
+ """Return list of (name, face_crop) for LBPH recognizer training."""
116
+ with self._lock:
117
+ files = self._all_png_files()
118
+ result = []
119
+ for f in files:
120
+ crop = cv2.imread(str(f), cv2.IMREAD_GRAYSCALE)
121
+ if crop is not None:
122
+ result.append((f.stem, crop))
123
+ else:
124
+ logger.warning("Could not read face crop from %s — skipping", f)
125
+ return result
126
+
127
+ def count(self) -> int:
128
+ with self._lock:
129
+ return len(list(self._guests_dir.glob("*.png")))
130
+
131
+ def clear(self) -> None:
132
+ """Wipe all guest images (useful for demo reset)."""
133
+ with self._lock:
134
+ for f in self._guests_dir.glob("*.png"):
135
+ f.unlink()
136
+ logger.info("FaceDatabase cleared")
137
+
138
+ def cleanup_older_than(self, max_age_days: float) -> int:
139
+ """Delete guest PNGs whose mtime is older than ``max_age_days``.
140
+
141
+ Returns the number of files removed. Pass 0 or negative to disable
142
+ cleanup (returns immediately). Failures to remove individual files
143
+ are logged but do not raise — TTL is best-effort.
144
+ """
145
+ import time as _t
146
+
147
+ if max_age_days <= 0:
148
+ return 0
149
+ cutoff = _t.time() - (max_age_days * 86400.0)
150
+ removed = 0
151
+ with self._lock:
152
+ for f in list(self._guests_dir.glob("*.png")):
153
+ try:
154
+ if f.stat().st_mtime < cutoff:
155
+ f.unlink()
156
+ removed += 1
157
+ logger.info(
158
+ "Face TTL: removed %s (older than %.1f days)",
159
+ f.name, max_age_days,
160
+ )
161
+ except Exception as e:
162
+ logger.warning("Face TTL: could not remove %s: %s", f, e)
163
+ return removed
164
+
165
+ def delete_guest(self, name: str) -> bool:
166
+ """Delete one guest PNG by name.
167
+
168
+ Returns True if the file existed and was removed, False otherwise.
169
+ """
170
+ target = self._path_for(name)
171
+ with self._lock:
172
+ if not target.exists():
173
+ return False
174
+ target.unlink()
175
+ logger.info("Deleted guest '%s' -> %s", name, target)
176
+ return True
177
+
178
+ # ------------------------------------------------------------------
179
+ # Expose the guests directory path (needed by main.py to mount statics)
180
+ # ------------------------------------------------------------------
181
+
182
+ @property
183
+ def guests_dir(self) -> Path:
184
+ return self._guests_dir
src/reachy_mini_receptionist/face_recognition_worker.py ADDED
@@ -0,0 +1,698 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Background face recognition worker — OpenCV YuNet detection + LBPH recognition.
2
+
3
+ Design decisions:
4
+ - Uses cv2.FaceDetectorYN (YuNet) for face detection: returns a real confidence
5
+ score (0–1) per bounding box. Model (~400 KB ONNX) is downloaded to
6
+ ~/.cache/reachy_mini/ on first use.
7
+ - Uses OpenCV LBPH (Local Binary Pattern Histogram) recognizer for identification.
8
+ Same algorithm used in embedded/microcontroller face recognition.
9
+ - Runs in a daemon thread so it never blocks the audio/LLM loop.
10
+ - Processes every Nth frame to keep CPU usage low.
11
+ - Annotates frames with bounding boxes + labels for the MJPEG dashboard stream.
12
+ - All shared state is protected by threading.Lock so tools can read safely.
13
+ - Frames are pulled from CameraWorker (robot camera via SDK) rather than opening
14
+ a raw cv2.VideoCapture, so the correct camera is always used on Lite setups.
15
+
16
+ Detection quality metrics:
17
+ - Detection confidence (YuNet score 0–1): higher = more certain this is a face.
18
+ Used as a multiplier in the quality score so uncertain detections rank lower.
19
+ - Blur score (Laplacian variance): cv2.Laplacian(crop, cv2.CV_64F).var()
20
+ High value = sharp / lots of edge detail → good crop.
21
+ Low value = blurry / uniform → bad crop (head mid-motion, etc.).
22
+ Crops below _MIN_BLUR_SCORE are considered low-quality, but they are still
23
+ stored so dashboard previews and fallback checks can return "best available"
24
+ evidence instead of empty results.
25
+ - Face area (w×h pixels) is kept as a secondary tiebreaker: among comparably sharp
26
+ crops the closer/larger face is still preferred.
27
+ - Combined quality score: blur_score × log(face_area) × max(det_confidence, 0.01)
28
+
29
+ Requirements: opencv-contrib-python (pip install opencv-contrib-python)
30
+ The contrib package is a superset of opencv-python — don't install both.
31
+ """
32
+ from __future__ import annotations
33
+
34
+ import logging
35
+ import math
36
+ import pathlib
37
+ import threading
38
+ import time
39
+ import urllib.request
40
+ from collections import deque
41
+ from typing import Any, Callable, Optional
42
+
43
+ import cv2
44
+ import numpy as np
45
+
46
+ logger = logging.getLogger(__name__)
47
+
48
+ # How many frames to skip between recognition passes
49
+ _PROCESS_EVERY_N_FRAMES = 10
50
+ # LBPH confidence threshold: LOWER = stricter match (distance, not similarity).
51
+ #
52
+ # Calibration notes (single 100×100 grayscale crop per guest):
53
+ # ≤ 50 almost certainly the same person (well-lit, same angle)
54
+ # 50 - 75 plausible match (different angle/lighting OK)
55
+ # 75 - 100 weak — often false positives on similar-looking people
56
+ # 100 - 160 printed photos, strangers who happen to resemble a guest
57
+ # > 160 unrelated face / no match
58
+ #
59
+ # Default was 110 which silently mis-recognised strangers as registered
60
+ # guests (we saw a stranger scored 103 and got greeted as "Jon"). 75 is
61
+ # the right starting point: prefers "I don't recognise you, please tell
62
+ # me your name" over a wrong-greeting failure. Operators can tune via
63
+ # the FACE_LBPH_THRESHOLD env var if their lighting/angles demand it.
64
+ _DEFAULT_CONFIDENCE_THRESHOLD = 75.0
65
+
66
+
67
def _resolve_threshold() -> float:
    """Read the FACE_LBPH_THRESHOLD env var, falling back to the default.

    The override is used only when it parses as a finite, strictly positive
    float.  An unset or blank variable silently selects the default; any
    other rejected value ("abc", "0", "-5", "nan", "inf") also selects the
    default but logs a warning so a typo does not go unnoticed.  NaN must be
    rejected explicitly because ``nan <= 0`` is False, and a NaN threshold
    would make every later comparison against it False.
    """
    import os

    raw = os.getenv("FACE_LBPH_THRESHOLD")
    if raw is None or not raw.strip():
        return _DEFAULT_CONFIDENCE_THRESHOLD
    try:
        val = float(raw)
    except ValueError:
        val = math.nan
    if not math.isfinite(val) or val <= 0:
        logger.warning(
            "Ignoring invalid FACE_LBPH_THRESHOLD=%r; using default %.1f",
            raw, _DEFAULT_CONFIDENCE_THRESHOLD,
        )
        return _DEFAULT_CONFIDENCE_THRESHOLD
    return val
80
+
81
+
82
+ _CONFIDENCE_THRESHOLD = _resolve_threshold()
83
+ # Laplacian-variance cut-off below which a crop counts as low quality (it is still kept, just ranked lower).
84
+ # Laplacian variance measures image sharpness: high = sharp, low = blurry.
85
+ # Typical values: sharp well-lit face 150–400, soft/distant face 60–120,
86
+ # head mid-motion 5–40, total blur < 5.
87
+ # Crops below this threshold are considered low quality and ranked lower.
88
+ _MIN_BLUR_SCORE = 80.0 # lower to ~50 if valid faces get rejected; raise to ~120 if blur sneaks through
89
+
90
+ # Central detection zone: fractions of frame dimensions.
91
+ # Faces whose centre falls outside this zone are ignored.
92
+ # 0.25 margin → active zone = middle 50% horizontally and 80% vertically.
93
+ _ZONE_X_MARGIN = 0.25 # 25% margin on each side → 50% wide centre zone
94
+ _ZONE_Y_MARGIN = 0.10 # 10% margin on top/bottom → 80% tall centre zone
95
+
96
+ # Rolling window duration (seconds) for selecting the best recent face crop.
97
+ _BEST_FACE_WINDOW_SECONDS = 5.0
98
+ # Minimum spread (seconds) between first and last detection in the window.
99
+ # A face must have been continuously present for this long to be returned by
100
+ # best_recent_face(). Prevents a briefly passing face from overriding the
101
+ # person who has been standing in front of the robot for several seconds.
102
+ _MIN_DWELL_SECONDS = 1.5
103
+
104
+ # Stable identity transition settings for external face context events.
105
+ # Observed identity must remain unchanged for _FACE_STATE_CONFIRM_SECONDS before
106
+ # becoming the stable state, except no-face which uses a longer grace period.
107
+ _FACE_STATE_CONFIRM_SECONDS = 1.2
108
+ _NO_FACE_CONFIRM_SECONDS = 2.5
109
+ # Slightly longer dwell for multi-person before promoting — people walking
110
+ # past should not trip MULTIPLE_PEOPLE.
111
+ _MULTIPLE_PEOPLE_CONFIRM_SECONDS = 1.5
112
+ # Minimum interval between emitted external face events.
113
+ _FACE_EVENT_COOLDOWN_SECONDS = 5.0
114
+ # Minimum number of in-zone faces to be considered "multiple".
115
+ # Set absurdly high to effectively DISABLE MULTIPLE_PEOPLE state — in the
116
+ # pilot lobby, background people / posters were tripping the state too
117
+ # easily and the bot would go silent mid-conversation. The state has no
118
+ # SPEAK_NOW cue, so once triggered the visitor's speech is ignored until
119
+ # the camera clears. For a single-receptionist deployment the largest-
120
+ # face heuristic the worker already uses is enough to pick the visitor.
121
+ _MULTIPLE_PEOPLE_THRESHOLD = 999
122
+
123
+ # YuNet (cv2.FaceDetectorYN) detection settings
124
+ _YUNET_SCORE_THRESHOLD = 0.6 # minimum per-face detection confidence (0–1)
125
+ _YUNET_NMS_THRESHOLD = 0.3 # non-max suppression overlap threshold
126
+ _YUNET_TOP_K = 5000 # max candidate detections before NMS
127
+ _YUNET_MODEL_URL = (
128
+ "https://github.com/opencv/opencv_zoo/raw/main/models/face_detection_yunet/"
129
+ "face_detection_yunet_2023mar.onnx"
130
+ )
131
+ _YUNET_CACHE_PATH = pathlib.Path.home() / ".cache" / "reachy_mini" / "face_detection_yunet_2023mar.onnx"
132
+
133
+
134
def _ensure_yunet_model() -> pathlib.Path:
    """Download the YuNet ONNX model (~400 KB) to cache if not already present.

    The download is written to a sibling ``.part`` file and renamed into
    place only once it has completed.  Writing straight to the cache path
    meant an interrupted download left a truncated file that every later
    start treated as a valid model.

    Returns:
        The path of the cached model file.

    Raises:
        OSError: the download or the rename failed (``urllib.error.URLError``
            is a subclass of ``OSError``).
    """
    if _YUNET_CACHE_PATH.exists():
        return _YUNET_CACHE_PATH
    _YUNET_CACHE_PATH.parent.mkdir(parents=True, exist_ok=True)
    partial = _YUNET_CACHE_PATH.with_name(_YUNET_CACHE_PATH.name + ".part")
    logger.info("Downloading YuNet model → %s", _YUNET_CACHE_PATH)
    try:
        urllib.request.urlretrieve(_YUNET_MODEL_URL, partial)
        # Renaming within one directory is atomic on POSIX, so readers see
        # either no model or a complete one.
        partial.replace(_YUNET_CACHE_PATH)
    finally:
        # Remove the leftover after a failure; no-op after a successful rename.
        partial.unlink(missing_ok=True)
    logger.info("YuNet model downloaded.")
    return _YUNET_CACHE_PATH
142
+
143
+
144
+ # Check LBPH recognizer availability (needs opencv-contrib-python)
145
+ _LBPH_AVAILABLE = hasattr(cv2, "face") and hasattr(cv2.face, "LBPHFaceRecognizer_create")
146
+ if not _LBPH_AVAILABLE:
147
+ logger.warning(
148
+ "cv2.face.LBPHFaceRecognizer_create not found. "
149
+ "Install opencv-contrib-python: pip install opencv-contrib-python\n"
150
+ "Face recognition (identification) will be disabled; detection still works."
151
+ )
152
+
153
+
154
def _build_lbph_recognizer(label_crops: list[tuple[int, np.ndarray]]) -> "cv2.face.LBPHFaceRecognizer | None":
    """Train an LBPH recognizer on ``(label_int, gray_crop)`` pairs.

    Returns ``None`` when the LBPH module is unavailable (see
    ``_LBPH_AVAILABLE``) or when ``label_crops`` is empty; otherwise returns
    the trained recognizer.
    """
    if not (_LBPH_AVAILABLE and label_crops):
        return None
    label_ids, images = zip(*label_crops)
    model = cv2.face.LBPHFaceRecognizer_create()
    model.train(list(images), np.array(label_ids, dtype=np.int32))
    return model
163
+
164
+
165
+ class FaceRecognitionWorker:
166
+ """Background thread that continuously detects and recognises faces.
167
+
168
+ Public API (thread-safe):
169
+ worker.current_name → str "Unknown" or registered name
170
+ worker.current_encoding → Optional[np.ndarray] raw face crop (grayscale)
171
+ worker.latest_annotated_jpeg → Optional[bytes] MJPEG frame
172
+ worker.confidence → float LBPH distance (lower = better; not capped at 100)
173
+ worker.best_recent_face() → (name, lbph_conf, crop) from 5-second window
174
+ """
175
+
176
+ def __init__(
177
+ self,
178
+ face_db, # FaceDatabase instance
179
+ camera_worker=None, # CameraWorker instance (pulls robot frames via SDK)
180
+ process_every_n: int = _PROCESS_EVERY_N_FRAMES,
181
+ confidence_threshold: float = _CONFIDENCE_THRESHOLD,
182
+ ) -> None:
183
+ self._face_db = face_db
184
+ self._camera_worker = camera_worker
185
+ self._process_every_n = process_every_n
186
+ self._confidence_threshold = confidence_threshold
187
+
188
+ # Shared state (read by tools and dashboard)
189
+ self._lock = threading.Lock()
190
+ self._current_name: str = "Unknown"
191
+ self._current_encoding: Optional[np.ndarray] = None # grayscale face crop
192
+ self._latest_annotated_jpeg: Optional[bytes] = None
193
+ self._confidence: float = 0.0
194
+
195
+ # Stable face state for context event emission.
196
+ # state in {"no_face", "unknown", "known"}
197
+ now = time.monotonic()
198
+ self._stable_state: str = "no_face"
199
+ self._stable_name: str = "Unknown"
200
+ self._stable_since: float = now
201
+ self._candidate_state: str = "no_face"
202
+ self._candidate_name: str = "Unknown"
203
+ self._candidate_since: float = now
204
+ self._last_event_sent_at: float = 0.0
205
+ self._face_event_callback: Optional[Callable[[dict[str, Any]], None]] = None
206
+
207
+ # Rolling 5-second detection window: each entry is
208
+ # (timestamp: float, face_area: int, blur_score: float, crop: np.ndarray, det_confidence: float)
209
+ # face_area = w * h in pixels — larger = more prominent / closer face
210
+ # blur_score = Laplacian variance — higher = sharper crop
211
+ # det_confidence = YuNet score 0–1 — higher = more confident detection
212
+ # Entries with blur_score < _MIN_BLUR_SCORE are still added, but rank lower.
213
+ self._detection_window: deque = deque()
214
+
215
+ # Log buffer for dashboard debug panel (ring buffer, max 200 lines)
216
+ self._log_buffer: list[str] = []
217
+ self._log_lock = threading.Lock()
218
+
219
+ # LBPH recognizer — rebuilt whenever guests are added/changed
220
+ self._recognizer_lock = threading.Lock()
221
+ self._recognizer: Optional[object] = None # cv2.face.LBPHFaceRecognizer
222
+ self._label_map: dict[int, str] = {} # int label → guest name
223
+
224
+ self._stop_event = threading.Event()
225
+ self._thread: Optional[threading.Thread] = None
226
+
227
+ # YuNet face detector — initialized in start()
228
+ self._detector: Optional[object] = None # cv2.FaceDetectorYN
229
+ self._detector_input_size: tuple = (0, 0)
230
+
231
+ # ------------------------------------------------------------------
232
+ # Public read properties (thread-safe)
233
+ # ------------------------------------------------------------------
234
+
235
+ @property
236
+ def current_name(self) -> str:
237
+ with self._lock:
238
+ return self._current_name
239
+
240
+ @property
241
+ def current_encoding(self) -> Optional[np.ndarray]:
242
+ """Returns the latest detected face crop (grayscale numpy array) or None."""
243
+ with self._lock:
244
+ return self._current_encoding.copy() if self._current_encoding is not None else None
245
+
246
+ @property
247
+ def latest_annotated_jpeg(self) -> Optional[bytes]:
248
+ with self._lock:
249
+ return self._latest_annotated_jpeg
250
+
251
+ @property
252
+ def confidence(self) -> float:
253
+ with self._lock:
254
+ return self._confidence
255
+
256
def get_recent_logs(self, n: int = 50) -> list[str]:
    """Return a copy of the newest ``n`` log lines, oldest first.

    ``n <= 0`` returns an empty list.  Without this guard ``n == 0`` sliced
    ``[-0:]`` and returned the whole buffer, and a negative ``n`` returned
    an arbitrary tail.
    """
    if n <= 0:
        return []
    with self._log_lock:
        return list(self._log_buffer[-n:])
259
+
260
+ def set_face_event_callback(self, callback: Optional[Callable[[dict[str, Any]], None]]) -> None:
261
+ """Register a callback for stable face-state transition events."""
262
+ with self._lock:
263
+ self._face_event_callback = callback
264
+
265
def best_recent_face(
    self,
    window_seconds: float = _BEST_FACE_WINDOW_SECONDS,
    require_dwell: bool = True,
) -> tuple[str, float, Optional[np.ndarray]]:
    """Return the best face seen in the last ``window_seconds`` seconds.

    Steps:
      1. Drop entries older than ``window_seconds`` from the rolling
         detection window.
      2. If the window is empty, return ``("Unknown", 0.0, None)``.
      3. If ``require_dwell`` is set and the remaining entries span less
         than ``_MIN_DWELL_SECONDS``, also return
         ``("Unknown", 0.0, None)``: a face that only just appeared is not
         treated as stable evidence.  Dashboard previews can pass
         ``require_dwell=False`` to get the best available crop at once.
      4. Otherwise pick the entry with the highest quality score
         ``blur_score * log(max(face_area, 1)) * max(det_confidence, 0.01)``
         and run recognition on a copy of its crop.

    Low-sharpness crops are not filtered out here; they simply score lower.
    Recognition runs after the state lock is released because it can be
    slow.

    Returns:
        ``(name, lbph_confidence, crop)`` where ``name`` and
        ``lbph_confidence`` are whatever ``self._recognize`` returns for
        the chosen crop, and ``crop`` is a copy of that crop (``None`` when
        nothing qualified).
    """

    def _quality(entry) -> float:
        # entry = (timestamp, face_area, blur_score, crop, det_confidence)
        return entry[2] * math.log(max(entry[1], 1)) * max(entry[4], 0.01)

    nothing = ("Unknown", 0.0, None)
    current = time.monotonic()
    with self._lock:
        window = self._detection_window
        # Oldest entries sit on the left of the deque.
        while window and (current - window[0][0]) > window_seconds:
            window.popleft()

        if not window:
            return nothing

        if require_dwell:
            stamps = [entry[0] for entry in window]
            if max(stamps) - min(stamps) < _MIN_DWELL_SECONDS:
                return nothing

        winner = max(window, key=_quality)
        _stamp, face_area, blur_score, winning_crop, det_conf = winner
        snapshot = winning_crop.copy()

    # Recognition happens outside the lock (it can be slow).
    name, conf = self._recognize(snapshot)
    logger.debug(
        "best_recent_face: face_area=%d blur=%.1f det_conf=%.2f name=%s lbph_conf=%.1f",
        face_area, blur_score, det_conf, name, conf,
    )
    self._add_log(f"check_current_face → {name} (area={face_area}px², blur={blur_score:.1f}, det={det_conf:.2f}, lbph={conf:.1f})")
    return name, conf, snapshot
321
+
322
+ # ------------------------------------------------------------------
323
+ # Recognizer rebuild (called by register_guest tool after adding a guest)
324
+ # ------------------------------------------------------------------
325
+
326
+ def rebuild_recognizer(self) -> None:
327
+ """Rebuild the LBPH recognizer from the current guest database.
328
+
329
+ Call this after registering or updating a guest so the worker
330
+ immediately starts recognising the new face.
331
+ """
332
+ if not _LBPH_AVAILABLE:
333
+ return
334
+ guests = self._face_db.get_all_guests_with_crops()
335
+ if not guests:
336
+ with self._recognizer_lock:
337
+ self._recognizer = None
338
+ self._label_map = {}
339
+ self._add_log("Recognizer cleared (no guests)")
340
+ return
341
+
342
+ label_map: dict[int, str] = {}
343
+ label_crops: list[tuple[int, np.ndarray]] = []
344
+ for idx, (name, crop) in enumerate(guests):
345
+ label_map[idx] = name
346
+ label_crops.append((idx, crop))
347
+
348
+ recognizer = _build_lbph_recognizer(label_crops)
349
+ with self._recognizer_lock:
350
+ self._recognizer = recognizer
351
+ self._label_map = label_map
352
+ self._add_log(f"Recognizer rebuilt ({len(label_map)} guest(s))")
353
+ logger.info("LBPH recognizer rebuilt with %d guest(s)", len(label_map))
354
+
355
+ # ------------------------------------------------------------------
356
+ # Lifecycle
357
+ # ------------------------------------------------------------------
358
+
359
def start(self) -> None:
    """Prepare the detector and recognizer, then launch the worker thread.

    A detector initialisation failure is logged and recorded in the log
    buffer but does not abort start-up: the recognizer is still rebuilt and
    the thread is still started.
    """
    initial_size = (640, 480)

    # YuNet set-up; _ensure_yunet_model() supplies the model file path.
    try:
        yunet_path = _ensure_yunet_model()
        self._detector = cv2.FaceDetectorYN.create(
            model=str(yunet_path),
            config="",
            input_size=initial_size,
            score_threshold=_YUNET_SCORE_THRESHOLD,
            nms_threshold=_YUNET_NMS_THRESHOLD,
            top_k=_YUNET_TOP_K,
        )
        self._detector_input_size = initial_size
        logger.info("YuNet face detector initialized")
        self._add_log("YuNet face detector initialized ✓")
    except Exception as e:
        logger.error("Failed to initialize YuNet detector: %s", e)
        self._add_log(f"WARNING: YuNet detector failed to initialize: {e}")

    # Seed the recognizer from whatever is already stored in the face DB.
    self.rebuild_recognizer()

    self._stop_event.clear()
    worker = threading.Thread(
        target=self._run,
        daemon=True,
        name="face-recognition-worker",
    )
    self._thread = worker
    worker.start()
    logger.info("FaceRecognitionWorker started (camera_worker=%s)", self._camera_worker)
384
+
385
def stop(self) -> None:
    """Signal the worker loop to exit and wait up to 3 seconds for the thread."""
    self._stop_event.set()
    worker = self._thread
    if worker:
        worker.join(timeout=3.0)
    logger.info("FaceRecognitionWorker stopped")
390
+
391
+ # ------------------------------------------------------------------
392
+ # Internal helpers
393
+ # ------------------------------------------------------------------
394
+
395
+ def _add_log(self, msg: str) -> None:
396
+ ts = time.strftime("%H:%M:%S")
397
+ entry = f"[{ts}] {msg}"
398
+ with self._log_lock:
399
+ self._log_buffer.append(entry)
400
+ if len(self._log_buffer) > 200:
401
+ self._log_buffer = self._log_buffer[-200:]
402
+
403
+ def _emit_face_state_event(self, event: dict[str, Any]) -> None:
404
+ """Best-effort callback dispatch for external face context events."""
405
+ callback = None
406
+ with self._lock:
407
+ callback = self._face_event_callback
408
+ if callback is None:
409
+ return
410
+ try:
411
+ callback(event)
412
+ except Exception as e:
413
+ logger.warning("Failed to dispatch face state event: %s", e)
414
+
415
def _update_stable_state(
    self,
    observed_state: str,
    observed_name: str,
    lbph_confidence: float,
    det_confidence: float,
) -> None:
    """Fold one observation into the debounced ("stable") face state.

    An observation first becomes the *candidate*; it is promoted to the
    stable state only after it has been observed continuously for the dwell
    time that applies to ``observed_state``. When the stable state changes, a
    log line is always recorded, and a ``face_state_changed`` event is
    emitted unless the previous event was sent less than
    ``_FACE_EVENT_COOLDOWN_SECONDS`` ago.
    """
    now = time.monotonic()
    is_known = observed_state == "known"

    with self._lock:
        # A new candidate restarts the dwell timer; nothing else happens yet.
        same_candidate = observed_state == self._candidate_state and not (
            is_known and observed_name != self._candidate_name
        )
        if not same_candidate:
            self._candidate_state = observed_state
            self._candidate_name = observed_name
            self._candidate_since = now
            return

        dwell = (
            _NO_FACE_CONFIRM_SECONDS
            if observed_state == "no_face"
            else _MULTIPLE_PEOPLE_CONFIRM_SECONDS
            if observed_state == "multiple"
            else _FACE_STATE_CONFIRM_SECONDS
        )
        if (now - self._candidate_since) < dwell:
            return

        old_state = self._stable_state
        old_name = self._stable_name
        unchanged = observed_state == old_state and not (
            is_known and observed_name != old_name
        )
        if unchanged:
            return

        self._stable_state = observed_state
        self._stable_name = observed_name
        self._stable_since = now

        # The public name/confidence mirror the stable state, not raw frames.
        self._current_name = observed_name if is_known else "Unknown"
        self._confidence = 0.0 if observed_state == "no_face" else float(lbph_confidence)

        cooled_down = (now - self._last_event_sent_at) >= _FACE_EVENT_COOLDOWN_SECONDS
        if cooled_down:
            self._last_event_sent_at = now

    # Logging and dispatch run after the lock is released
    # (_emit_face_state_event acquires self._lock itself).
    self._add_log(
        f"Stable face: {old_state}({old_name}) -> {observed_state}({observed_name})"
    )

    if not cooled_down:
        self._add_log("Face context event skipped (cooldown)")
        return

    self._emit_face_state_event(
        {
            "event": "face_state_changed",
            "state": observed_state,
            "name": observed_name if is_known else None,
            "previous_state": old_state,
            "previous_name": old_name if old_state == "known" else None,
            "lbph_confidence": round(float(lbph_confidence), 2),
            "detection_confidence": round(float(det_confidence), 3),
            "timestamp": time.time(),
        }
    )
487
+
488
+ def _detect_faces(self, frame: np.ndarray) -> list[tuple[int, int, int, int, int, float]]:
489
+ """Return list of (x, y, w, h, area, det_confidence) sorted by area descending.
490
+
491
+ Uses YuNet (cv2.FaceDetectorYN) which returns a real confidence score (0–1)
492
+ per bounding box. The detector input size is updated dynamically per frame.
493
+ """
494
+ if self._detector is None:
495
+ return []
496
+
497
+ h, w = frame.shape[:2]
498
+ if (w, h) != self._detector_input_size:
499
+ self._detector.setInputSize((w, h))
500
+ self._detector_input_size = (w, h)
501
+
502
+ _, faces = self._detector.detect(frame)
503
+ if faces is None or len(faces) == 0:
504
+ return []
505
+
506
+ results = [
507
+ (int(f[0]), int(f[1]), int(f[2]), int(f[3]), int(f[2] * f[3]), float(f[14]))
508
+ for f in faces
509
+ ]
510
+ results.sort(key=lambda r: r[4], reverse=True)
511
+ return results
512
+
513
+ def _recognize(self, gray_crop: np.ndarray) -> tuple[str, float]:
514
+ """Return (name, confidence) for a face crop. confidence is LBPH distance (lower = better match)."""
515
+ with self._recognizer_lock:
516
+ recognizer = self._recognizer
517
+ label_map = self._label_map
518
+
519
+ if recognizer is None or not label_map:
520
+ return "Unknown", 0.0
521
+
522
+ try:
523
+ resized = cv2.resize(gray_crop, (100, 100))
524
+ label_int, confidence = recognizer.predict(resized)
525
+ name = label_map.get(label_int, "Unknown")
526
+ if confidence <= self._confidence_threshold:
527
+ return name, float(confidence)
528
+ else:
529
+ return "Unknown", float(confidence)
530
+ except Exception as e:
531
+ logger.debug("LBPH predict error: %s", e)
532
+ return "Unknown", 0.0
533
+
534
+ # ------------------------------------------------------------------
535
+ # Main loop
536
+ # ------------------------------------------------------------------
537
+
538
def _run(self) -> None:
    """Worker-thread main loop: detect, recognise, and publish annotated frames.

    Exits immediately when no camera worker was supplied. Otherwise it polls
    ``get_latest_frame()`` until the stop event is set. Every
    ``self._process_every_n``-th frame goes through detection, zone
    filtering, crop scoring and LBPH recognition; every frame is annotated
    with the most recent boxes/labels and stored as JPEG bytes. Any
    exception that escapes the loop is logged and ends the worker.
    """
    if self._camera_worker is None:
        logger.warning("FaceRecognitionWorker: no camera_worker provided — face recognition disabled")
        self._add_log("WARNING: No camera_worker — face recognition disabled")
        return

    # The detector in use is YuNet (cv2.FaceDetectorYN), so the status line
    # names it rather than a Haar cascade.
    self._add_log("Camera worker attached ✓ (OpenCV YuNet + LBPH, robot camera)")

    frame_count = 0
    last_face_rects: list[tuple[int, int, int, int]] = []  # (x, y, w, h)
    last_face_labels: list[str] = []

    try:
        while not self._stop_event.is_set():
            frame = self._camera_worker.get_latest_frame()
            if frame is None:
                time.sleep(0.05)
                continue

            frame_count += 1
            do_recognition = frame_count % self._process_every_n == 0

            if do_recognition:
                rects = self._detect_faces(frame)  # sorted by area descending; (x,y,w,h,area,det_conf)

                # Keep only faces whose centre lies inside the central detection zone.
                fh_full, fw_full = frame.shape[:2]
                _zx1 = fw_full * _ZONE_X_MARGIN
                _zx2 = fw_full * (1 - _ZONE_X_MARGIN)
                _zy1 = fh_full * _ZONE_Y_MARGIN
                _zy2 = fh_full * (1 - _ZONE_Y_MARGIN)
                rects = [
                    (x, y, w, h, area, det_conf)
                    for x, y, w, h, area, det_conf in rects
                    if _zx1 <= (x + w / 2) <= _zx2 and _zy1 <= (y + h / 2) <= _zy2
                ]

                # Boxes without area/confidence, reused for annotation until the next pass.
                last_face_rects = [(x, y, w, h) for x, y, w, h, _area, _conf in rects]
                last_face_labels = []

                multiple_in_zone = len(rects) >= _MULTIPLE_PEOPLE_THRESHOLD
                if multiple_in_zone:
                    # Several people in the zone: report "multiple" instead of
                    # an individual identity.
                    self._update_stable_state("multiple", "Unknown", 0.0, 0.0)

                if rects:
                    # rects is sorted by area, so index 0 is the largest face.
                    px, py, pw, ph, face_area, det_conf = rects[0]
                    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                    fh, fw = gray.shape[:2]
                    # Clamp the box to the frame; the detector can report
                    # coordinates outside it.
                    x1 = max(0, px)
                    y1 = max(0, py)
                    x2 = min(fw, px + pw)
                    y2 = min(fh, py + ph)
                    crop = gray[y1:y2, x1:x2]
                    if crop.size == 0:
                        last_face_labels.append("Face (bad crop)")
                        # NOTE: this also skips the annotation step for this frame.
                        continue
                    resized_crop = cv2.resize(crop, (100, 100))

                    # Sharpness estimate: variance of the Laplacian (higher = sharper).
                    blur_score = cv2.Laplacian(resized_crop, cv2.CV_64F).var()

                    # Every valid crop enters the window; blurry ones simply
                    # score lower when the best entry is picked.
                    now = time.monotonic()
                    with self._lock:
                        self._detection_window.append((now, face_area, blur_score, resized_crop.copy(), det_conf))
                        # Drop entries older than the configured window.
                        while self._detection_window and (now - self._detection_window[0][0]) > _BEST_FACE_WINDOW_SECONDS:
                            self._detection_window.popleft()
                        # Quality = sharpness * log(area) * detection confidence.
                        best_entry = max(self._detection_window, key=lambda e: e[2] * math.log(max(e[1], 1)) * max(e[4], 0.01))
                        self._current_encoding = best_entry[3].copy()

                    if blur_score < _MIN_BLUR_SCORE:
                        # Diagnostic only: the crop is still recognised below.
                        logger.debug("Face crop rejected: blur_score=%.1f < threshold=%.1f", blur_score, _MIN_BLUR_SCORE)

                    # LBPH on the current crop drives the live display.
                    name, conf = self._recognize(resized_crop)

                    # With 2+ faces the "multiple" state was already reported;
                    # skip the per-person update so the state does not oscillate.
                    if not multiple_in_zone:
                        observed_state = "known" if name != "Unknown" else "unknown"
                        observed_name = name if observed_state == "known" else "Unknown"
                        self._update_stable_state(observed_state, observed_name, conf, det_conf)

                    # One label per detected face; only the largest is identified.
                    for i, (x, y, w, h, area, conf_i) in enumerate(rects):
                        if i == 0:
                            det_tag = f"det={det_conf:.2f}"
                            if name != "Unknown":
                                last_face_labels.append(f"Known: {name} (lbph={conf:.0f}) {det_tag}")
                            else:
                                last_face_labels.append(f"Unknown (lbph={conf:.0f}) {det_tag}")
                        else:
                            last_face_labels.append(f"Face (det={conf_i:.2f})")
                else:
                    # No face this pass: still age out the window.
                    now = time.monotonic()
                    with self._lock:
                        while self._detection_window and (now - self._detection_window[0][0]) > _BEST_FACE_WINDOW_SECONDS:
                            self._detection_window.popleft()
                        if not self._detection_window:
                            self._current_encoding = None

                    self._update_stable_state("no_face", "No face", 0.0, 0.0)

            # Annotate with the latest known boxes/labels and publish as JPEG.
            annotated = self._annotate_frame(frame, last_face_rects, last_face_labels)
            _, jpeg = cv2.imencode(".jpg", annotated, [cv2.IMWRITE_JPEG_QUALITY, 70])
            with self._lock:
                self._latest_annotated_jpeg = jpeg.tobytes()

    except Exception as e:
        logger.exception("FaceRecognitionWorker crashed: %s", e)
        self._add_log(f"CRASH: {e}")
    finally:
        logger.info("FaceRecognitionWorker: stopped")
662
+
663
def _annotate_frame(
    self,
    frame: np.ndarray,
    face_rects: list[tuple[int, int, int, int]],
    labels: list[str],
) -> np.ndarray:
    """Return a copy of ``frame`` with the detection zone and face boxes drawn.

    ``labels[i]`` captions ``face_rects[i]``; a missing label falls back to
    ``"Face"``. Boxes whose label starts with ``"Known:"`` use one colour and
    all others a second colour.
    """
    canvas = frame.copy()

    # Outline of the central detection zone.
    height, width = canvas.shape[:2]
    zone_top_left = (int(width * _ZONE_X_MARGIN), int(height * _ZONE_Y_MARGIN))
    zone_bottom_right = (
        int(width * (1 - _ZONE_X_MARGIN)),
        int(height * (1 - _ZONE_Y_MARGIN)),
    )
    zone_color = (0, 210, 210)
    cv2.rectangle(canvas, zone_top_left, zone_bottom_right, zone_color, 2)
    cv2.putText(
        canvas,
        "Detection Zone",
        (zone_top_left[0] + 4, zone_top_left[1] + 18),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.5,
        zone_color,
        1,
    )

    white = (255, 255, 255)
    for idx, (x, y, w, h) in enumerate(face_rects):
        caption = labels[idx] if idx < len(labels) else "Face"
        box_color = (0, 200, 0) if caption.startswith("Known:") else (0, 100, 255)
        right, bottom = x + w, y + h

        cv2.rectangle(canvas, (x, y), (right, bottom), box_color, 2)
        # Filled strip along the bottom edge as the caption background.
        cv2.rectangle(canvas, (x, bottom - 28), (right, bottom), box_color, cv2.FILLED)
        cv2.putText(
            canvas,
            caption,
            (x + 4, bottom - 8),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            white,
            1,
        )
    return canvas
src/reachy_mini_receptionist/gemini_live.py ADDED
@@ -0,0 +1,754 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Gemini Live API handler — drop-in replacement for OpenaiRealtimeHandler.
2
+
3
+ Same public surface as ``openai_realtime.OpenaiRealtimeHandler`` so the
4
+ rest of the app (main.py, console.py, headless_personality_ui.py) can
5
+ switch backends with one env var (``VOICE_BACKEND=gemini``) without
6
+ code changes.
7
+
8
+ Audio I/O is bidirectional PCM via Gemini's Live websocket. Tool
9
+ calling, VAD, voice synthesis all delegated to Gemini.
10
+
11
+ This module imports ``google.genai`` LAZILY inside ``start_up`` so the
12
+ absence of the SDK does not break OpenAI-backend installs.
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import asyncio
17
+ import base64
18
+ import json
19
+ import logging
20
+ import os
21
+ import threading
22
+ import time
23
+ import uuid
24
+ from datetime import datetime
25
+ from typing import Any, Final, Literal, Optional, Tuple
26
+
27
+ import numpy as np
28
+ from fastrtc import AdditionalOutputs, AsyncStreamHandler, wait_for_item, audio_to_int16
29
+ from numpy.typing import NDArray
30
+ from scipy.signal import resample
31
+
32
+ from reachy_mini_receptionist.config import config
33
+ from reachy_mini_receptionist.prompts import get_session_voice, get_session_instructions
34
+ from reachy_mini_receptionist.tools.core_tools import (
35
+ ToolDependencies,
36
+ get_tool_specs,
37
+ )
38
+ from reachy_mini_receptionist.tools.background_tool_manager import (
39
+ ToolCallRoutine,
40
+ ToolNotification,
41
+ BackgroundToolManager,
42
+ )
43
+
44
+ logger = logging.getLogger(__name__)
45
+
46
+ # Gemini Live expects 16 kHz PCM16 mono input; outputs 24 kHz PCM16.
47
+ GEMINI_INPUT_SAMPLE_RATE: Final[int] = 16000
48
+ GEMINI_OUTPUT_SAMPLE_RATE: Final[int] = 24000
49
+
50
+
51
+ def _gemini_model_name() -> str:
52
+ """Resolve the Gemini Live model id (env-overridable).
53
+
54
+ Default is ``gemini-2.5-flash-native-audio-latest`` — confirmed
55
+ bidiGenerateContent-capable on the project's API key (queried
56
+ 2026-05-20 against models.list endpoint). Set GEMINI_LIVE_MODEL
57
+ in .env to switch to e.g. gemini-2.5-flash-native-audio-preview-09-2025
58
+ or whatever else your project has allowlisted.
59
+ """
60
+ return (os.getenv("GEMINI_LIVE_MODEL") or "gemini-2.5-flash-native-audio-latest").strip()
61
+
62
+
63
+ def _openai_tools_to_gemini(openai_tools: list[dict]) -> list[dict]:
64
+ """Translate OpenAI function-tool specs to Gemini function declarations.
65
+
66
+ OpenAI format: {"type": "function", "name": "...", "description": "...",
67
+ "parameters": {"type": "object", ...}}
68
+ Gemini format: {"function_declarations": [
69
+ {"name": "...", "description": "...",
70
+ "parameters": {"type": "OBJECT", ...}}, ...]}
71
+ """
72
+ declarations: list[dict] = []
73
+ for tool in openai_tools or []:
74
+ if tool.get("type") != "function":
75
+ continue
76
+ name = tool.get("name")
77
+ if not name:
78
+ continue
79
+ decl = {
80
+ "name": name,
81
+ "description": tool.get("description", ""),
82
+ }
83
+ params = tool.get("parameters")
84
+ if params:
85
+ decl["parameters"] = params
86
+ declarations.append(decl)
87
+ return [{"function_declarations": declarations}] if declarations else []
88
+
89
+
90
+ class GeminiLiveHandler(AsyncStreamHandler):
91
+ """Gemini Live API handler — mirror of OpenaiRealtimeHandler.
92
+
93
+ Same public surface as the OpenAI handler so the rest of the app
94
+ can switch backends via ``VOICE_BACKEND=gemini`` without code
95
+ changes elsewhere.
96
+ """
97
+
98
def __init__(
    self,
    deps: ToolDependencies,
    gradio_mode: bool = False,
    instance_path: Optional[str] = None,
    session_manager: Any | None = None,
    controller: Any | None = None,
):
    """Set up handler state; no network connection is made here.

    Args:
        deps: Tool dependencies handed to every tool call routine.
        gradio_mode: Stored as-is on the instance.
        instance_path: Stored as-is on the instance.
        session_manager: Optional object consulted for stale-session resets
            and user transcripts.
        controller: Optional object notified when a tool call completes.
    """
    super().__init__(
        expected_layout="mono",
        output_sample_rate=GEMINI_OUTPUT_SAMPLE_RATE,
        input_sample_rate=GEMINI_INPUT_SAMPLE_RATE,
    )
    self.deps = deps
    self.gradio_mode = gradio_mode
    self.instance_path = instance_path
    self._session_manager = session_manager
    self._controller = controller

    self.output_queue: "asyncio.Queue[Tuple[int, NDArray[np.int16]] | AdditionalOutputs]" = asyncio.Queue()

    self.session: Any = None  # google.genai live session, set in start_up
    self._client: Any = None
    self._runtime_loop: asyncio.AbstractEventLoop | None = None
    self._shutdown_requested: bool = False

    # Cumulative cost tracker (Gemini doesn't expose token costs the same way)
    self.cumulative_cost: float = 0.0
    # The constructor may run with no event loop running, where
    # asyncio.get_event_loop() is deprecated and can raise on newer Pythons.
    # Use the running loop's clock when there is one; otherwise fall back to
    # time.monotonic(), the clock the standard asyncio loops use.
    try:
        self.start_time = asyncio.get_running_loop().time()
    except RuntimeError:
        self.start_time = time.monotonic()
    self.last_activity_time = self.start_time

    # Background tool manager (same as OpenAI handler)
    self.tool_manager = BackgroundToolManager()

    # Tool-call args stash keyed by call_id, so the controller can see
    # both args + result when the tool completes.
    self._tool_call_args: dict[str, dict[str, Any]] = {}

    # Last face event + session event sent to model (for /api endpoints)
    self._face_event_lock = threading.Lock()
    self._last_face_event_sent: dict[str, Any] | None = None
    self._session_event_lock = threading.Lock()
    self._last_session_event_sent: dict[str, Any] | None = None

    # Pending events waiting for session to be ready
    self._pending_face_event_lock = threading.Lock()
    self._pending_face_event: dict[str, Any] | None = None
    self._pending_session_event_lock = threading.Lock()
    self._pending_session_event: dict[str, Any] | None = None

    # idle-speech cue dedupe
    self._idle_speech_cue_pushed: bool = False

    # Set by receive() once the first microphone frame has been sent.
    self._first_mic_frame_logged: bool = False
150
+
151
+ # ------------------------------------------------------------------
152
+ # AsyncStreamHandler required methods
153
+ # ------------------------------------------------------------------
154
+
155
def copy(self) -> "GeminiLiveHandler":
    """Create a fresh handler that shares this one's constructor arguments."""
    shared_collaborators = {
        "session_manager": self._session_manager,
        "controller": self._controller,
    }
    return GeminiLiveHandler(
        self.deps,
        self.gradio_mode,
        self.instance_path,
        **shared_collaborators,
    )
163
+
164
async def start_up(self) -> None:
    """Open a Gemini Live session and pump its events until it ends.

    Returns early, after logging an error, when the ``google-genai`` SDK
    cannot be imported or ``GEMINI_API_KEY`` is unset. Otherwise it builds
    the session config, connects, starts the background tool manager, sends
    a one-shot greeting prompt and runs the event loop. Connection or
    session errors are logged, not raised.
    """
    self._runtime_loop = asyncio.get_running_loop()

    # The SDK is imported here so it is only needed when this backend runs.
    try:
        from google import genai
        from google.genai import types as genai_types
    except ImportError as e:
        logger.error(
            "google-genai SDK not installed. Install with: "
            "pip install google-genai. Error: %s", e,
        )
        return

    api_key = (os.getenv("GEMINI_API_KEY") or "").strip()
    if not api_key:
        logger.error("GEMINI_API_KEY not set — cannot start Gemini Live backend")
        return

    self._client = genai.Client(api_key=api_key, http_options={"api_version": "v1beta"})
    model_id = _gemini_model_name()
    logger.info("Gemini Live backend starting with model=%s", model_id)

    # Gather the session inputs. The session voice is fetched but is not
    # applied to the config below.
    instructions = get_session_instructions()
    voice = get_session_voice()
    openai_tools = get_tool_specs()  # type: ignore[no-untyped-call]
    gemini_tools = _openai_tools_to_gemini(openai_tools)

    # Deliberately minimal base config: audio responses plus system prompt.
    config_obj: dict[str, Any] = {
        "response_modalities": ["AUDIO"],
        "system_instruction": instructions,
    }

    # Models whose id contains "-live-" but not "native-audio" are treated
    # as half-cascade: they get an explicit prebuilt voice (overridable via
    # GEMINI_LIVE_VOICE) and input/output transcription. Native-audio
    # models are left without a speech_config.
    model_lower = model_id.lower()
    looks_live = "-live-" in model_lower or model_lower.endswith("-live-preview")
    is_half_cascade = looks_live and "native-audio" not in model_lower
    if is_half_cascade:
        voice_name = (os.getenv("GEMINI_LIVE_VOICE") or "Puck").strip() or "Puck"
        config_obj["speech_config"] = {
            "voice_config": {
                "prebuilt_voice_config": {"voice_name": voice_name},
            },
        }
        # An empty dict enables transcription with default settings.
        config_obj["input_audio_transcription"] = {}
        config_obj["output_audio_transcription"] = {}
        logger.info(
            "Gemini Live: half-cascade model detected — adding "
            "speech_config (voice=%s) + transcription", voice_name,
        )

    if gemini_tools:
        config_obj["tools"] = gemini_tools

    # One-shot prompt that makes the model speak first.
    greeting_turn = {
        "role": "user",
        "parts": [{
            "text": (
                "(Visitor just walked up. Greet them "
                "very briefly in ONE short friendly "
                "sentence and ask their name or who "
                "they're here to see. Keep it under "
                "8 words.)"
            ),
        }],
    }

    try:
        async with self._client.aio.live.connect(model=model_id, config=config_obj) as session:
            self.session = session
            logger.info("Gemini Live connected.")
            # Tool results are delivered back through _handle_tool_result.
            self.tool_manager.start_up(tool_callbacks=[self._handle_tool_result])

            # A failed greeting is logged only; the session keeps running.
            try:
                await session.send_client_content(
                    turns=[greeting_turn],
                    turn_complete=True,
                )
                logger.info("Gemini Live: sent kick-off greeting prompt")
            except Exception as e:
                logger.warning("Gemini Live: kick-off send failed: %s", e)

            try:
                await self._run_event_loop()
            except Exception as inner:
                logger.exception("Gemini Live event loop crashed: %s", inner)
            finally:
                await self.tool_manager.shutdown()
                self.session = None
                logger.info("Gemini Live: session ended cleanly")
    except Exception as e:
        logger.exception("Gemini Live session failed: %s", e)
        self.session = None
279
+
280
async def shutdown(self) -> None:
    """Request shutdown, then close the live session and the tool manager.

    Both clean-up steps are best-effort: a failure in either is logged at
    debug level and never raised, so one failing step cannot prevent the
    other from running.
    """
    self._shutdown_requested = True
    try:
        if self.session is not None:
            await self.session.close()
    except Exception as e:
        logger.debug("Gemini session close ignored: %s", e)
    try:
        await self.tool_manager.shutdown()
    except Exception as e:
        # Previously swallowed silently; keep it best-effort but visible.
        logger.debug("Gemini tool manager shutdown ignored: %s", e)
291
+
292
async def receive(self, frame: Tuple[int, NDArray[np.int16]]) -> None:
    """Forward one microphone frame to the live session.

    Does nothing while no session is open. A 2-D frame is transposed when it
    has more columns than rows and is then reduced to its first column if it
    has more than one. The frame is resampled when its rate differs from
    ``self.input_sample_rate``, converted with ``audio_to_int16`` and sent as
    raw PCM. A failed send is logged at debug level and the frame is dropped.
    """
    if self.session is None:
        return

    input_rate, samples = frame
    if samples.ndim == 2:
        rows, cols = samples.shape
        if cols > rows:
            samples = samples.T
        if samples.shape[1] > 1:
            samples = samples[:, 0]

    target_rate = self.input_sample_rate
    if target_rate != input_rate:
        resampled_len = int(len(samples) * target_rate / input_rate)
        samples = resample(samples, resampled_len)
    samples = audio_to_int16(samples)

    try:
        payload = {
            "data": samples.tobytes(),
            "mime_type": f"audio/pcm;rate={self.input_sample_rate}",
        }
        await self.session.send_realtime_input(audio=payload)
        # Logged once, after the first frame that was sent successfully.
        already_logged = getattr(self, "_first_mic_frame_logged", False)
        if not already_logged:
            logger.info(
                "Gemini Live: first mic frame sent (input_rate=%d, target_rate=%d, samples=%d)",
                input_rate, self.input_sample_rate, len(samples),
            )
            self._first_mic_frame_logged = True
    except Exception as e:
        logger.debug("Dropped mic frame: %s", e)
325
+
326
async def emit(self) -> Tuple[int, NDArray[np.int16]] | AdditionalOutputs | None:
    """Return the next queued output item (audio chunk or additional output).

    Before waiting on the queue, gives the session manager (when present) a
    chance to reset a stale session. A failure of that check is logged at
    debug level and does not interrupt output.
    """
    if self._session_manager is not None:
        try:
            self._session_manager.maybe_reset_if_stale()
        except Exception as e:
            # Best-effort: never let the staleness check stall audio output,
            # but leave a trace instead of discarding the error.
            logger.debug("Gemini Live: stale-session check failed: %s", e)
    return await wait_for_item(self.output_queue)  # type: ignore[no-any-return]
334
+
335
+ # ------------------------------------------------------------------
336
+ # Event loop — read Gemini events and dispatch
337
+ # ------------------------------------------------------------------
338
+
339
+ async def _run_event_loop(self) -> None:
340
+ """Consume Gemini Live server events until session closes / shutdown.
341
+
342
+ Wraps session.receive() in an outer loop so the conversation
343
+ survives multiple turns. Gemini's receive() iterator can return
344
+ after a single turn in some SDK versions — when it exits we
345
+ re-enter as long as the session and shutdown flag say keep going.
346
+ """
347
+ if self.session is None:
348
+ return
349
+ event_count = 0
350
+ audio_chunks = 0
351
+ outer_iterations = 0
352
+ try:
353
+ while self.session is not None and not self._shutdown_requested:
354
+ outer_iterations += 1
355
+ logger.info("Gemini Live: receive() iteration %d starting", outer_iterations)
356
+ async for resp in self.session.receive():
357
+ if self._shutdown_requested:
358
+ break
359
+ event_count += 1
360
+
361
+ # Smoke test confirmed: resp.data IS the same audio
362
+ # as server_content.model_turn.parts[].inline_data.data
363
+ # (just a convenience shortcut). Use ONLY the shortcut
364
+ # to avoid double-emit when both paths fire.
365
+ data = getattr(resp, "data", None)
366
+ if data:
367
+ audio_chunks += 1
368
+ arr = np.frombuffer(data, dtype=np.int16).reshape(1, -1)
369
+ await self.output_queue.put((self.output_sample_rate, arr))
370
+ self.last_activity_time = asyncio.get_event_loop().time()
371
+ if audio_chunks == 1:
372
+ logger.info("Gemini Live: first audio chunk received (%d bytes)", len(data))
373
+
374
+ # text shortcut — concatenated text parts
375
+ text = getattr(resp, "text", None)
376
+ if text:
377
+ logger.info("Gemini Live text: %r", text[:120])
378
+ await self.output_queue.put(
379
+ AdditionalOutputs({"role": "assistant", "content": text})
380
+ )
381
+
382
+ # server_content parsing for non-audio metadata + turn_complete
383
+ server_content = getattr(resp, "server_content", None)
384
+ if server_content is not None:
385
+ # NOTE: audio chunks via model_turn.parts[].inline_data
386
+ # are ALREADY surfaced via resp.data above — don't
387
+ # re-emit. Just look at non-audio parts here.
388
+ model_turn = getattr(server_content, "model_turn", None)
389
+ if model_turn is not None:
390
+ parts = getattr(model_turn, "parts", None) or []
391
+ for part in parts:
392
+ if getattr(part, "thought", False):
393
+ continue # silently swallow chain-of-thought
394
+ ptext = getattr(part, "text", None)
395
+ if ptext and not text: # avoid double-text-emit
396
+ logger.info("Gemini Live model_turn text: %r", ptext[:120])
397
+ await self.output_queue.put(
398
+ AdditionalOutputs({"role": "assistant", "content": ptext})
399
+ )
400
+
401
+ in_tr = getattr(server_content, "input_transcription", None)
402
+ if in_tr is not None:
403
+ txt = (getattr(in_tr, "text", "") or "").strip()
404
+ if txt:
405
+ logger.info("Gemini Live input transcript: %r", txt)
406
+ if self._session_manager is not None:
407
+ try:
408
+ self._session_manager.record_user_transcript(txt)
409
+ except Exception:
410
+ pass
411
+ await self.output_queue.put(
412
+ AdditionalOutputs({"role": "user", "content": txt})
413
+ )
414
+
415
+ if getattr(server_content, "turn_complete", False):
416
+ logger.info(
417
+ "Gemini Live: model turn complete (events=%d, audio_chunks=%d)",
418
+ event_count, audio_chunks,
419
+ )
420
+ audio_chunks = 0 # reset for next turn
421
+
422
+ # Tool calls
423
+ tool_call = getattr(resp, "tool_call", None)
424
+ if tool_call is not None:
425
+ fcs = getattr(tool_call, "function_calls", None) or []
426
+ logger.info("Gemini Live: tool_call with %d function calls", len(fcs))
427
+ for fc in fcs:
428
+ await self._dispatch_function_call(fc)
429
+
430
+ logger.info(
431
+ "Gemini Live: receive() iteration %d ended (events so far=%d). Looping.",
432
+ outer_iterations, event_count,
433
+ )
434
+ # Small backoff so we don't spin if session is genuinely dead
435
+ await asyncio.sleep(0.1)
436
+ except Exception as e:
437
+ logger.warning("Gemini Live event loop exited with exception: %s", e)
438
+ finally:
439
+ logger.info(
440
+ "Gemini Live event loop: total iterations=%d, events=%d, audio_chunks=%d",
441
+ outer_iterations, event_count, audio_chunks,
442
+ )
443
+
444
+ async def _dispatch_function_call(self, fc: Any) -> None:
445
+ """Route a Gemini function call into the background tool manager."""
446
+ tool_name = getattr(fc, "name", None)
447
+ args_obj = getattr(fc, "args", {}) or {}
448
+ call_id = str(getattr(fc, "id", None) or uuid.uuid4())
449
+ if not tool_name:
450
+ return
451
+ # Normalize args dict
452
+ if not isinstance(args_obj, dict):
453
+ try:
454
+ args_obj = dict(args_obj)
455
+ except Exception:
456
+ args_obj = {}
457
+ args_json_str = json.dumps(args_obj)
458
+ self._tool_call_args[call_id] = args_obj
459
+ logger.info(
460
+ "Gemini tool call: %s call_id=%s args=%s", tool_name, call_id, args_json_str,
461
+ )
462
+ try:
463
+ await self.tool_manager.start_tool(
464
+ call_id=call_id,
465
+ tool_call_routine=ToolCallRoutine(
466
+ tool_name=tool_name,
467
+ args_json_str=args_json_str,
468
+ deps=self.deps,
469
+ ),
470
+ is_idle_tool_call=False,
471
+ )
472
+ except Exception as e:
473
+ logger.warning("Failed to start Gemini tool '%s': %s", tool_name, e)
474
+
475
async def _handle_tool_result(self, bg_tool: ToolNotification) -> None:
    """Relay a finished tool call to Gemini, the output queue and the controller.

    The outcome is the notification's error wrapped as ``{"error": ...}``,
    else its result, else ``{"error": "No result"}``. It is sent to the live
    session (when one is open), queued as an assistant message with tool
    metadata, and finally passed to the controller together with the
    arguments stashed when the call was dispatched.
    """
    if bg_tool.error is not None:
        outcome: dict[str, Any] = {"error": bg_tool.error}
    else:
        outcome = bg_tool.result if bg_tool.result is not None else {"error": "No result"}

    # Missing entries yield an empty argument dict.
    stashed_args = self._tool_call_args.pop(bg_tool.id, {})

    # 1) Return the result to the model.
    if self.session is not None:
        try:
            # Shape used here: {id, name, response: {output: ...}}.
            reply = {
                "id": bg_tool.id if isinstance(bg_tool.id, str) else None,
                "name": bg_tool.tool_name,
                "response": {"output": outcome},
            }
            await self.session.send_tool_response(function_responses=[reply])
        except Exception as e:
            logger.debug("send_tool_response failed: %s", e)

    # 2) Queue the result as an assistant message with tool metadata.
    chat_entry = {
        "role": "assistant",
        "content": json.dumps(outcome),
        "metadata": {
            "title": f"🛠️ Used tool {bg_tool.tool_name}",
            "status": "done",
        },
    }
    await self.output_queue.put(AdditionalOutputs(chat_entry))

    # 3) Notify the controller of the completed tool call.
    if self._controller is None:
        return
    try:
        await self._controller.on_tool_completed_async(
            bg_tool.tool_name, stashed_args, outcome,
        )
    except Exception as e:
        logger.warning(
            "ConversationController.on_tool_completed_async raised %s: %s",
            type(e).__name__, e,
        )
523
+
524
+ # ------------------------------------------------------------------
525
+ # Public API — face + session context push (mirror of OpenAI handler)
526
+ # ------------------------------------------------------------------
527
+
528
def _stash_pending_face_event(self, face_event: dict[str, Any]) -> None:
    """Keep a shallow copy of ``face_event`` as the one pending face event.

    Any previously stashed event is replaced.
    """
    snapshot = dict(face_event)
    with self._pending_face_event_lock:
        self._pending_face_event = snapshot
531
+
532
+ def _pop_pending_face_event(self) -> dict[str, Any] | None:
533
+ with self._pending_face_event_lock:
534
+ p = self._pending_face_event
535
+ self._pending_face_event = None
536
+ return p
537
+
538
async def _flush_pending_face_event(self) -> None:
    """Push the stashed face event, if any, to the session.

    The event is taken out of the stash first; if pushing raises, the
    failure is logged at debug level and the event is stashed again.
    """
    pending = self._pop_pending_face_event()
    if pending is None:
        return
    try:
        await self._push_face_context_event(pending)
    except Exception as exc:
        logger.debug("flush pending face event failed: %s", exc)
        self._stash_pending_face_event(pending)
546
+
547
def notify_external_face_event(self, face_event: dict[str, Any]) -> None:
    """Hand a face event to the handler's event loop via ``run_coroutine_threadsafe``.

    When there is no usable loop (missing or closed) or no session, the
    event is stashed instead. Otherwise the push coroutine is scheduled on
    the runtime loop; if scheduling fails, or the scheduled push later
    raises, the event is stashed and the failure logged at debug level.

    Args:
        face_event: Face-state payload forwarded to
            ``_push_face_context_event``.
    """
    loop = self._runtime_loop
    not_ready = loop is None or loop.is_closed() or self.session is None
    if not_ready:
        self._stash_pending_face_event(face_event)
        return

    def _on_done(fut: "asyncio.Future[None]") -> None:
        # Runs once the scheduled push finishes; re-stash on failure.
        try:
            fut.result()
        except Exception as exc:
            logger.debug("face event push failed: %s", exc)
            self._stash_pending_face_event(face_event)

    try:
        scheduled = asyncio.run_coroutine_threadsafe(
            self._push_face_context_event(face_event), loop,
        )
        scheduled.add_done_callback(_on_done)
    except Exception as exc:
        logger.debug("schedule face event failed: %s", exc)
        self._stash_pending_face_event(face_event)
568
+
569
async def _push_face_context_event(self, face_event: dict[str, Any]) -> None:
    """Send a face-state update to the session as context text.

    With no session, or when sending raises, the event is stashed for a
    later retry and nothing is recorded. After a successful send, a summary
    of what was sent (state, names, confidences, send time) is stored as
    the last face event sent.

    Args:
        face_event: Mapping read with ``.get``; the keys used are
            ``state``, ``name``, ``previous_state``, ``previous_name``,
            ``lbph_confidence`` and ``detection_confidence``.
    """
    if self.session is None:
        self._stash_pending_face_event(face_event)
        return

    state = str(face_event.get("state", "unknown"))
    name = face_event.get("name")
    prefix = f"[External face update {self.format_timestamp()}] "
    detail = f"state={state}; name={name}. Context only; don't respond unless the user speaks."
    msg = prefix + detail

    try:
        # Sent as realtime text input: per the original author's note this
        # adds context without forcing a model turn.
        await self.session.send_realtime_input(text=msg)
    except Exception as exc:
        logger.debug("send face context failed: %s", exc)
        self._stash_pending_face_event(face_event)
        return

    sent_at = time.time()
    record = dict(
        state=state,
        name=name,
        previous_state=face_event.get("previous_state"),
        previous_name=face_event.get("previous_name"),
        # Missing/None/zero confidences are recorded as 0.0.
        lbph_confidence=float(face_event.get("lbph_confidence") or 0.0),
        detection_confidence=float(face_event.get("detection_confidence") or 0.0),
        sent_at=sent_at,
        sent_at_iso=datetime.fromtimestamp(sent_at).strftime("%Y-%m-%d %H:%M:%S"),
    )
    with self._face_event_lock:
        self._last_face_event_sent = record
602
+
603
+ def get_last_face_event_sent(self) -> dict[str, Any] | None:
604
+ with self._face_event_lock:
605
+ return dict(self._last_face_event_sent) if self._last_face_event_sent else None
606
+
607
+ # --- session events ---
608
+
609
def _stash_pending_session_event(self, payload: dict[str, Any]) -> None:
    """Keep a shallow copy of ``payload`` as the one pending session event.

    Any previously stashed session event is replaced.
    """
    snapshot = dict(payload)
    with self._pending_session_event_lock:
        self._pending_session_event = snapshot
612
+
613
+ def _pop_pending_session_event(self) -> dict[str, Any] | None:
614
+ with self._pending_session_event_lock:
615
+ p = self._pending_session_event
616
+ self._pending_session_event = None
617
+ return p
618
+
619
async def _flush_pending_session_event(self) -> None:
    """Push the stashed session event, if any, to the session.

    The payload is taken out of the stash first; if pushing raises, it is
    stashed again (silently).
    """
    pending = self._pop_pending_session_event()
    if pending is None:
        return
    try:
        await self._push_session_context_event(pending)
    except Exception:
        self._stash_pending_session_event(pending)
626
+
627
def notify_session_event(self, previous_state: Any, new_state: Any, snapshot: Any) -> None:
    """Report a backend state transition to the live session.

    Builds a payload from the two states (their ``.value`` when present,
    else ``str()``) and the snapshot (``to_dict()`` when available, else an
    empty dict). If building the payload fails, the call is a no-op.

    A transition to ``"idle"`` clears ``self._idle_speech_cue_pushed``.

    The payload is pushed on the runtime loop via
    ``run_coroutine_threadsafe``; whenever that is not possible (no or
    closed loop, no session, scheduling error, or the push raising) the
    payload is stashed instead.
    """
    try:
        prev_value = getattr(previous_state, "value", str(previous_state))
        new_value = getattr(new_state, "value", str(new_state))
        snap_dict = snapshot.to_dict() if hasattr(snapshot, "to_dict") else {}
        payload = {
            "previous_state": prev_value,
            "new_state": new_value,
            "snapshot": snap_dict,
        }
    except Exception:
        return

    if payload.get("new_state") == "idle":
        self._idle_speech_cue_pushed = False

    loop = self._runtime_loop
    not_ready = loop is None or loop.is_closed() or self.session is None
    if not_ready:
        self._stash_pending_session_event(payload)
        return

    def _on_done(fut: "asyncio.Future[None]") -> None:
        # Runs once the scheduled push finishes; re-stash on failure.
        try:
            fut.result()
        except Exception:
            self._stash_pending_session_event(payload)

    try:
        scheduled = asyncio.run_coroutine_threadsafe(
            self._push_session_context_event(payload), loop,
        )
        scheduled.add_done_callback(_on_done)
    except Exception:
        self._stash_pending_session_event(payload)
658
+
659
async def _push_session_context_event(self, payload: dict[str, Any]) -> None:
    """Describe a backend session-state transition to the live session.

    Args:
        payload: Mapping with ``previous_state``, ``new_state`` and
            ``snapshot`` (a dict read for ``visitor_name``,
            ``employee_name``, ``matched_appointment`` and
            ``email_sent_to``).

    With no session the payload is stashed for later. Otherwise a one-line
    status message is built and sent:

    * when the new state calls for immediate speech, as a completed user
      turn (``send_client_content(..., turn_complete=True)``) so the model
      responds;
    * otherwise as realtime text input (context only).

    The "speak now" decision is computed once and drives both the wording
    of the message and the transport, so the two cannot disagree. On a
    successful send the payload, send time and hint are recorded as the
    last session event sent. A failed send is logged at debug level and
    dropped (not re-stashed).
    """
    if self.session is None:
        self._stash_pending_session_event(payload)
        return

    snap = payload.get("snapshot") or {}
    new_state_value = payload.get("new_state")

    hint = ""
    speak_now = False
    try:
        # Imported lazily, inside the try, so an import problem only costs the hint.
        from reachy_mini_receptionist.conversation_controller import (
            next_action_hint,
            should_speak_immediately,
        )
        from reachy_mini_receptionist.receptionist_state import ReceptionState

        if new_state_value:
            new_state_enum = ReceptionState(new_state_value)
            hint = next_action_hint(new_state_enum)
            speak_now = should_speak_immediately(new_state_enum)
    except Exception:
        # Deliberately best-effort: an unknown state or a failing helper
        # degrades to a context-only update.
        pass

    base = (
        f"[Backend session update {self.format_timestamp()}] "
        f"state: {payload.get('previous_state')} -> {new_state_value}; "
        f"visitor={snap.get('visitor_name')}; "
        f"employee={snap.get('employee_name')}; "
        f"appointment={(snap.get('matched_appointment') or {}).get('time')}; "
        f"email_sent_to={snap.get('email_sent_to')}."
    )
    if hint and speak_now:
        msg = f"{base} SPEAK NOW: {hint}"
    elif hint:
        msg = f"{base} Next: {hint} (Stay quiet until the visitor speaks; context only.)"
    else:
        msg = f"{base} Context only; do not respond unless the user speaks."

    try:
        if speak_now:
            # A completed user turn makes the model actually respond.
            await self.session.send_client_content(
                turns=[{
                    "role": "user",
                    "parts": [{"text": msg}],
                }],
                turn_complete=True,
            )
        else:
            # Context-only update.
            await self.session.send_realtime_input(text=msg)
    except Exception as e:
        logger.debug("session context push failed: %s", e)
        return

    sent_payload = {**payload, "sent_at": time.time(), "hint": hint}
    with self._session_event_lock:
        self._last_session_event_sent = sent_payload
724
+
725
+ def get_last_session_event_sent(self) -> dict[str, Any] | None:
726
+ with self._session_event_lock:
727
+ return dict(self._last_session_event_sent) if self._last_session_event_sent else None
728
+
729
+ # ------------------------------------------------------------------
730
+ # Personality + voice (UI hooks)
731
+ # ------------------------------------------------------------------
732
+
733
+ async def apply_personality(self, profile: str | None) -> str:
734
+ """Profile updates require a session restart — minimal impl."""
735
+ try:
736
+ from reachy_mini_receptionist.config import set_custom_profile
737
+ set_custom_profile(profile)
738
+ return "Personality updated. Restart for it to take effect (Gemini backend)."
739
+ except Exception as e:
740
+ return f"Failed to apply personality: {e}"
741
+
742
async def get_available_voices(self) -> list[str]:
    """Return the Gemini prebuilt voice names (fixed list, no discovery API).

    A new list is returned on every call.
    """
    prebuilt = ("Aoede", "Charon", "Kore", "Puck", "Fenrir")
    return list(prebuilt)
745
+
746
+ # ------------------------------------------------------------------
747
+ # Helpers
748
+ # ------------------------------------------------------------------
749
+
750
def format_timestamp(self) -> str:
    """Return ``[YYYY-MM-DD HH:MM:SS | +N.Ns]`` for use in context messages.

    The first part is local wall-clock time; the second is the time elapsed
    since ``self.start_time``.

    The elapsed time uses the running event loop's clock when called from a
    coroutine or callback. When there is no running loop in the calling
    thread it falls back to ``time.monotonic()`` instead of raising.
    NOTE(review): the fallback assumes ``start_time`` was taken from a loop
    clock based on ``time.monotonic()`` (the asyncio default); confirm
    where ``start_time`` is set.
    """
    try:
        now = asyncio.get_running_loop().time()
    except RuntimeError:
        # No running loop in this thread.
        now = time.monotonic()
    elapsed = now - self.start_time
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    return f"[{stamp} | +{elapsed:.1f}s]"
src/reachy_mini_receptionist/gradio_personality.py ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Gradio personality UI components and wiring.
2
+
3
+ This module encapsulates the UI elements and logic related to managing
4
+ conversation "personalities" (profiles) so that `main.py` stays lean.
5
+ """
6
+
7
+ from __future__ import annotations
8
+ from typing import Any
9
+ from pathlib import Path
10
+
11
+ import gradio as gr
12
+
13
+ from .config import LOCKED_PROFILE, config
14
+
15
+
16
class PersonalityUI:
    """Container for personality-related Gradio components.

    Typical use: construct, call :meth:`create_components` to build the
    widgets, pass :meth:`additional_inputs_ordered` to the stream UI, then
    call :meth:`wire_events` with the backend handler and the Blocks app.

    Profiles live in sub-directories of ``profiles/`` next to this module
    (user-created ones under ``profiles/user_personalities/``); a directory
    counts as a profile when it contains ``instructions.txt``.
    """

    def __init__(self) -> None:
        """Initialize the PersonalityUI instance."""
        # Constants and paths
        self.DEFAULT_OPTION = "(built-in default)"
        self._profiles_root = Path(__file__).parent / "profiles"
        self._tools_dir = Path(__file__).parent / "tools"
        self._prompts_dir = Path(__file__).parent / "prompts"

        # Components (initialized in create_components)
        self.personalities_dropdown: gr.Dropdown
        self.apply_btn: gr.Button
        self.status_md: gr.Markdown
        self.preview_md: gr.Markdown
        self.person_name_tb: gr.Textbox
        self.person_instr_ta: gr.TextArea
        self.tools_txt_ta: gr.TextArea
        self.voice_dropdown: gr.Dropdown
        self.new_personality_btn: gr.Button
        self.available_tools_cg: gr.CheckboxGroup
        self.save_btn: gr.Button

    # ---------- Filesystem helpers ----------
    def _list_personalities(self) -> list[str]:
        """Return profile names: built-in ones first, then ``user_personalities/<name>``.

        Best-effort: a filesystem error stops the scan and whatever was
        collected so far is returned.
        """
        names: list[str] = []
        try:
            if self._profiles_root.exists():
                for p in sorted(self._profiles_root.iterdir()):
                    if p.name == "user_personalities":
                        continue
                    if p.is_dir() and (p / "instructions.txt").exists():
                        names.append(p.name)
            user_dir = self._profiles_root / "user_personalities"
            if user_dir.exists():
                for p in sorted(user_dir.iterdir()):
                    if p.is_dir() and (p / "instructions.txt").exists():
                        names.append(f"user_personalities/{p.name}")
        except Exception:
            pass
        return names

    def _resolve_profile_dir(self, selection: str) -> Path:
        """Return the directory of the profile named by a dropdown selection."""
        return self._profiles_root / selection

    def _read_instructions_for(self, name: str) -> str:
        """Return the stripped instructions text for ``name``.

        The default option maps to ``prompts/default_prompt.txt``. A missing
        file yields ``""``; a read error yields an explanatory message
        instead of raising.
        """
        try:
            if name == self.DEFAULT_OPTION:
                default_file = self._prompts_dir / "default_prompt.txt"
                if default_file.exists():
                    return default_file.read_text(encoding="utf-8").strip()
                return ""
            target = self._resolve_profile_dir(name) / "instructions.txt"
            if target.exists():
                return target.read_text(encoding="utf-8").strip()
            return ""
        except Exception as e:
            return f"Could not load instructions: {e}"

    @staticmethod
    def _sanitize_name(name: str) -> str:
        """Turn free text into a directory-safe name.

        Whitespace runs become ``_``; every character other than ASCII
        letters, digits, ``_`` and ``-`` is dropped. May return ``""``.
        """
        import re

        s = name.strip()
        s = re.sub(r"\s+", "_", s)
        s = re.sub(r"[^a-zA-Z0-9_-]", "", s)
        return s

    # ---------- Public API ----------
    def create_components(self) -> None:
        """Instantiate Gradio components for the personality UI.

        When ``LOCKED_PROFILE`` is set, the dropdown offers only that
        profile and every editing control is created non-interactive.
        """
        if LOCKED_PROFILE is not None:
            is_locked = True
            current_value: str = LOCKED_PROFILE
            dropdown_label = "Select personality (locked)"
            dropdown_choices: list[str] = [LOCKED_PROFILE]
        else:
            is_locked = False
            current_value = config.REACHY_MINI_CUSTOM_PROFILE or self.DEFAULT_OPTION
            dropdown_label = "Select personality"
            dropdown_choices = [self.DEFAULT_OPTION, *(self._list_personalities())]

        self.personalities_dropdown = gr.Dropdown(
            label=dropdown_label,
            choices=dropdown_choices,
            value=current_value,
            interactive=not is_locked,
        )
        self.apply_btn = gr.Button("Apply personality", interactive=not is_locked)
        self.status_md = gr.Markdown(visible=True)
        self.preview_md = gr.Markdown(value=self._read_instructions_for(current_value))
        self.person_name_tb = gr.Textbox(label="Personality name", interactive=not is_locked)
        self.person_instr_ta = gr.TextArea(label="Personality instructions", lines=10, interactive=not is_locked)
        self.tools_txt_ta = gr.TextArea(label="tools.txt", lines=10, interactive=not is_locked)
        # Placeholder voice list; replaced by the backend's list on page load (see wire_events).
        self.voice_dropdown = gr.Dropdown(label="Voice", choices=["marin"], value="marin", interactive=not is_locked)
        self.new_personality_btn = gr.Button("New personality", interactive=not is_locked)
        self.available_tools_cg = gr.CheckboxGroup(label="Available tools (helper)", choices=[], value=[], interactive=not is_locked)
        self.save_btn = gr.Button("Save personality (instructions + tools)", interactive=not is_locked)

    def additional_inputs_ordered(self) -> list[Any]:
        """Return the additional inputs in the expected order for Stream."""
        return [
            self.personalities_dropdown,
            self.apply_btn,
            self.new_personality_btn,
            self.status_md,
            self.preview_md,
            self.person_name_tb,
            self.person_instr_ta,
            self.tools_txt_ta,
            self.voice_dropdown,
            self.available_tools_cg,
            self.save_btn,
        ]

    # ---------- Event wiring ----------
    def wire_events(self, handler: Any, blocks: gr.Blocks) -> None:
        """Attach event handlers to components within a Blocks context.

        Args:
            handler: Backend handler; must provide the coroutines
                ``apply_personality(profile)`` and ``get_available_voices()``.
            blocks: The Blocks app the components belong to.
        """

        async def _apply_personality(selected: str) -> tuple[str, str]:
            """Apply the selection on the backend; return (status, preview)."""
            if LOCKED_PROFILE is not None and selected != LOCKED_PROFILE:
                return (
                    f"Profile is locked to '{LOCKED_PROFILE}'. Cannot change personality.",
                    self._read_instructions_for(LOCKED_PROFILE),
                )
            profile = None if selected == self.DEFAULT_OPTION else selected
            status = await handler.apply_personality(profile)
            preview = self._read_instructions_for(selected)
            return status, preview

        def _read_voice_for(name: str) -> str:
            """Return the voice stored in the profile's ``voice.txt`` (default "marin")."""
            try:
                if name == self.DEFAULT_OPTION:
                    return "marin"
                vf = self._resolve_profile_dir(name) / "voice.txt"
                if vf.exists():
                    v = vf.read_text(encoding="utf-8").strip()
                    return v or "marin"
            except Exception:
                pass
            return "marin"

        async def _fetch_voices(selected: str) -> dict[str, Any]:
            """Populate the voice dropdown from the backend's voice list.

            The selected value is always one of the offered choices: the
            profile's stored voice when the backend knows it, otherwise
            "marin" if offered, otherwise the backend's first voice. If the
            backend call fails or returns nothing, the dropdown falls back
            to the single placeholder voice.
            """
            try:
                voices = list(await handler.get_available_voices())
                if not voices:
                    return gr.update(choices=["marin"], value="marin")
                current = _read_voice_for(selected)
                if current not in voices:
                    # Not every backend offers "marin"; never select a
                    # value the dropdown does not list.
                    current = "marin" if "marin" in voices else voices[0]
                return gr.update(choices=voices, value=current)
            except Exception:
                return gr.update(choices=["marin"], value="marin")

        def _available_tools_for(selected: str) -> tuple[list[str], list[str]]:
            """Return (shared tool names, profile-local tool names), each sorted.

            Names are the stems of ``*.py`` files in the tools directory
            (minus ``__init__`` and ``core_tools``) and in the profile's own
            directory. Best-effort on filesystem errors.
            """
            shared: list[str] = []
            try:
                for py in self._tools_dir.glob("*.py"):
                    if py.stem in {"__init__", "core_tools"}:
                        continue
                    shared.append(py.stem)
            except Exception:
                pass
            local: list[str] = []
            try:
                if selected != self.DEFAULT_OPTION:
                    for py in (self._profiles_root / selected).glob("*.py"):
                        local.append(py.stem)
            except Exception:
                pass
            return sorted(shared), sorted(local)

        def _parse_enabled_tools(text: str) -> list[str]:
            """Return the non-blank, non-comment lines of a tools.txt body."""
            enabled: list[str] = []
            for line in text.splitlines():
                s = line.strip()
                if not s or s.startswith("#"):
                    continue
                enabled.append(s)
            return enabled

        def _load_profile_for_edit(selected: str) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any], str]:
            """Fill the editor widgets from the selected profile's files."""
            instr = self._read_instructions_for(selected)
            tools_txt = ""
            if selected != self.DEFAULT_OPTION:
                tp = self._resolve_profile_dir(selected) / "tools.txt"
                if tp.exists():
                    tools_txt = tp.read_text(encoding="utf-8")
            shared, local = _available_tools_for(selected)
            all_tools = sorted(set(shared + local))
            enabled = _parse_enabled_tools(tools_txt)
            status_text = f"Loaded profile '{selected}'."
            return (
                gr.update(value=instr),
                gr.update(value=tools_txt),
                gr.update(choices=all_tools, value=enabled),
                status_text,
            )

        def _new_personality() -> tuple[
            dict[str, Any], dict[str, Any], dict[str, Any], dict[str, Any], str, dict[str, Any]
        ]:
            """Reset the editor widgets to a blank template for a new profile."""
            try:
                # Prefill with hints
                instr_val = "# Write your instructions here\n# e.g., Keep responses concise and friendly."
                tools_txt_val = "# tools enabled for this profile\n"
                return (
                    gr.update(value=""),
                    gr.update(value=instr_val),
                    gr.update(value=tools_txt_val),
                    gr.update(choices=sorted(_available_tools_for(self.DEFAULT_OPTION)[0]), value=[]),
                    "Fill in a name, instructions and (optional) tools, then Save.",
                    gr.update(value="marin"),
                )
            except Exception:
                return (
                    gr.update(),
                    gr.update(),
                    gr.update(),
                    gr.update(),
                    "Failed to initialize new personality.",
                    gr.update(),
                )

        def _save_personality(
            name: str, instructions: str, tools_text: str, voice: str
        ) -> tuple[dict[str, Any], dict[str, Any], str]:
            """Write the profile under ``user_personalities/`` and select it."""
            name_s = self._sanitize_name(name)
            if not name_s:
                return gr.update(), gr.update(), "Please enter a valid name."
            try:
                target_dir = self._profiles_root / "user_personalities" / name_s
                target_dir.mkdir(parents=True, exist_ok=True)
                (target_dir / "instructions.txt").write_text(instructions.strip() + "\n", encoding="utf-8")
                (target_dir / "tools.txt").write_text(tools_text.strip() + "\n", encoding="utf-8")
                (target_dir / "voice.txt").write_text((voice or "marin").strip() + "\n", encoding="utf-8")

                choices = self._list_personalities()
                value = f"user_personalities/{name_s}"
                if value not in choices:
                    choices.append(value)
                return (
                    gr.update(choices=[self.DEFAULT_OPTION, *sorted(choices)], value=value),
                    gr.update(value=instructions),
                    f"Saved personality '{name_s}'.",
                )
            except Exception as e:
                return gr.update(), gr.update(), f"Failed to save personality: {e}"

        def _sync_tools_from_checks(selected: list[str], current_text: str) -> dict[str, Any]:
            """Rewrite tools.txt as: existing comment lines, then the checked tools."""
            comments = [ln for ln in current_text.splitlines() if ln.strip().startswith("#")]
            body = "\n".join(selected)
            out = ("\n".join(comments) + ("\n" if comments else "") + body).strip() + "\n"
            return gr.update(value=out)

        with blocks:
            self.apply_btn.click(
                fn=_apply_personality,
                inputs=[self.personalities_dropdown],
                outputs=[self.status_md, self.preview_md],
            )

            self.personalities_dropdown.change(
                fn=_load_profile_for_edit,
                inputs=[self.personalities_dropdown],
                outputs=[self.person_instr_ta, self.tools_txt_ta, self.available_tools_cg, self.status_md],
            )

            blocks.load(
                fn=_fetch_voices,
                inputs=[self.personalities_dropdown],
                outputs=[self.voice_dropdown],
            )

            self.available_tools_cg.change(
                fn=_sync_tools_from_checks,
                inputs=[self.available_tools_cg, self.tools_txt_ta],
                outputs=[self.tools_txt_ta],
            )

            self.new_personality_btn.click(
                fn=_new_personality,
                inputs=[],
                outputs=[
                    self.person_name_tb,
                    self.person_instr_ta,
                    self.tools_txt_ta,
                    self.available_tools_cg,
                    self.status_md,
                    self.voice_dropdown,
                ],
            )

            # Saving immediately applies the freshly saved profile.
            self.save_btn.click(
                fn=_save_personality,
                inputs=[self.person_name_tb, self.person_instr_ta, self.tools_txt_ta, self.voice_dropdown],
                outputs=[self.personalities_dropdown, self.person_instr_ta, self.status_md],
            ).then(
                fn=_apply_personality,
                inputs=[self.personalities_dropdown],
                outputs=[self.status_md, self.preview_md],
            )
+ )
src/reachy_mini_receptionist/headless_personality.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Headless personality management (console-based).
2
+
3
+ Provides an interactive CLI to browse, preview, apply, create and edit
4
+ "personalities" (profiles) when running without Gradio.
5
+
6
+ This module is intentionally not shared with the Gradio implementation to
7
+ avoid coupling and keep responsibilities clear for headless mode.
8
+ """
9
+
10
+ from __future__ import annotations
11
+ from typing import List
12
+ from pathlib import Path
13
+
14
+
15
# Selection name for "no custom profile". `read_instructions_for` maps it to
# prompts/default_prompt.txt, and `available_tools_for` lists no
# profile-local tools for it.
DEFAULT_OPTION = "(built-in default)"
16
+
17
+
18
def _profiles_root() -> Path:
    """Return the ``profiles`` directory located next to this module."""
    module_dir = Path(__file__).parent
    return module_dir.joinpath("profiles")
20
+
21
+
22
def _prompts_dir() -> Path:
    """Return the ``prompts`` directory located next to this module."""
    module_dir = Path(__file__).parent
    return module_dir.joinpath("prompts")
24
+
25
+
26
def _tools_dir() -> Path:
    """Return the ``tools`` directory located next to this module."""
    module_dir = Path(__file__).parent
    return module_dir.joinpath("tools")
28
+
29
+
30
def _sanitize_name(name: str) -> str:
    """Turn free text into a directory-safe profile name.

    Surrounding whitespace is trimmed, inner whitespace runs become ``_``,
    and every character other than ASCII letters, digits, ``_`` and ``-``
    is removed. The result may be empty.
    """
    import re

    underscored = re.sub(r"\s+", "_", name.strip())
    return re.sub(r"[^a-zA-Z0-9_-]", "", underscored)
37
+
38
+
39
def list_personalities() -> List[str]:
    """List available personality profile names.

    Built-in profiles come first (directory name), followed by user
    profiles as ``user_personalities/<name>``. A directory counts as a
    profile when it contains ``instructions.txt``. Best-effort: a
    filesystem error ends the scan and the names gathered so far are
    returned.
    """
    found: List[str] = []
    base = _profiles_root()

    def _is_profile(entry: Path) -> bool:
        return entry.is_dir() and (entry / "instructions.txt").exists()

    try:
        if base.exists():
            for entry in sorted(base.iterdir()):
                # The user folder is a container, listed separately below.
                if entry.name != "user_personalities" and _is_profile(entry):
                    found.append(entry.name)
        user_root = base / "user_personalities"
        if user_root.exists():
            for entry in sorted(user_root.iterdir()):
                if _is_profile(entry):
                    found.append(f"user_personalities/{entry.name}")
    except Exception:
        pass
    return found
58
+
59
+
60
def resolve_profile_dir(selection: str) -> Path:
    """Return the directory for a profile selection (relative to the profiles root)."""
    root = _profiles_root()
    return root.joinpath(selection)
63
+
64
+
65
def read_instructions_for(name: str) -> str:
    """Return the stripped instructions text for the given profile name.

    ``DEFAULT_OPTION`` reads ``default_prompt.txt`` from the prompts
    directory; any other name reads that profile's ``instructions.txt``.
    A missing file yields ``""``; any error yields an explanatory message
    rather than an exception.
    """
    try:
        if name == DEFAULT_OPTION:
            source = _prompts_dir() / "default_prompt.txt"
        else:
            source = resolve_profile_dir(name) / "instructions.txt"
        if not source.exists():
            return ""
        return source.read_text(encoding="utf-8").strip()
    except Exception as exc:
        return f"Could not load instructions: {exc}"
75
+
76
+
77
def available_tools_for(selected: str) -> List[str]:
    """List available tool modules for the given profile selection.

    Combines the stems of ``*.py`` files in the shared tools directory
    (excluding ``__init__`` and ``core_tools``) with those in the profile's
    own directory (skipped for ``DEFAULT_OPTION``). The result is sorted
    and free of duplicates. Each scan is best-effort: an error ends that
    scan and keeps what it found so far.
    """
    excluded = {"__init__", "core_tools"}
    names = set()

    try:
        for module in _tools_dir().glob("*.py"):
            if module.stem not in excluded:
                names.add(module.stem)
    except Exception:
        pass

    try:
        if selected != DEFAULT_OPTION:
            for module in resolve_profile_dir(selected).glob("*.py"):
                names.add(module.stem)
    except Exception:
        pass

    return sorted(names)
95
+
96
+
97
def _write_profile(name_s: str, instructions: str, tools_text: str, voice: str = "marin") -> None:
    """Write a user profile's files, creating its directory if needed.

    Writes ``instructions.txt``, ``tools.txt`` and ``voice.txt`` (in that
    order) under ``user_personalities/<name_s>``; each is the stripped text
    plus a trailing newline, UTF-8 encoded. Empty ``tools_text`` becomes an
    empty file body; empty ``voice`` falls back to ``"marin"``.

    Args:
        name_s: Already-sanitized profile directory name.
    """
    profile_dir = _profiles_root() / "user_personalities" / name_s
    profile_dir.mkdir(parents=True, exist_ok=True)
    files = (
        ("instructions.txt", instructions),
        ("tools.txt", tools_text or ""),
        ("voice.txt", voice or "marin"),
    )
    for filename, text in files:
        (profile_dir / filename).write_text(text.strip() + "\n", encoding="utf-8")
src/reachy_mini_receptionist/headless_personality_ui.py ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Settings UI routes for headless personality management.
2
+
3
+ Exposes REST endpoints on the provided FastAPI settings app. The
4
+ implementation schedules backend actions (apply personality, fetch voices)
5
+ onto the running LocalStream asyncio loop using the supplied get_loop
6
+ callable to avoid cross-thread issues.
7
+ """
8
+
9
+ from __future__ import annotations
10
+ import asyncio
11
+ import logging
12
+ from typing import Any, Callable, Optional
13
+
14
+ from fastapi import FastAPI
15
+
16
+ from .config import LOCKED_PROFILE, config
17
+ from .openai_realtime import OpenaiRealtimeHandler
18
+ from .headless_personality import (
19
+ DEFAULT_OPTION,
20
+ _sanitize_name,
21
+ _write_profile,
22
+ list_personalities,
23
+ available_tools_for,
24
+ resolve_profile_dir,
25
+ read_instructions_for,
26
+ )
27
+
28
+
29
+ def mount_personality_routes(
30
+ app: FastAPI,
31
+ handler: OpenaiRealtimeHandler,
32
+ get_loop: Callable[[], asyncio.AbstractEventLoop | None],
33
+ *,
34
+ persist_personality: Callable[[Optional[str]], None] | None = None,
35
+ get_persisted_personality: Callable[[], Optional[str]] | None = None,
36
+ ) -> None:
37
+ """Register personality management endpoints on a FastAPI app."""
38
+ try:
39
+ from fastapi import Request
40
+ from pydantic import BaseModel
41
+ from fastapi.responses import JSONResponse
42
+ except Exception: # pragma: no cover - only when settings app not available
43
+ return
44
+
45
+ class SavePayload(BaseModel):
46
+ name: str
47
+ instructions: str
48
+ tools_text: str
49
+ voice: Optional[str] = "marin"
50
+
51
+ class ApplyPayload(BaseModel):
52
+ name: str
53
+ persist: Optional[bool] = False
54
+
55
+ def _startup_choice() -> Any:
56
+ """Return the persisted startup personality or default."""
57
+ try:
58
+ if get_persisted_personality is not None:
59
+ stored = get_persisted_personality()
60
+ if stored:
61
+ return stored
62
+ env_val = getattr(config, "REACHY_MINI_CUSTOM_PROFILE", None)
63
+ if env_val:
64
+ return env_val
65
+ except Exception:
66
+ pass
67
+ return DEFAULT_OPTION
68
+
69
+ def _current_choice() -> str:
70
+ try:
71
+ cur = getattr(config, "REACHY_MINI_CUSTOM_PROFILE", None)
72
+ return cur or DEFAULT_OPTION
73
+ except Exception:
74
+ return DEFAULT_OPTION
75
+
76
+ @app.get("/personalities")
77
+ def _list() -> dict: # type: ignore
78
+ choices = [DEFAULT_OPTION, *list_personalities()]
79
+ return {
80
+ "choices": choices,
81
+ "current": _current_choice(),
82
+ "startup": _startup_choice(),
83
+ "locked": LOCKED_PROFILE is not None,
84
+ "locked_to": LOCKED_PROFILE,
85
+ }
86
+
87
+ @app.get("/personalities/load")
88
+ def _load(name: str) -> dict: # type: ignore
89
+ instr = read_instructions_for(name)
90
+ tools_txt = ""
91
+ voice = "marin"
92
+ if name != DEFAULT_OPTION:
93
+ pdir = resolve_profile_dir(name)
94
+ tp = pdir / "tools.txt"
95
+ if tp.exists():
96
+ tools_txt = tp.read_text(encoding="utf-8")
97
+ vf = pdir / "voice.txt"
98
+ if vf.exists():
99
+ v = vf.read_text(encoding="utf-8").strip()
100
+ voice = v or "marin"
101
+ avail = available_tools_for(name)
102
+ enabled = [ln.strip() for ln in tools_txt.splitlines() if ln.strip() and not ln.strip().startswith("#")]
103
+ return {
104
+ "instructions": instr,
105
+ "tools_text": tools_txt,
106
+ "voice": voice,
107
+ "available_tools": avail,
108
+ "enabled_tools": enabled,
109
+ }
110
+
111
+ @app.post("/personalities/save")
112
+ async def _save(request: Request) -> dict: # type: ignore
113
+ # Accept raw JSON only to avoid validation-related 422s
114
+ try:
115
+ raw = await request.json()
116
+ except Exception:
117
+ raw = {}
118
+ name = str(raw.get("name", ""))
119
+ instructions = str(raw.get("instructions", ""))
120
+ tools_text = str(raw.get("tools_text", ""))
121
+ voice = str(raw.get("voice", "marin")) if raw.get("voice") is not None else "marin"
122
+
123
+ name_s = _sanitize_name(name)
124
+ if not name_s:
125
+ return JSONResponse({"ok": False, "error": "invalid_name"}, status_code=400) # type: ignore
126
+ try:
127
+ logger.info(
128
+ "Headless save: name=%r voice=%r instr_len=%d tools_len=%d",
129
+ name_s,
130
+ voice,
131
+ len(instructions),
132
+ len(tools_text),
133
+ )
134
+ _write_profile(name_s, instructions, tools_text, voice or "marin")
135
+ value = f"user_personalities/{name_s}"
136
+ choices = [DEFAULT_OPTION, *list_personalities()]
137
+ return {"ok": True, "value": value, "choices": choices}
138
+ except Exception as e:
139
+ return JSONResponse({"ok": False, "error": str(e)}, status_code=500) # type: ignore
140
+
141
+ @app.post("/personalities/save_raw")
142
+ async def _save_raw(
143
+ request: Request,
144
+ name: Optional[str] = None,
145
+ instructions: Optional[str] = None,
146
+ tools_text: Optional[str] = None,
147
+ voice: Optional[str] = None,
148
+ ) -> dict: # type: ignore
149
+ # Accept query params, form-encoded, or raw JSON
150
+ data = {"name": name, "instructions": instructions, "tools_text": tools_text, "voice": voice}
151
+ # Prefer form if present
152
+ try:
153
+ form = await request.form()
154
+ for k in ("name", "instructions", "tools_text", "voice"):
155
+ if k in form and form[k] is not None:
156
+ data[k] = str(form[k])
157
+ except Exception:
158
+ pass
159
+ # Try JSON
160
+ try:
161
+ raw = await request.json()
162
+ if isinstance(raw, dict):
163
+ for k in ("name", "instructions", "tools_text", "voice"):
164
+ if raw.get(k) is not None:
165
+ data[k] = str(raw.get(k))
166
+ except Exception:
167
+ pass
168
+
169
+ name_s = _sanitize_name(str(data.get("name") or ""))
170
+ if not name_s:
171
+ return JSONResponse({"ok": False, "error": "invalid_name"}, status_code=400) # type: ignore
172
+ instr = str(data.get("instructions") or "")
173
+ tools = str(data.get("tools_text") or "")
174
+ v = str(data.get("voice") or "marin")
175
+ try:
176
+ logger.info(
177
+ "Headless save_raw: name=%r voice=%r instr_len=%d tools_len=%d", name_s, v, len(instr), len(tools)
178
+ )
179
+ _write_profile(name_s, instr, tools, v)
180
+ value = f"user_personalities/{name_s}"
181
+ choices = [DEFAULT_OPTION, *list_personalities()]
182
+ return {"ok": True, "value": value, "choices": choices}
183
+ except Exception as e:
184
+ return JSONResponse({"ok": False, "error": str(e)}, status_code=500) # type: ignore
185
+
186
+ @app.get("/personalities/save_raw")
187
+ async def _save_raw_get(name: str, instructions: str = "", tools_text: str = "", voice: str = "marin") -> dict: # type: ignore
188
+ name_s = _sanitize_name(name)
189
+ if not name_s:
190
+ return JSONResponse({"ok": False, "error": "invalid_name"}, status_code=400) # type: ignore
191
+ try:
192
+ logger.info(
193
+ "Headless save_raw(GET): name=%r voice=%r instr_len=%d tools_len=%d",
194
+ name_s,
195
+ voice,
196
+ len(instructions),
197
+ len(tools_text),
198
+ )
199
+ _write_profile(name_s, instructions, tools_text, voice or "marin")
200
+ value = f"user_personalities/{name_s}"
201
+ choices = [DEFAULT_OPTION, *list_personalities()]
202
+ return {"ok": True, "value": value, "choices": choices}
203
+ except Exception as e:
204
+ return JSONResponse({"ok": False, "error": str(e)}, status_code=500) # type: ignore
205
+
206
+ logger = logging.getLogger(__name__)
207
+
208
+ @app.post("/personalities/apply")
209
+ async def _apply(
210
+ payload: ApplyPayload | None = None,
211
+ name: str | None = None,
212
+ persist: Optional[bool] = None,
213
+ request: Optional[Request] = None,
214
+ ) -> dict: # type: ignore
215
+ if LOCKED_PROFILE is not None:
216
+ return JSONResponse(
217
+ {"ok": False, "error": "profile_locked", "locked_to": LOCKED_PROFILE},
218
+ status_code=403,
219
+ ) # type: ignore
220
+ loop = get_loop()
221
+ if loop is None:
222
+ return JSONResponse({"ok": False, "error": "loop_unavailable"}, status_code=503) # type: ignore
223
+
224
+ # Accept both JSON payload and query param for convenience
225
+ sel_name: Optional[str] = None
226
+ persist_flag = bool(persist) if persist is not None else False
227
+ if payload and getattr(payload, "name", None):
228
+ sel_name = payload.name
229
+ persist_flag = bool(getattr(payload, "persist", False))
230
+ elif name:
231
+ sel_name = name
232
+ elif request is not None:
233
+ try:
234
+ body = await request.json()
235
+ if isinstance(body, dict) and body.get("name"):
236
+ sel_name = str(body.get("name"))
237
+ if isinstance(body, dict) and "persist" in body:
238
+ persist_flag = bool(body.get("persist"))
239
+ except Exception:
240
+ sel_name = None
241
+ if request is not None:
242
+ try:
243
+ q_persist = request.query_params.get("persist")
244
+ if q_persist is not None:
245
+ persist_flag = str(q_persist).lower() in {"1", "true", "yes", "on"}
246
+ except Exception:
247
+ pass
248
+ if not sel_name:
249
+ sel_name = DEFAULT_OPTION
250
+
251
+ async def _do_apply() -> str:
252
+ sel = None if sel_name == DEFAULT_OPTION else sel_name
253
+ status = await handler.apply_personality(sel)
254
+ return status
255
+
256
+ try:
257
+ logger.info("Headless apply: requested name=%r", sel_name)
258
+ fut = asyncio.run_coroutine_threadsafe(_do_apply(), loop)
259
+ status = fut.result(timeout=10)
260
+ persisted_choice = _startup_choice()
261
+ if persist_flag and persist_personality is not None:
262
+ try:
263
+ persist_personality(None if sel_name == DEFAULT_OPTION else sel_name)
264
+ persisted_choice = _startup_choice()
265
+ except Exception as e:
266
+ logger.warning("Failed to persist startup personality: %s", e)
267
+ return {"ok": True, "status": status, "startup": persisted_choice}
268
+ except Exception as e:
269
+ return JSONResponse({"ok": False, "error": str(e)}, status_code=500) # type: ignore
270
+
271
+ @app.get("/voices")
272
+ async def _voices() -> list[str]:
273
+ loop = get_loop()
274
+ if loop is None:
275
+ return ["marin"]
276
+
277
+ async def _get_v() -> list[str]:
278
+ try:
279
+ return await handler.get_available_voices()
280
+ except Exception:
281
+ return ["marin"]
282
+
283
+ try:
284
+ fut = asyncio.run_coroutine_threadsafe(_get_v(), loop)
285
+ return fut.result(timeout=10)
286
+ except Exception:
287
+ return ["marin"]
src/reachy_mini_receptionist/ical_calendar.py ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """iCal calendar source for the receptionist.
2
+
3
+ When ``RECEPTION_ICS_URL`` is set in the environment, ``calendar_data``
4
+ uses this module to fetch today's appointments. The URL is typically a
5
+ Google Calendar "Public address in iCal format" link
6
+ (Settings -> Integrate calendar -> Public address in iCal format).
7
+ When the URL is unset, ``calendar_data`` returns an empty schedule and
8
+ the receptionist serves walk-in visitors only (via ``lookup_employee``).
9
+
10
+ Operator convention for event titles:
11
+
12
+ "<Visitor name> with <Host name>"
13
+
14
+ Examples:
15
+
16
+ "Rohan Verma with Mukul"
17
+ "Sara Khan with Priya"
18
+ "David Lee with Arjun Mehta"
19
+
20
+ The host name (or alias) is matched against ``employees.py``. Add an
21
+ optional " — note" suffix to the title or use the event's DESCRIPTION
22
+ field for the note. The event's LOCATION field is used as a fallback host
23
+ when the title contains no recognised visitor/host separator (" with ", " meets ", " to see ", ...).
24
+
25
+ Single-occurrence events only — RRULE recurrence is not expanded in v1.
26
+ """
27
+ from __future__ import annotations
28
+
29
+ import logging
30
+ import os
31
+ import time
32
+ from datetime import date, datetime
33
+ from typing import Any, Dict, List, Optional, Tuple
34
+ from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
35
+
36
+ import httpx
37
+ from icalendar import Calendar
38
+
39
+ logger = logging.getLogger(__name__)
40
+
41
+ _CACHE_TTL_SECONDS: float = 300.0
42
+ _HTTP_TIMEOUT_SECONDS: float = 10.0
43
+
44
+ _cache: Dict[str, Any] = {"fetched_at": 0.0, "data": [], "url": None, "valid": False}
45
+
46
+
47
def _display_tz() -> Any:
    """Return the timezone to use for displaying iCal event times.

    Read from the ``RECEPTION_TIMEZONE`` env var (e.g. ``Asia/Kolkata``,
    ``America/New_York``). An unset, empty, or whitespace-only value selects
    the default ``Asia/Kolkata`` since the pilot deployment is in India.

    Returns:
        A ``ZoneInfo`` for the configured name, or ``None`` when the name
        cannot be loaded. ``None`` signals "use ``astimezone()`` with no
        argument", i.e. system local time, so a bad value never crashes the
        whole calendar fetch.
    """
    # Strip BEFORE applying the default: a whitespace-only value would
    # otherwise become the empty key, which ZoneInfo rejects with ValueError.
    raw = (os.getenv("RECEPTION_TIMEZONE") or "").strip() or "Asia/Kolkata"
    try:
        return ZoneInfo(raw)
    except (ZoneInfoNotFoundError, ValueError, OSError):
        # ZoneInfoNotFoundError: no tz data for this key.
        # ValueError: malformed key (absolute path, "..", non-normalised).
        # OSError: defensive — a key resolving to an unreadable path (such
        # as a directory) may surface as an OS-level error.
        logger.warning(
            "RECEPTION_TIMEZONE=%r is not a valid IANA tz name — "
            "falling back to system local time",
            raw,
        )
        return None  # signals "use astimezone() default"
65
+
66
+
67
+ # Visitor/host separators the operator may write in event titles. Order
68
+ # matters — we try the most-specific phrasing first so "is here to see"
69
+ # wins over a bare "to" if both appear. Case-insensitive match.
70
+ _VISITOR_HOST_SEPARATORS: Tuple[str, ...] = (
71
+ " is here to see ",
72
+ " here to see ",
73
+ " to see ",
74
+ " meets with ",
75
+ " meeting with ",
76
+ " meeting ",
77
+ " meets ",
78
+ # Bare "meet" — added 2026-05-21 after the operator's calendar used
79
+ # the imperative form ("Krishna Meet Rohan", "Alex Meet Arjun"). Without
80
+ # this the title fails to split, host_query stays empty, and the bot
81
+ # falls back to a context-derived email instead of looking the host
82
+ # up in the employee directory.
83
+ " meet ",
84
+ " with ",
85
+ " for ",
86
+ " -> ",
87
+ " → ",
88
+ )
89
+
90
+
91
+ def _parse_summary(summary: str) -> Tuple[str, str, Optional[str]]:
92
+ """Split a SUMMARY into (visitor_name, host_query, inline_note).
93
+
94
+ Visitor/host separator: any of ``_VISITOR_HOST_SEPARATORS``
95
+ (case-insensitive). Note separator (applied to the rest after host
96
+ extraction): ``" — "``, ``" - "``, ``": "``.
97
+
98
+ Returns ``(text, "", None)`` if no visitor/host separator is found —
99
+ the caller can then fall back to LOCATION for the host.
100
+ """
101
+ if not summary:
102
+ return ("", "", None)
103
+ text = summary.strip()
104
+ lower = text.lower()
105
+ sep_idx = -1
106
+ sep_len = 0
107
+ for sep in _VISITOR_HOST_SEPARATORS:
108
+ idx = lower.find(sep)
109
+ if idx >= 0 and (sep_idx < 0 or idx < sep_idx):
110
+ sep_idx = idx
111
+ sep_len = len(sep)
112
+ if sep_idx < 0:
113
+ return (text, "", None)
114
+ visitor = text[:sep_idx].strip()
115
+ rest = text[sep_idx + sep_len:].strip()
116
+ note: Optional[str] = None
117
+ for delim in (" — ", " - ", ": "):
118
+ d = rest.find(delim)
119
+ if d >= 0:
120
+ note = rest[d + len(delim):].strip()
121
+ rest = rest[:d].strip()
122
+ break
123
+ return (visitor, rest, note)
124
+
125
+
126
+ def _format_time(dt: Any) -> str:
127
+ """Format a datetime as 'H:MM AM/PM' in the display timezone.
128
+
129
+ iCal feeds frequently serialise event times in UTC (or with a TZID
130
+ pointing to a different region). We convert to ``RECEPTION_TIMEZONE``
131
+ (default ``Asia/Kolkata``) before formatting so an operator
132
+ scheduling "1 PM" in IST sees "1:00 PM" on the dashboard, not
133
+ "7:30 AM" (UTC) — regardless of what timezone the robot's OS is in.
134
+ """
135
+ if isinstance(dt, datetime):
136
+ if dt.tzinfo is not None:
137
+ tz = _display_tz()
138
+ local = dt.astimezone(tz) if tz is not None else dt.astimezone()
139
+ else:
140
+ local = dt
141
+ return local.strftime("%I:%M %p").lstrip("0")
142
+ return "all day"
143
+
144
+
145
+ def _local_event_date(start: Any) -> date:
146
+ """Return the display-tz date of an event's DTSTART."""
147
+ if isinstance(start, datetime):
148
+ if start.tzinfo is not None:
149
+ tz = _display_tz()
150
+ return (start.astimezone(tz) if tz is not None else start.astimezone()).date()
151
+ return start.date()
152
+ return start # already a date
153
+
154
+
155
def _today_events(cal: Calendar, today: date) -> List[Dict[str, Any]]:
    """Collect the VEVENTs in ``cal`` whose start date equals ``today``.

    Events without a DTSTART are ignored. Each returned dict carries
    ``time``, ``name``, ``note``, ``_host_query`` and ``_dt``; the list is
    ordered by ``_sort_key``.
    """
    matches: List[Dict[str, Any]] = []
    for vevent in cal.walk("VEVENT"):
        start_prop = vevent.get("DTSTART")
        if start_prop is None:
            continue
        begins = start_prop.dt
        if _local_event_date(begins) != today:
            continue

        title = str(vevent.get("SUMMARY") or "")
        details = str(vevent.get("DESCRIPTION") or "").strip()
        place = str(vevent.get("LOCATION") or "").strip()
        visitor, title_host, title_note = _parse_summary(title)

        matches.append({
            "time": _format_time(begins),
            # Fall back to the raw title when no visitor could be split out.
            "name": visitor or title,
            # Only surface a note when there is something beyond the title;
            # the SUMMARY is never echoed back here (it would just add
            # noise to the LLM context).
            "note": title_note or details or "",
            # LOCATION stands in for the host when the title named none.
            "_host_query": title_host or place,
            "_dt": begins,
        })
    return sorted(matches, key=_sort_key)
183
+
184
+
185
+ def _sort_key(event: Dict[str, Any]) -> datetime:
186
+ dt = event.get("_dt")
187
+ if isinstance(dt, datetime):
188
+ return dt.replace(tzinfo=None) if dt.tzinfo is None else dt.astimezone().replace(tzinfo=None)
189
+ if isinstance(dt, date):
190
+ return datetime.combine(dt, datetime.min.time())
191
+ return datetime.min
192
+
193
+
194
def _fetch_ics(url: str) -> str:
    """Download the iCal feed at ``url`` and return the response body as text.

    Redirects are followed and the request is bounded by
    ``_HTTP_TIMEOUT_SECONDS``. ``raise_for_status()`` is called on the
    response before the body is returned, so an error status propagates as
    an exception to the caller.
    """
    response = httpx.get(
        url,
        follow_redirects=True,
        timeout=_HTTP_TIMEOUT_SECONDS,
    )
    response.raise_for_status()
    body = response.text
    return body
198
+
199
+
200
def fetch_appointments(ics_url: str, today: Optional[date] = None) -> List[Dict[str, Any]]:
    """Return today's appointments from the iCal URL, cached for ~5 minutes.

    Args:
        ics_url: Address of the iCal feed to download.
        today: Date to select events for. Defaults to the current date in
            the display timezone (system local time if that is unavailable).

    Returns:
        A new list of appointment dicts. On any fetch/parse failure the last
        successful result for the SAME url and date is returned (or an empty
        list if there is none) and a warning is logged. Each dict has keys:

            time         (str)  — "H:MM AM/PM" or "all day"
            name         (str)  — visitor name parsed from SUMMARY, or the
                                  whole SUMMARY when it cannot be split
            note         (str)  — inline note or DESCRIPTION, else ""
            _host_query  (str)  — host name/alias from SUMMARY or LOCATION
            _dt (datetime|date) — event start, for downstream use

    Resolution of ``_host_query`` to an email is the caller's job
    (calendar_data.py uses ``employees.find_email_for``).
    """
    if today is None:
        tz = _display_tz()
        today = (datetime.now(tz) if tz is not None else datetime.now().astimezone()).date()
    now = time.time()
    # A cached result is only meaningful for the url AND the day it was
    # built for; otherwise yesterday's visitors would be served as today's
    # once the clock passes midnight inside the TTL window. ``.get`` keeps
    # this working with a cache dict that has no "day" entry yet.
    same_source = _cache["url"] == ics_url and _cache.get("day") == today
    # Cache freshness is tracked by ``valid`` + ``fetched_at`` only. An empty
    # list is a legitimate cached result ("no appointments today") — treating
    # ``data`` truthiness as the freshness flag would force a re-fetch on
    # every call on an empty-calendar day, which on the receptionist hot path
    # blocks the audio loop with a synchronous HTTP call per request.
    if (
        same_source
        and _cache["valid"]
        and (now - _cache["fetched_at"]) < _CACHE_TTL_SECONDS
    ):
        return list(_cache["data"])
    try:
        text = _fetch_ics(ics_url)
        cal = Calendar.from_ical(text)
        events = _today_events(cal, today)
    except Exception as e:
        # Deliberate best-effort boundary: a broken feed must not take the
        # receptionist down. Stale data is reused only when it belongs to
        # this url and day.
        stale = list(_cache.get("data", [])) if same_source else []
        logger.warning(
            "iCal fetch failed (%s: %s); using last-good cache (%d entries)",
            type(e).__name__, e, len(stale),
        )
        return stale
    _cache.update({
        "fetched_at": now,
        "data": events,
        "url": ics_url,
        "day": today,
        "valid": True,
    })
    logger.info("Fetched iCal: %d event(s) for %s", len(events), today)
    return list(events)
244
+
245
+
246
def clear_cache() -> None:
    """Reset the iCal cache to its empty, invalid state.

    Serves as a test hook and as a manual-refresh helper: the next
    ``fetch_appointments`` call will download the feed again.
    """
    _cache.update(
        fetched_at=0.0,
        data=[],
        url=None,
        valid=False,
    )
src/reachy_mini_receptionist/images/reachymini_avatar.png ADDED

Git LFS Details

  • SHA256: 5a63ac8802ff3542f01292c431c5278296880d74cd3580d219fcf4827bc235f9
  • Pointer size: 132 Bytes
  • Size of remote file: 1.23 MB
src/reachy_mini_receptionist/images/user_avatar.png ADDED

Git LFS Details

  • SHA256: e97ca125a86bacdaa41c8dca88abd9ca746fd5c9391eda24249c012432b0219b
  • Pointer size: 132 Bytes
  • Size of remote file: 1.11 MB
src/reachy_mini_receptionist/main.py ADDED
@@ -0,0 +1,1199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Entrypoint for the Reachy Mini Receptionist app.
2
+
3
+ Changes from the base realtime app template:
4
+ - FaceDatabase and FaceRecognitionWorker are initialised here and injected into
5
+ ToolDependencies (face_worker + face_db fields).
6
+ - A /video_feed MJPEG endpoint and /api/* JSON endpoints are mounted on the
7
+ FastAPI settings_app so the dashboard can show the live annotated camera feed,
8
+ guest list, calendar, and notifications.
9
+ - The camera_worker slot is left intact for future head-tracking integration.
10
+ """
11
+
12
+ import os
13
+ import sys
14
+ import time
15
+ import asyncio
16
+ import argparse
17
+ import threading
18
+ from pathlib import Path
19
+ from typing import Any, Dict, List, Optional
20
+
21
+ import gradio as gr
22
+ from fastapi import FastAPI
23
+ from fastapi.responses import StreamingResponse, JSONResponse
24
+ from fastapi.staticfiles import StaticFiles
25
+ from fastrtc import Stream
26
+ from gradio.utils import get_space
27
+
28
+ from reachy_mini import ReachyMini, ReachyMiniApp
29
+ from reachy_mini_receptionist.utils import (
30
+ parse_args,
31
+ setup_logger,
32
+ handle_vision_stuff,
33
+ log_connection_troubleshooting,
34
+ )
35
+
36
+
37
+ def _mount_dashboard_api(
38
+ app: Any,
39
+ face_worker: Any,
40
+ face_db: Any,
41
+ realtime_handler: Any | None = None,
42
+ session_manager: Any | None = None,
43
+ visitor_log: Any | None = None,
44
+ employee_store: Any | None = None,
45
+ instance_path: Optional[str] = None,
46
+ ) -> None:
47
+ """Mount receptionist dashboard API endpoints on a FastAPI app.
48
+
49
+ Endpoints:
50
+ GET /dashboard → serves the dashboard HTML page
51
+ GET /video_feed → MJPEG stream of annotated camera frames
52
+ GET /api/guests → JSON list of registered guests
53
+ GET /api/calendar → JSON today's appointments
54
+ GET /api/outbox → JSON email outbox log
55
+ GET /api/face_status → JSON current face detection state
56
+ GET /api/face_event_last_sent → JSON last external face event sent to model
57
+ GET /api/logs → JSON recent face recognition debug logs
58
+ GET /api/session → JSON current visitor session state (state machine)
59
+ GET /api/session_event_last_sent → JSON last session context event sent to model
60
+ """
61
+ if app is None or not hasattr(app, "get"):
62
+ return # No FastAPI app available (e.g. LocalStream without settings_app)
63
+
64
+ # Body needs to be in scope for every POST/PATCH endpoint with a JSON
65
+ # body below. Imported once here at the top of the function so the
66
+ # order endpoints are defined in doesn't matter.
67
+ from fastapi import Body # noqa: F401 — used by endpoints below
68
+
69
+ from reachy_mini_receptionist.calendar_data import get_appointments
70
+ from reachy_mini_receptionist.tools.send_email import get_outbox
71
+
72
+ static_dir = Path(__file__).parent / "static"
73
+ dashboard_html = static_dir / "dashboard.html"
74
+
75
+ # Serve guest face thumbnails as static files
76
+ guests_dir = face_db.guests_dir
77
+ app.mount(
78
+ "/guest_images",
79
+ StaticFiles(directory=str(guests_dir)),
80
+ name="guest_images",
81
+ )
82
+
83
+ # Serve dashboard page
84
+ if dashboard_html.exists():
85
+ from fastapi.responses import FileResponse
86
+
87
+ @app.get("/dashboard")
88
+ def _dashboard_page():
89
+ return FileResponse(str(dashboard_html))
90
+
91
+ # MJPEG video feed
92
+ def _mjpeg_generator():
93
+ boundary = b"--frame"
94
+ while True:
95
+ jpeg = face_worker.latest_annotated_jpeg
96
+ if jpeg:
97
+ yield (
98
+ boundary + b"\r\nContent-Type: image/jpeg\r\n\r\n"
99
+ + jpeg + b"\r\n"
100
+ )
101
+ time.sleep(0.05) # ~20 fps max
102
+
103
+ @app.get("/video_feed")
104
+ def _video_feed():
105
+ return StreamingResponse(
106
+ _mjpeg_generator(),
107
+ media_type="multipart/x-mixed-replace; boundary=frame",
108
+ )
109
+
110
+ @app.get("/api/guests")
111
+ def _api_guests():
112
+ return JSONResponse(face_db.get_all_guests())
113
+
114
+ @app.delete("/api/guests")
115
+ def _api_delete_guest(name: str):
116
+ removed = face_db.delete_guest(name)
117
+ if not removed:
118
+ return JSONResponse(
119
+ {"ok": False, "error": "Guest not found", "name": name},
120
+ status_code=404,
121
+ )
122
+ return JSONResponse({"ok": True, "name": name})
123
+
124
+ @app.get("/api/calendar")
125
+ def _api_calendar():
126
+ return JSONResponse(get_appointments())
127
+
128
+ @app.get("/api/outbox")
129
+ def _api_outbox():
130
+ return JSONResponse(get_outbox())
131
+
132
+ @app.get("/api/face_status")
133
+ def _api_face_status():
134
+ return JSONResponse({
135
+ "name": face_worker.current_name,
136
+ "confidence": round(face_worker.confidence, 2),
137
+ "is_known": face_worker.current_name not in ("Unknown", "No face"),
138
+ })
139
+
140
+ @app.get("/api/face_event_last_sent")
141
+ def _api_face_event_last_sent():
142
+ if realtime_handler is None or not hasattr(realtime_handler, "get_last_face_event_sent"):
143
+ return JSONResponse({"sent": False})
144
+
145
+ event = realtime_handler.get_last_face_event_sent()
146
+ if event is None:
147
+ return JSONResponse({"sent": False})
148
+
149
+ return JSONResponse({"sent": True, **event})
150
+
151
+ @app.get("/api/logs")
152
+ def _api_logs():
153
+ return JSONResponse({"logs": face_worker.get_recent_logs(100)})
154
+
155
+ @app.get("/api/config")
156
+ def _api_config():
157
+ from reachy_mini_receptionist.config import config
158
+ return JSONResponse({"model": config.MODEL_NAME})
159
+
160
+ @app.get("/api/best_face_jpeg")
161
+ def _api_best_face_jpeg():
162
+ # Return the best face crop from the last 5 seconds as JPEG.
163
+ # When no face is available we return 204 (No Content) instead of
164
+ # 404 so the dashboard doesn't pollute the browser's network tab
165
+ # with red error rows every 500 ms while the room is empty.
166
+ import cv2
167
+ from fastapi.responses import Response
168
+
169
+ # Dashboard preview should show best currently available face immediately
170
+ # and not wait for dwell-based stabilization.
171
+ result = face_worker.best_recent_face(window_seconds=5.0, require_dwell=False)
172
+ name, conf, crop = result
173
+
174
+ if crop is None:
175
+ return Response(status_code=204)
176
+
177
+ # Encode 100x100 grayscale crop as JPEG (upsample to 200x200 for readability)
178
+ display = cv2.resize(crop, (200, 200), interpolation=cv2.INTER_NEAREST)
179
+ _, jpeg_buf = cv2.imencode(".jpg", display, [cv2.IMWRITE_JPEG_QUALITY, 85])
180
+ jpeg_bytes = jpeg_buf.tobytes()
181
+
182
+ # Find face area of the best entry for the header
183
+ face_area = 0
184
+ with face_worker._lock:
185
+ if face_worker._detection_window:
186
+ best_entry = max(face_worker._detection_window, key=lambda e: e[1])
187
+ face_area = best_entry[1]
188
+
189
+ return Response(
190
+ content=jpeg_bytes,
191
+ media_type="image/jpeg",
192
+ headers={
193
+ "X-Face-Name": name,
194
+ "X-Face-Confidence": str(round(conf, 2)),
195
+ "X-Face-Area": str(face_area),
196
+ "Cache-Control": "no-cache, no-store",
197
+ },
198
+ )
199
+
200
+ @app.get("/api/session")
201
+ def _api_session():
202
+ if session_manager is None:
203
+ return JSONResponse({"available": False})
204
+ snap = session_manager.session
205
+ payload = snap.to_dict() if hasattr(snap, "to_dict") else {}
206
+ return JSONResponse({"available": True, **payload})
207
+
208
+ @app.get("/api/session_event_last_sent")
209
+ def _api_session_event_last_sent():
210
+ if realtime_handler is None or not hasattr(realtime_handler, "get_last_session_event_sent"):
211
+ return JSONResponse({"sent": False})
212
+ event = realtime_handler.get_last_session_event_sent()
213
+ if event is None:
214
+ return JSONResponse({"sent": False})
215
+ return JSONResponse({"sent": True, **event})
216
+
217
+ @app.get("/api/visitor_log")
218
+ def _api_visitor_log(limit: int = 100):
219
+ if visitor_log is None:
220
+ return JSONResponse({"available": False, "today_count": 0, "visits": []})
221
+ return JSONResponse({
222
+ "available": True,
223
+ "today_count": visitor_log.count_today(),
224
+ "visits": visitor_log.list_visits(limit=limit),
225
+ })
226
+
227
+ @app.get("/api/visitor_log.csv")
228
+ def _api_visitor_log_csv():
229
+ """Download the full visitor log as CSV (for HR / facilities handoff)."""
230
+ from fastapi.responses import Response
231
+ import csv
232
+ import io
233
+
234
+ if visitor_log is None:
235
+ return Response(content="", media_type="text/csv")
236
+
237
+ rows = visitor_log.list_visits(limit=1000)
238
+ buf = io.StringIO()
239
+ fieldnames = [
240
+ "id", "started_at", "ended_at",
241
+ "visitor_name", "recognized_face_name", "employee_name",
242
+ "matched_appointment_time", "matched_appointment_note",
243
+ "email_sent_to", "final_state", "error_message",
244
+ ]
245
+ writer = csv.DictWriter(buf, fieldnames=fieldnames, extrasaction="ignore")
246
+ writer.writeheader()
247
+ for r in rows:
248
+ writer.writerow(r)
249
+ filename = f"visitor_log_{time.strftime('%Y-%m-%d')}.csv"
250
+ return Response(
251
+ content=buf.getvalue(),
252
+ media_type="text/csv",
253
+ headers={"Content-Disposition": f'attachment; filename="{filename}"'},
254
+ )
255
+
256
+ @app.delete("/api/visitor_log")
257
+ def _api_visitor_log_wipe():
258
+ """Wipe every row of the visitor log. Scoped destructive action
259
+ used by the dashboard's panel-level Clear button. Does not touch
260
+ employees, settings, face DB, or session state."""
261
+ if visitor_log is None:
262
+ return JSONResponse({"ok": False, "error": "visitor_log unavailable"}, status_code=503)
263
+ try:
264
+ removed = visitor_log.wipe_all()
265
+ return JSONResponse({"ok": True, "removed": removed})
266
+ except Exception as e:
267
+ return JSONResponse(
268
+ {"ok": False, "error": f"{type(e).__name__}: {e}"}, status_code=500,
269
+ )
270
+
271
+ @app.get("/api/stats")
272
+ def _api_stats():
273
+ """Consolidated stats strip data — visits, emails, last visit, state."""
274
+ visits_today = visitor_log.count_today() if visitor_log is not None else 0
275
+ emails_today = visitor_log.count_emails_delivered_today() if visitor_log is not None else 0
276
+ last = visitor_log.last_visit() if visitor_log is not None else None
277
+ current_state = None
278
+ current_visitor = None
279
+ if session_manager is not None:
280
+ snap = session_manager.session
281
+ current_state = snap.current_state.value
282
+ current_visitor = snap.visitor_name
283
+ return JSONResponse({
284
+ "visits_today": visits_today,
285
+ "emails_delivered_today": emails_today,
286
+ "last_visit": last,
287
+ "current_state": current_state,
288
+ "current_visitor": current_visitor,
289
+ })
290
+
291
+ @app.post("/api/session/reset")
292
+ def _api_session_reset():
293
+ """Manual override: drop the current visitor session back to IDLE.
294
+
295
+ Useful when the bot gets stuck (e.g. spurious face match keeps the
296
+ state from going back to idle). Mirrors the auto-timeout reset.
297
+ """
298
+ if session_manager is None:
299
+ return JSONResponse({"ok": False, "error": "Session manager not available"}, status_code=503)
300
+ snap = session_manager.reset()
301
+ return JSONResponse({"ok": True, "current_state": snap.current_state.value})
302
+
303
+ @app.post("/api/guests/manual_register")
304
+ def _api_guests_manual_register(payload: dict = Body(...)):
305
+ """Operator-initiated visitor registration that bypasses voice.
306
+
307
+ When the bot can't transcribe a visitor's name (common for short
308
+ non-English names that Whisper/gpt-realtime mangle), the operator
309
+ types it on the dashboard. The current camera face crop is saved
310
+ under that name AND the active session is flipped to RECOGNIZED so
311
+ downstream tools (lookup_employee / send_email) can proceed as if
312
+ register_guest had succeeded via voice.
313
+ """
314
+ name = (payload.get("name") or "").strip()
315
+ if not name:
316
+ return JSONResponse(
317
+ {"ok": False, "error": "name is required"}, status_code=400,
318
+ )
319
+
320
+ worker = face_worker
321
+ if worker is None:
322
+ return JSONResponse(
323
+ {"ok": False, "error": "Face worker not available"},
324
+ status_code=503,
325
+ )
326
+ face_crop = getattr(worker, "current_encoding", None)
327
+ if face_crop is None:
328
+ return JSONResponse(
329
+ {
330
+ "ok": False,
331
+ "error": (
332
+ "No face currently detected by the camera. Ask the "
333
+ "visitor to look directly at the lens and try again."
334
+ ),
335
+ },
336
+ status_code=409,
337
+ )
338
+ if face_db is None:
339
+ return JSONResponse(
340
+ {"ok": False, "error": "Face DB not available"},
341
+ status_code=503,
342
+ )
343
+
344
+ try:
345
+ face_db.add_or_update_guest(name, face_crop)
346
+ try:
347
+ worker.rebuild_recognizer()
348
+ except Exception:
349
+ pass
350
+ except Exception as e:
351
+ return JSONResponse(
352
+ {"ok": False, "error": f"{type(e).__name__}: {e}"},
353
+ status_code=500,
354
+ )
355
+
356
+ # Flip the active session so the LLM sees this as a confirmed
357
+ # registration and can resume the flow (calendar match / lookup /
358
+ # send_email) without re-asking for the name.
359
+ if session_manager is not None:
360
+ try:
361
+ from reachy_mini_receptionist.receptionist_state import (
362
+ ReceptionState as _RS,
363
+ )
364
+ session_manager.transition(
365
+ _RS.RECOGNIZED,
366
+ visitor_name=name,
367
+ recognized_face_name=name,
368
+ )
369
+ except Exception as e:
370
+ print(f"[manual_register] session transition failed: {e}")
371
+ return JSONResponse({
372
+ "ok": True,
373
+ "name": name,
374
+ "total_guests": face_db.count(),
375
+ })
376
+
377
@app.post("/api/demo/reset")
def _api_demo_reset():
    """Clear the state that accumulates while testing so a demo starts clean.

    Steps, each attempted independently (a failure is recorded under
    ``errors`` and the remaining steps still run):

    - clear the face DB and report how many guests it held beforehand
    - ask the face worker to rebuild its recognizer
    - wipe the visitor log
    - clear the email outbox
    - reset the active session

    The employee store is not touched by this endpoint.

    Returns:
        JSONResponse with per-step counters, an ``errors`` list and an
        overall ``ok`` flag that is true only when no step failed.
    """
    from reachy_mini_receptionist.tools.send_email import clear_outbox

    summary = {
        "guests_removed": 0,
        "visits_removed": 0,
        "outbox_removed": 0,
        "session_reset": False,
        "errors": [],
    }

    def _wipe_faces():
        if face_db is None:
            return
        guest_count = face_db.count()
        face_db.clear()
        summary["guests_removed"] = guest_count

    def _refresh_recognizer():
        # Rebuild so the worker stops matching the faces that were just removed.
        if face_worker is not None and hasattr(face_worker, "rebuild_recognizer"):
            face_worker.rebuild_recognizer()

    def _wipe_visits():
        if visitor_log is not None:
            summary["visits_removed"] = visitor_log.wipe_all()

    def _wipe_outbox():
        summary["outbox_removed"] = clear_outbox()

    def _reset_session():
        if session_manager is not None:
            session_manager.reset()
            summary["session_reset"] = True

    # (error label, action) pairs, executed in this order.
    steps = (
        ("face_db", _wipe_faces),
        ("face_worker", _refresh_recognizer),
        ("visitor_log", _wipe_visits),
        ("outbox", _wipe_outbox),
        ("session", _reset_session),
    )
    for label, action in steps:
        try:
            action()
        except Exception as exc:
            summary["errors"].append(f"{label}: {type(exc).__name__}: {exc}")

    summary["ok"] = not summary["errors"]
    return JSONResponse(summary)
435
+
436
+ # ------------------------------------------------------------------
437
+ # Employee CRUD — backs the Employees panel on the dashboard.
438
+ # (Body is imported at the top of _mount_dashboard_api.)
439
+ # ------------------------------------------------------------------
440
+
441
def _employee_store_or_503():
    """Return a 503 JSON response when no employee store exists, else ``None``.

    Endpoint handlers call this first and return the response as-is when it
    is not ``None``.
    """
    if employee_store is not None:
        return None
    return JSONResponse(
        {"ok": False, "error": "Employee store not available"},
        status_code=503,
    )
447
+
448
@app.get("/api/employees")
def _api_employees_list():
    """Return every employee in the store as ``{"employees": [...]}``.

    Responds with the 503 guard response when the store is unavailable.
    """
    unavailable = _employee_store_or_503()
    if unavailable is None:
        return JSONResponse({"employees": employee_store.list_all()})
    return unavailable
454
+
455
@app.post("/api/employees")
def _api_employees_create(payload: dict = Body(...)):
    """Create an employee from the JSON body.

    Body keys read: ``name``, ``email``, ``aliases`` and ``title``. A missing
    or ``null`` ``name``/``email`` is forwarded as an empty string and a
    missing or ``null`` ``aliases`` as an empty list, so the store always
    receives consistently typed values.

    Returns:
        200 with ``{"ok": True, "employee": ...}`` on success;
        409 when the store raises ``EmployeeExistsError``;
        400 when it raises ``ValueError``;
        500 for any other exception;
        503 when the employee store is unavailable.
    """
    guard = _employee_store_or_503()
    if guard is not None:
        return guard
    # Imported before the ``try`` so the name is always bound by the time the
    # ``except EmployeeExistsError`` clause below is evaluated.
    from reachy_mini_receptionist.employees_store import EmployeeExistsError

    try:
        emp = employee_store.create(
            name=payload.get("name") or "",
            email=payload.get("email") or "",
            aliases=payload.get("aliases") or [],
            title=payload.get("title"),
        )
        return JSONResponse({"ok": True, "employee": emp})
    except EmployeeExistsError as e:
        return JSONResponse(
            {"ok": False, "error": str(e)}, status_code=409,
        )
    except ValueError as e:
        return JSONResponse(
            {"ok": False, "error": str(e)}, status_code=400,
        )
    except Exception as e:
        return JSONResponse(
            {"ok": False, "error": f"{type(e).__name__}: {e}"}, status_code=500,
        )
481
+
482
@app.patch("/api/employees/{employee_id}")
def _api_employees_update(employee_id: int, payload: dict = Body(...)):
    """Update the employee identified by ``employee_id`` from the JSON body.

    Body keys read: ``name``, ``email``, ``aliases`` and ``title``; each is
    forwarded to the store unchanged (``None`` when absent).

    Returns:
        200 with ``{"ok": True, "employee": ...}`` on success;
        404 when the store returns ``None`` for the id;
        409 when the store raises ``EmployeeExistsError``;
        400 when it raises ``ValueError``;
        500 for any other exception;
        503 when the employee store is unavailable.
    """
    guard = _employee_store_or_503()
    if guard is not None:
        return guard
    # Imported before the ``try`` so the name is always bound by the time the
    # ``except EmployeeExistsError`` clause below is evaluated.
    from reachy_mini_receptionist.employees_store import EmployeeExistsError

    try:
        emp = employee_store.update(
            employee_id,
            name=payload.get("name"),
            email=payload.get("email"),
            aliases=payload.get("aliases"),
            title=payload.get("title"),
        )
        if emp is None:
            return JSONResponse(
                {"ok": False, "error": "Employee not found"}, status_code=404,
            )
        return JSONResponse({"ok": True, "employee": emp})
    except EmployeeExistsError as e:
        return JSONResponse(
            {"ok": False, "error": str(e)}, status_code=409,
        )
    except ValueError as e:
        return JSONResponse(
            {"ok": False, "error": str(e)}, status_code=400,
        )
    except Exception as e:
        return JSONResponse(
            {"ok": False, "error": f"{type(e).__name__}: {e}"}, status_code=500,
        )
513
+
514
@app.delete("/api/employees/{employee_id}")
def _api_employees_delete(employee_id: int):
    """Delete the employee identified by ``employee_id``.

    Returns:
        200 with ``{"ok": True}`` when the store reports a removal;
        404 when the store reports nothing was removed;
        500 with the exception text when the store raises, matching the
        JSON error shape used by the create/update endpoints;
        503 when the employee store is unavailable.
    """
    guard = _employee_store_or_503()
    if guard is not None:
        return guard
    try:
        removed = employee_store.delete(employee_id)
    except Exception as e:
        return JSONResponse(
            {"ok": False, "error": f"{type(e).__name__}: {e}"}, status_code=500,
        )
    if not removed:
        return JSONResponse(
            {"ok": False, "error": "Employee not found"}, status_code=404,
        )
    return JSONResponse({"ok": True})
525
+
526
+ # ------------------------------------------------------------------
527
+ # Diagnostics — surfaces the kind of failures that turned today into
528
+ # a 30-min debugging session (audio at 7%, daemon asleep, OpenAI
529
+ # latency, etc) BEFORE the demo, not during.
530
+ # ------------------------------------------------------------------
531
+ import socket
532
+ import subprocess
533
+
534
def _check_tcp(host: str, port: int, timeout: float = 2.0) -> dict:
    """Probe ``host:port`` with a plain TCP connect.

    Args:
        host: Host name or address to connect to.
        port: TCP port.
        timeout: Connect timeout in seconds.

    Returns:
        ``{"ok": True, "latency_ms": int}`` when the connection opens, or
        ``{"ok": False, "error": "<ExcType>: <message>"}`` on any exception.
    """
    began = time.monotonic()
    try:
        with socket.create_connection((host, port), timeout=timeout):
            elapsed_ms = int((time.monotonic() - began) * 1000)
        outcome = {"ok": True, "latency_ms": elapsed_ms}
    except Exception as exc:
        outcome = {"ok": False, "error": f"{type(exc).__name__}: {exc}"}
    return outcome
543
+
544
def _check_daemon() -> dict:
    """Query the daemon status endpoint on ``localhost:8000``.

    Returns:
        On HTTP 200: ``ok`` (true only when the reported ``state`` equals
        ``"started"``), ``latency_ms`` and ``state``.
        On any other status: ``ok`` false, ``latency_ms`` and an
        ``"HTTP <code>"`` error.
        On any exception (including a missing ``httpx``): ``ok`` false and
        the exception text.
    """
    try:
        import httpx

        began = time.monotonic()
        resp = httpx.get("http://localhost:8000/api/daemon/status", timeout=2.0)
        elapsed_ms = int((time.monotonic() - began) * 1000)
        if resp.status_code == 200:
            state = resp.json().get("state")
            return {
                "ok": state == "started",
                "latency_ms": elapsed_ms,
                "state": state,
            }
        return {
            "ok": False,
            "latency_ms": elapsed_ms,
            "error": f"HTTP {resp.status_code}",
        }
    except Exception as exc:
        return {"ok": False, "error": f"{type(exc).__name__}: {exc}"}
560
+
561
def _check_wifi(interface: str = "wlan0") -> dict:
    """Report wireless link details parsed from ``iwconfig <interface>``.

    Args:
        interface: Wireless interface name to query; defaults to ``wlan0``.

    Returns:
        When ``iwconfig`` cannot be run: ``{"ok": False, "error": ...}``.
        Otherwise a dict with ``raw`` (the stripped command output) plus any
        of ``link_quality``, ``link_quality_max``, ``link_quality_pct``,
        ``signal_dbm`` and ``essid`` that could be parsed. ``ok`` is
        ``None`` when no quality percentage is available, else whether it is
        at least 50.
    """
    import re

    try:
        out = subprocess.check_output(
            ["iwconfig", interface],
            stderr=subprocess.STDOUT, timeout=2.0,
        ).decode("utf-8", errors="replace")
    except Exception as e:
        return {"ok": False, "error": f"{type(e).__name__}: {e}"}

    info: dict = {"raw": out.strip()}

    m = re.search(r"Link Quality=(\d+)/(\d+)", out)
    if m:
        quality, quality_max = int(m.group(1)), int(m.group(2))
        info["link_quality"] = quality
        info["link_quality_max"] = quality_max
        # A reported maximum of 0 would otherwise raise ZeroDivisionError
        # outside the try block and fail the whole health endpoint.
        if quality_max > 0:
            info["link_quality_pct"] = round(quality / quality_max * 100, 1)

    m = re.search(r"Signal level=(-?\d+)\s*dBm", out)
    if m:
        info["signal_dbm"] = int(m.group(1))

    m = re.search(r"ESSID:\"([^\"]+)\"", out)
    if m:
        info["essid"] = m.group(1)

    pct = info.get("link_quality_pct")
    info["ok"] = None if pct is None else pct >= 50
    return info
584
+
585
def _check_audio() -> dict:
    """List audio sinks reported by ``pactl list short sinks``.

    Returns:
        ``{"ok": bool, "sinks": [...]}`` where ``ok`` is true when at least
        one sink line was returned, or ``{"ok": False, "error": ...}`` when
        the command cannot be run.

    NOTE(review): the volume-control section of this file states the device
    uses ALSA without PulseAudio; if so this ``pactl`` probe would always
    fail there — confirm which audio stack the target image runs.
    """
    try:
        raw = subprocess.check_output(
            ["pactl", "list", "short", "sinks"],
            stderr=subprocess.STDOUT, timeout=2.0,
        )
        listing = raw.decode("utf-8", errors="replace")
        sinks = []
        for row in listing.splitlines():
            if not row.strip():
                continue
            # Tab-separated rows: take the second column; otherwise keep the row.
            sinks.append(row.split("\t")[1] if "\t" in row else row)
        return {"ok": bool(sinks), "sinks": sinks}
    except Exception as exc:
        return {"ok": False, "error": f"{type(exc).__name__}: {exc}"}
595
+
596
@app.get("/api/diagnostics/health")
def _api_diagnostics_health():
    """Run every diagnostic probe and return the combined report.

    Probes run in order: TCP reachability of ``api.openai.com`` and
    ``api.resend.com`` on port 443, the daemon status check, Wi-Fi and
    audio. A ``config`` section reports which keys/URLs are set and the
    configured model name.

    Returns:
        ``{"ok": overall, "checks": {...}}`` where ``overall`` is true when
        every probe's ``ok`` is truthy or ``None`` (unknown). The ``config``
        section is excluded from the overall verdict.
    """
    from reachy_mini_receptionist.config import config as _cfg

    checks = {}
    checks["openai_realtime"] = _check_tcp("api.openai.com", 443)
    checks["resend"] = _check_tcp("api.resend.com", 443)
    checks["daemon"] = _check_daemon()
    checks["wifi"] = _check_wifi()
    checks["audio"] = _check_audio()
    checks["config"] = {
        "openai_key_set": bool(_cfg.OPENAI_API_KEY),
        "resend_key_set": bool(os.getenv("RESEND_API_KEY")),
        "resend_from": os.getenv("RESEND_FROM", "onboarding@resend.dev"),
        "ical_url_set": bool(os.getenv("RECEPTION_ICS_URL")),
        "model": _cfg.MODEL_NAME,
    }

    overall = True
    for name, outcome in checks.items():
        if name == "config" or not isinstance(outcome, dict):
            continue
        if outcome.get("ok") not in (True, None):
            overall = False
            break
    return JSONResponse({"ok": overall, "checks": checks})
619
+
620
+ # ------------------------------------------------------------------
621
+ # Volume control — wraps amixer. Operators can change speaker volume
622
+ # without leaving the dashboard (previously required the Reachy Mini
623
+ # Control panel at :8000 or SSH'ing in).
624
+ # ------------------------------------------------------------------
625
+
626
+ # Reachy Mini uses ALSA directly (no PulseAudio), so we drive volume
627
+ # via `amixer`. The control name varies by device: Master is the
628
+ # standard ALSA name; PCM and Speaker are fallbacks on some Pi audio
629
+ # HATs; reachymini_audio_sink is the daemon's sink name on this image.
630
# Mixer control names tried in priority order.
_AMIXER_CANDIDATES = (
    "Master", "PCM", "Speaker", "Headphone", "reachymini_audio_sink",
)


def _amixer_active_control() -> Optional[str]:
    """Pick the amixer simple control to use for volume operations.

    Runs ``amixer scontrols`` and returns the first name from
    ``_AMIXER_CANDIDATES`` that is present. When none of the candidates is
    present, the first control listed is returned. Returns ``None`` when the
    command cannot be run or lists no controls.
    """
    import re

    try:
        listing = subprocess.check_output(
            ["amixer", "scontrols"],
            stderr=subprocess.STDOUT, timeout=2.0,
        ).decode("utf-8", errors="replace")
    except Exception:
        return None

    available = re.findall(r"Simple mixer control '([^']+)'", listing)
    preferred = next((c for c in _AMIXER_CANDIDATES if c in available), None)
    if preferred is not None:
        return preferred
    return available[0] if available else None
649
+
650
def _audio_get_volume() -> dict:
    """Read the current volume of the active amixer control.

    Returns:
        ``{"ok": False, "error": ...}`` when no control is found or
        ``amixer sget`` fails; otherwise ``ok`` true with ``control``,
        ``percent`` (first ``[NN%]`` value in the output, or ``None`` when
        absent) and ``muted`` (true when the output contains ``[off]``).
    """
    import re

    control = _amixer_active_control()
    if control is None:
        return {"ok": False, "error": "no amixer control found"}
    try:
        report = subprocess.check_output(
            ["amixer", "sget", control],
            stderr=subprocess.STDOUT, timeout=2.0,
        ).decode("utf-8", errors="replace")
    except Exception as exc:
        return {"ok": False, "error": f"{type(exc).__name__}: {exc}"}

    pct_match = re.search(r"\[(\d+)%\]", report)
    return {
        "ok": True,
        "control": control,
        "percent": int(pct_match.group(1)) if pct_match else None,
        "muted": "[off]" in report.lower(),
    }
666
+
667
@app.get("/api/audio/volume")
def _api_audio_volume_get():
    """Return the current volume reading as JSON."""
    reading = _audio_get_volume()
    return JSONResponse(reading)
670
+
671
@app.post("/api/audio/volume")
def _api_audio_volume_set(payload: dict = Body(...)):
    """Set the speaker volume of the active amixer control.

    Body:
        ``percent``: required; converted with ``int()`` and clamped to
        0-150.

    The control is unmuted on a best-effort basis before the level is set:
    a failing unmute no longer aborts the request, it is reported under
    ``warning`` while the volume change still goes ahead.
    NOTE(review): this assumes ``amixer sset <ctrl> unmute`` can fail on
    controls without a playback switch — confirm on the target device.

    Returns:
        400 when ``percent`` is missing or not convertible to an integer;
        500 when no amixer control exists or setting the level fails;
        otherwise the fresh volume reading (its own ``ok`` flag wins over
        the default ``True``), plus ``warning`` when unmuting failed.
    """
    target = payload.get("percent")
    if target is None:
        return JSONResponse(
            {"ok": False, "error": "percent (0-150) is required"},
            status_code=400,
        )
    try:
        pct = int(target)
    except (TypeError, ValueError, OverflowError):
        return JSONResponse(
            {"ok": False, "error": "percent must be an integer"},
            status_code=400,
        )
    pct = max(0, min(150, pct))

    ctrl = _amixer_active_control()
    if ctrl is None:
        return JSONResponse(
            {"ok": False, "error": "no amixer control found"},
            status_code=500,
        )

    unmute_warning = None
    try:
        subprocess.check_output(
            ["amixer", "sset", ctrl, "unmute"],
            stderr=subprocess.STDOUT, timeout=2.0,
        )
    except Exception as e:
        # Best effort: keep going so the level itself can still be applied.
        unmute_warning = f"unmute failed: {type(e).__name__}: {e}"

    try:
        subprocess.check_output(
            ["amixer", "sset", ctrl, f"{pct}%"],
            stderr=subprocess.STDOUT, timeout=2.0,
        )
    except Exception as e:
        return JSONResponse(
            {"ok": False, "error": f"{type(e).__name__}: {e}"},
            status_code=500,
        )

    result = {"ok": True, **_audio_get_volume()}
    if unmute_warning is not None:
        result["warning"] = unmute_warning
    return JSONResponse(result)
708
+
709
@app.post("/api/diagnostics/speaker_test")
def _api_diagnostics_speaker_test():
    """Start ``speaker-test`` playing a 440 Hz sine tone and return at once.

    The process is launched without waiting for it to finish; its output is
    discarded.

    Returns:
        200 ``{"ok": True, ...}`` once the process has been spawned;
        500 when the ``speaker-test`` binary is missing or spawning fails.
    """
    tone_cmd = ["speaker-test", "-c", "1", "-t", "sine", "-f", "440", "-l", "1"]
    try:
        subprocess.Popen(
            tone_cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        return JSONResponse({"ok": True, "message": "Playing 1-second 440Hz tone"})
    except FileNotFoundError:
        return JSONResponse(
            {"ok": False, "error": "speaker-test binary not found"},
            status_code=500,
        )
    except Exception as exc:
        return JSONResponse(
            {"ok": False, "error": f"{type(exc).__name__}: {exc}"},
            status_code=500,
        )
725
+
726
+ # ------------------------------------------------------------------
727
+ # Settings — read/write a subset of .env keys via the dashboard so
728
+ # operators don't have to SSH and `nano .env` to set an API key.
729
+ # Sensitive values are masked on GET; full values written on PATCH.
730
+ # Changes that require app restart are flagged in the response.
731
+ # ------------------------------------------------------------------
732
# Allow-list of environment keys the settings endpoints expose.
# Per key:
#   "secret"  - the GET endpoint masks the value before returning it.
#   "restart" - the PATCH endpoint reports restart_required when the key
#               is among the updated ones.
# Keys not listed here are ignored by the PATCH endpoint.
_SETTINGS_KEYS = {
    "VOICE_BACKEND": {"secret": False, "restart": True},
    "GEMINI_LIVE_MODEL": {"secret": False, "restart": True},
    "GEMINI_LIVE_VOICE": {"secret": False, "restart": True},
    "OPENAI_API_KEY": {"secret": True, "restart": True},
    "GEMINI_API_KEY": {"secret": True, "restart": True},
    # GEMINI_MODEL removed 2026-05-21 — the name normalizer it
    # configured is short-circuited in name_normalizer.py, so this
    # key is unused. Re-add if/when the normalizer is reinstated.
    "STT_MODEL": {"secret": False, "restart": True},
    "STT_DISABLE_BIAS": {"secret": False, "restart": True},
    "RESEND_API_KEY": {"secret": True, "restart": False},
    "RESEND_FROM": {"secret": False, "restart": False},
    "RECEPTION_ICS_URL": {"secret": False, "restart": False},
    "FACE_TTL_DAYS": {"secret": False, "restart": True},
    "VISITOR_LOG_RETENTION_DAYS": {"secret": False, "restart": True},
    "FACE_LBPH_THRESHOLD": {"secret": False, "restart": True},
    "MODEL_NAME": {"secret": False, "restart": True},
}
751
+
752
def _find_env_path() -> Optional[Path]:
    """Locate the ``.env`` file the settings endpoints read and write.

    Resolution order:
      1. ``dotenv.find_dotenv(usecwd=True)``, when it returns a path (any
         error from this step is ignored).
      2. The first existing file among ``<instance_path>/.env`` (only when
         ``instance_path`` is set) and ``<cwd>/.env``.
      3. The first of those candidates even though it does not exist yet.
    """
    try:
        from dotenv import find_dotenv

        discovered = find_dotenv(usecwd=True)
        if discovered:
            return Path(discovered)
    except Exception:
        pass

    fallbacks = [Path(instance_path) / ".env"] if instance_path else []
    fallbacks.append(Path.cwd() / ".env")

    existing = next((p for p in fallbacks if p.exists()), None)
    if existing is not None:
        return existing
    return fallbacks[0] if fallbacks else None
768
+
769
def _mask(value: str) -> str:
    """Return ``value`` with its characters replaced by bullets for display.

    Only values long enough that the hidden middle is at least as long as
    the revealed part keep their first 3 and last 4 characters; anything
    shorter is masked completely. This avoids echoing a 7-character value
    back in full (3 leading + 4 trailing characters cover all of it) and
    revealing most of an 8-13 character one.

    The result always has the same length as the input; an empty or falsy
    value yields ``""``.
    """
    if not value:
        return ""
    visible_head, visible_tail = 3, 4
    if len(value) < 2 * (visible_head + visible_tail):
        return "•" * len(value)
    hidden = len(value) - visible_head - visible_tail
    return value[:visible_head] + "•" * hidden + value[-visible_tail:]
775
+
776
@app.get("/api/settings")
def _api_settings_get():
    """Return the current value of every allow-listed setting.

    Values are read from the process environment. Keys marked secret are
    masked before being returned; ``is_set`` tells whether a non-empty value
    exists. The response also carries the resolved ``.env`` path (or
    ``None``).
    """
    env_path = _find_env_path()

    def _describe(key, meta):
        raw = os.getenv(key) or ""
        shown = _mask(raw) if (meta["secret"] and raw) else raw
        return {
            "key": key,
            "value": shown,
            "is_set": bool(raw),
            "is_secret": meta["secret"],
            "requires_restart": meta["restart"],
        }

    return JSONResponse({
        "env_path": str(env_path) if env_path else None,
        "settings": [_describe(k, m) for k, m in _SETTINGS_KEYS.items()],
    })
794
+
795
@app.patch("/api/settings")
def _api_settings_patch(payload: dict = Body(...)):
    """Persist allow-listed settings to ``.env`` and the process environment.

    Only keys present in ``_SETTINGS_KEYS`` with a non-null value are
    accepted; values are stringified. Existing ``KEY=...`` lines are
    replaced in place (comments and ordering are kept) and new keys are
    appended.

    Values containing CR, LF or NUL are rejected: a line break would let a
    caller smuggle extra, non-allow-listed ``KEY=value`` lines into the
    file, and NUL cannot be stored in ``os.environ``. The payload is fully
    validated before the file system is touched.

    Returns:
        500 when no ``.env`` location can be determined or the file cannot
        be read/written;
        400 when no valid key is supplied or a value has forbidden
        characters;
        200 with ``updated``, ``restart_required`` and ``env_path``
        otherwise.
    """
    env_path = _find_env_path()
    if env_path is None:
        return JSONResponse(
            {"ok": False, "error": "Could not locate .env file"}, status_code=500,
        )
    updates = {
        k: str(v) for k, v in (payload or {}).items()
        if k in _SETTINGS_KEYS and v is not None
    }
    if not updates:
        return JSONResponse(
            {"ok": False, "error": "No valid keys to update"}, status_code=400,
        )
    unsafe = sorted(
        k for k, v in updates.items() if any(ch in v for ch in "\r\n\x00")
    )
    if unsafe:
        return JSONResponse(
            {
                "ok": False,
                "error": (
                    "Values must not contain line breaks or NUL bytes: "
                    + ", ".join(unsafe)
                ),
            },
            status_code=400,
        )

    try:
        env_path.parent.mkdir(parents=True, exist_ok=True)
        # Read existing .env (if any), preserving comments + ordering.
        lines: List[str] = []
        if env_path.exists():
            lines = env_path.read_text(encoding="utf-8").splitlines()
        # Rewrite — replace existing keys in place, append new ones.
        seen: set[str] = set()
        for i, line in enumerate(lines):
            stripped = line.lstrip()
            if not stripped or stripped.startswith("#"):
                continue
            if "=" not in stripped:
                continue
            key = stripped.split("=", 1)[0].strip()
            if key in updates:
                lines[i] = f"{key}={updates[key]}"
                seen.add(key)
        for key, val in updates.items():
            if key not in seen:
                lines.append(f"{key}={val}")
        env_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
    except (OSError, UnicodeError) as e:
        return JSONResponse(
            {"ok": False, "error": f"{type(e).__name__}: {e}"}, status_code=500,
        )

    # Apply to the current process; keys flagged "restart" still need an app
    # restart to take effect, which is reported to the caller.
    restart_required = False
    for key, val in updates.items():
        os.environ[key] = val
        if _SETTINGS_KEYS[key]["restart"]:
            restart_required = True
    return JSONResponse({
        "ok": True,
        "updated": list(updates.keys()),
        "restart_required": restart_required,
        "env_path": str(env_path),
    })
843
+
844
+
845
def update_chatbot(chatbot: List[Dict[str, Any]], response: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Add ``response`` to the end of ``chatbot`` in place and return that same list."""
    chatbot.extend((response,))
    return chatbot
849
+
850
+
851
def main() -> None:
    """Parse the command line and start the receptionist app."""
    cli_args, _unparsed = parse_args()
    run(cli_args)
855
+
856
+
857
def run(
    args: argparse.Namespace,
    robot: Optional[ReachyMini] = None,
    app_stop_event: Optional[threading.Event] = None,
    settings_app: Optional[FastAPI] = None,
    instance_path: Optional[str] = None,
) -> None:
    """Run the Reachy Mini receptionist app.

    Wires up the robot connection, vision, face recognition, session state,
    employee directory, the voice handler and either the Gradio UI or the
    headless console stream, then blocks in ``stream_manager.launch()``
    until interrupted and finally stops every service.

    Args:
        args: Parsed CLI options; reads ``debug``, ``no_camera``,
            ``head_tracker``, ``robot_name`` and ``gradio`` (``gradio`` is
            switched on in place when a simulation is detected).
        robot: Existing robot connection; one is created when ``None``.
        app_stop_event: When given, a daemon thread waits on it and closes
            the stream manager once it is set.
        settings_app: FastAPI app to mount the dashboard API on. In Gradio
            mode a fresh ``FastAPI()`` is used when this is ``None``.
        instance_path: Directory holding the SQLite files and ``.env``;
            the current working directory is used when ``None``.

    The process exits with status 1 when the robot cannot be initialised.
    """
    # Importing runtime dependencies lazily keeps startup flexible across install contexts.
    from reachy_mini_receptionist.moves import MovementManager
    from reachy_mini_receptionist.console import LocalStream
    from reachy_mini_receptionist.tools.core_tools import ToolDependencies
    from reachy_mini_receptionist.audio.head_wobbler import HeadWobbler

    # Backend switch: VOICE_BACKEND=gemini (default) uses Google Gemini
    # Live, "openai" uses OpenAI Realtime. Default flipped from openai to
    # gemini on 2026-05-21 because (a) Gemini Live hears Indian names far
    # more accurately than OpenAI's gpt-4o-transcribe, (b) operators on a
    # free Gemini API key get more headroom than on free OpenAI credits.
    # Each handler is imported lazily, inside its own branch below, so a
    # missing SDK doesn't break the alternate backend.
    _voice_backend = (os.getenv("VOICE_BACKEND") or "gemini").strip().lower()

    logger = setup_logger(args.debug)
    logger.info("Starting Reachy Mini Receptionist App")

    if args.no_camera and args.head_tracker is not None:
        logger.warning(
            "Head tracking disabled: --no-camera flag is set. "
            "Remove --no-camera to enable head tracking."
        )

    if robot is None:
        try:
            robot_kwargs = {}
            if args.robot_name is not None:
                robot_kwargs["robot_name"] = args.robot_name

            logger.info("Initializing ReachyMini (SDK will auto-detect appropriate backend)")
            robot = ReachyMini(**robot_kwargs)

        except TimeoutError as e:
            logger.error(
                "Connection timeout: Failed to connect to Reachy Mini daemon. "
                f"Details: {e}"
            )
            log_connection_troubleshooting(logger, args.robot_name)
            sys.exit(1)

        except ConnectionError as e:
            logger.error(
                "Connection failed: Unable to establish connection to Reachy Mini. "
                f"Details: {e}"
            )
            log_connection_troubleshooting(logger, args.robot_name)
            sys.exit(1)

        except Exception as e:
            logger.error(
                f"Unexpected error during robot initialization: {type(e).__name__}: {e}"
            )
            logger.error("Please check your configuration and try again.")
            sys.exit(1)

    # Auto-enable Gradio in simulation mode (both MuJoCo for daemon and mockup-sim for desktop app)
    status = robot.client.get_status()
    if isinstance(status, dict):
        simulation_enabled = status.get("simulation_enabled", False)
        mockup_sim_enabled = status.get("mockup_sim_enabled", False)
    else:
        simulation_enabled = getattr(status, "simulation_enabled", False)
        mockup_sim_enabled = getattr(status, "mockup_sim_enabled", False)

    is_simulation = simulation_enabled or mockup_sim_enabled

    if is_simulation and not args.gradio:
        logger.info("Simulation mode detected. Automatically enabling gradio flag.")
        args.gradio = True

    camera_worker, _, vision_manager = handle_vision_stuff(args, robot)

    # ------------------------------------------------------------------
    # Receptionist: Face DB + Face Recognition Worker
    # ------------------------------------------------------------------
    from reachy_mini_receptionist.face_db import FaceDatabase
    from reachy_mini_receptionist.face_recognition_worker import FaceRecognitionWorker

    db_dir = Path(instance_path) if instance_path else Path.cwd()
    face_db = FaceDatabase(db_dir / "guests.db")
    face_worker = FaceRecognitionWorker(face_db, camera_worker=camera_worker)

    # ------------------------------------------------------------------
    # Receptionist: Session state machine + visitor log
    # ------------------------------------------------------------------
    from reachy_mini_receptionist.session_manager import SessionManager
    from reachy_mini_receptionist.conversation_controller import ConversationController
    from reachy_mini_receptionist.visitor_log import VisitorLog

    visitor_log = VisitorLog(db_dir / "visitor_log.db")
    session_manager = SessionManager(visitor_log=visitor_log)
    conversation_controller = ConversationController(session_manager)

    # Employee directory — SQLite-backed CRUD. Seeded from the hardcoded
    # _SEED_EMPLOYEES list in employees.py on a brand-new install; after
    # that, the dashboard's Employees panel is the source of truth.
    from reachy_mini_receptionist.employees_store import EmployeeStore
    from reachy_mini_receptionist import employees as _employees_module
    employee_store = EmployeeStore(db_dir / "employees.db")
    try:
        seeded = employee_store.seed_if_empty(_employees_module._SEED_EMPLOYEES)
        if seeded:
            print(f"[employees] Seeded {seeded} employee(s) on first run")
    except Exception as e:
        print(f"[employees] Seed failed: {e}")
    _employees_module.set_store(employee_store)

    # Privacy retention — best-effort cleanup at startup. Defaults match the
    # Day-2 plan (face TTL 30d, visit log 90d). Set the env var to 0 to
    # disable either one. Runs once per app start — restart weekly or add a
    # scheduled task if you keep the app up for months.
    try:
        face_ttl = float(os.getenv("FACE_TTL_DAYS", "30"))
        removed_faces = face_db.cleanup_older_than(face_ttl)
        if removed_faces:
            print(f"[retention] Face DB: removed {removed_faces} guest(s) older than {face_ttl} days")
    except Exception as e:
        print(f"[retention] Face DB cleanup failed: {e}")
    try:
        log_retention = float(os.getenv("VISITOR_LOG_RETENTION_DAYS", "90"))
        removed_visits = visitor_log.cleanup_older_than(log_retention)
        if removed_visits:
            print(f"[retention] Visitor log: removed {removed_visits} row(s) older than {log_retention} days")
    except Exception as e:
        print(f"[retention] Visitor log cleanup failed: {e}")

    movement_manager = MovementManager(
        current_robot=robot,
        camera_worker=camera_worker,
    )

    head_wobbler = HeadWobbler(set_speech_offsets=movement_manager.set_speech_offsets)

    deps = ToolDependencies(
        reachy_mini=robot,
        movement_manager=movement_manager,
        camera_worker=camera_worker,
        vision_manager=vision_manager,
        head_wobbler=head_wobbler,
        face_worker=face_worker,
        face_db=face_db,
        session_manager=session_manager,
        conversation_controller=conversation_controller,
    )
    current_file_path = os.path.dirname(os.path.abspath(__file__))
    logger.debug(f"Current file absolute path: {current_file_path}")
    chatbot = gr.Chatbot(
        type="messages",
        resizable=True,
        avatar_images=(
            os.path.join(current_file_path, "images", "user_avatar.png"),
            os.path.join(current_file_path, "images", "reachymini_avatar.png"),
        ),
    )
    logger.debug(f"Chatbot avatar images: {chatbot.avatar_images}")

    if _voice_backend == "gemini":
        from reachy_mini_receptionist.gemini_live import GeminiLiveHandler
        logger.info("VOICE_BACKEND=gemini — using Gemini Live handler")
        handler = GeminiLiveHandler(
            deps,
            gradio_mode=args.gradio,
            instance_path=instance_path,
            session_manager=session_manager,
            controller=conversation_controller,
        )
    else:
        if _voice_backend != "openai":
            # Any value other than "gemini" lands here; make a typo visible.
            logger.warning(
                "Unrecognized VOICE_BACKEND=%r — falling back to the OpenAI Realtime handler",
                _voice_backend,
            )
        from reachy_mini_receptionist.openai_realtime import OpenaiRealtimeHandler
        logger.info("VOICE_BACKEND=openai — using OpenAI Realtime handler")
        handler = OpenaiRealtimeHandler(
            deps,
            gradio_mode=args.gradio,
            instance_path=instance_path,
            session_manager=session_manager,
            controller=conversation_controller,
        )

    def _face_event_forwarder(event: Dict[str, Any]) -> None:
        """Route a face event through the controller, then to the LLM context."""
        try:
            conversation_controller.on_face_event(event)
        except Exception as exc:
            logger.warning("ConversationController.on_face_event raised %s: %s", type(exc).__name__, exc)
        handler.notify_external_face_event(event)

    face_worker.set_face_event_callback(_face_event_forwarder)

    # Subscribe the handler BEFORE face_worker.start() so the first
    # transitions aren't dropped. SessionManager buffers events that fire
    # before the realtime websocket is connected.
    session_manager.subscribe(handler.notify_session_event)

    stream_manager: gr.Blocks | LocalStream | None = None

    if args.gradio:
        api_key_textbox = gr.Textbox(
            label="OPENAI API Key",
            type="password",
            value=os.getenv("OPENAI_API_KEY") if not get_space() else "",
        )

        from reachy_mini_receptionist.gradio_personality import PersonalityUI

        personality_ui = PersonalityUI()
        personality_ui.create_components()

        stream = Stream(
            handler=handler,
            mode="send-receive",
            modality="audio",
            additional_inputs=[
                chatbot,
                api_key_textbox,
                *personality_ui.additional_inputs_ordered(),
            ],
            additional_outputs=[chatbot],
            additional_outputs_handler=update_chatbot,
            ui_args={"title": "Talk with Reachy Mini"},
        )
        stream_manager = stream.ui
        if not settings_app:
            app = FastAPI()
        else:
            app = settings_app

        personality_ui.wire_events(handler, stream_manager)

        # ------------------------------------------------------------------
        # Mount dashboard API endpoints BEFORE wrapping with Gradio so that
        # /video_feed, /api/*, /dashboard routes are available on the same app.
        # ------------------------------------------------------------------
        _mount_dashboard_api(
            app, face_worker, face_db, handler, session_manager, visitor_log,
            employee_store=employee_store, instance_path=instance_path,
        )
        logger.info("📊 Receptionist dashboard available at: http://localhost:7860/dashboard")

        app = gr.mount_gradio_app(app, stream.ui, path="/")
    else:
        # In headless mode, wire settings_app + instance_path to console LocalStream
        stream_manager = LocalStream(
            handler,
            robot,
            settings_app=settings_app,
            instance_path=instance_path,
        )

        # ------------------------------------------------------------------
        # Mount dashboard API endpoints on settings_app when headless
        # ------------------------------------------------------------------
        _mount_dashboard_api(
            settings_app, face_worker, face_db, handler, session_manager, visitor_log,
            employee_store=employee_store, instance_path=instance_path,
        )

    # Each async service → its own thread/loop
    movement_manager.start()
    head_wobbler.start()
    face_worker.start()
    if camera_worker:
        camera_worker.start()
    if vision_manager:
        vision_manager.start()

    def poll_stop_event() -> None:
        """Poll the stop event to allow graceful shutdown."""
        if app_stop_event is not None:
            app_stop_event.wait()

        logger.info("App stop event detected, shutting down...")
        try:
            stream_manager.close()
        except Exception as e:
            logger.error(f"Error while closing stream manager: {e}")

    if app_stop_event:
        threading.Thread(target=poll_stop_event, daemon=True).start()

    try:
        stream_manager.launch()
    except KeyboardInterrupt:
        logger.info("Keyboard interruption in main thread... closing server.")
    finally:
        movement_manager.stop()
        head_wobbler.stop()
        face_worker.stop()
        if camera_worker:
            camera_worker.stop()
        if vision_manager:
            vision_manager.stop()

        # Ensure media is explicitly closed before disconnecting
        try:
            robot.media.close()
        except Exception as e:
            logger.debug(f"Error closing media during shutdown: {e}")

        # prevent connection to keep alive some threads
        robot.client.disconnect()
        time.sleep(1)
        logger.info("Shutdown complete.")
1166
+
1167
+
1168
class ReachyMiniReceptionist(ReachyMiniApp):  # type: ignore[misc]
    """Reachy Mini Apps wrapper that launches the receptionist app."""

    custom_app_url = "http://0.0.0.0:7860/"
    dont_start_webserver = False

    def run(self, reachy_mini: ReachyMini, stop_event: threading.Event) -> None:
        """Start the receptionist with the robot and stop event supplied by the host.

        A new asyncio event loop is created and installed as the current
        loop before the module-level ``run`` is invoked with this app's
        ``settings_app`` and the parent directory of its instance path.
        """
        asyncio.set_event_loop(asyncio.new_event_loop())

        cli_args, _unparsed = parse_args()

        # Disabled for now:
        # is_wireless = reachy_mini.client.get_status()["wireless_version"]
        # args.head_tracker = None if is_wireless else "mediapipe"

        app_dir = self._get_instance_path().parent
        run(
            cli_args,
            robot=reachy_mini,
            app_stop_event=stop_event,
            settings_app=self.settings_app,
            instance_path=app_dir,
        )
1192
+
1193
+
1194
# Script entry point: run the app through the ReachyMiniApp wrapper and ask
# it to stop when the user interrupts with Ctrl-C.
if __name__ == "__main__":
    app = ReachyMiniReceptionist()
    try:
        app.wrapped_run()
    except KeyboardInterrupt:
        app.stop()
src/reachy_mini_receptionist/moves.py ADDED
@@ -0,0 +1,849 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Movement system with sequential primary moves and additive secondary moves.
2
+
3
+ Design overview
4
+ - Primary moves (emotions, dances, goto, breathing) are mutually exclusive and run
5
+ sequentially.
6
+ - Secondary moves (speech sway, face tracking) are additive offsets applied on top
7
+ of the current primary pose.
8
+ - There is a single control point to the robot: `ReachyMini.set_target`.
9
+ - The control loop runs near 100 Hz and is phase-aligned via a monotonic clock.
10
+ - Idle behaviour starts an infinite `BreathingMove` after a short inactivity delay
11
+ unless listening is active.
12
+
13
+ Threading model
14
+ - A dedicated worker thread owns all real-time state and issues `set_target`
15
+ commands.
16
+ - Other threads communicate via a command queue (enqueue moves, mark activity,
17
+ toggle listening).
18
+ - Secondary offset producers set pending values guarded by locks; the worker
19
+ snaps them atomically.
20
+
21
+ Units and frames
22
+ - Secondary offsets are interpreted as metres for x/y/z and radians for
23
+ roll/pitch/yaw in the world frame (unless noted by `compose_world_offset`).
24
+ - Antennas and `body_yaw` are in radians.
25
+ - Head pose composition uses `compose_world_offset(primary_head, secondary_head)`;
26
+ the secondary offset must therefore be expressed in the world frame.
27
+
28
+ Safety
29
+ - Listening freezes antennas, then blends them back on unfreeze.
30
+ - Interpolations and blends are used to avoid jumps at all times.
31
+ - `set_target` errors are rate-limited in logs.
32
+ """
33
+
34
+ from __future__ import annotations
35
+ import time
36
+ import logging
37
+ import threading
38
+ from queue import Empty, Queue
39
+ from typing import Any, Dict, Tuple
40
+ from collections import deque
41
+ from dataclasses import dataclass
42
+
43
+ import numpy as np
44
+ from numpy.typing import NDArray
45
+
46
+ from reachy_mini import ReachyMini
47
+ from reachy_mini.utils import create_head_pose
48
+ from reachy_mini.motion.move import Move
49
+ from reachy_mini.utils.interpolation import (
50
+ compose_world_offset,
51
+ linear_pose_interpolation,
52
+ )
53
+
54
+
55
+ logger = logging.getLogger(__name__)
56
+
57
+ # Configuration constants
58
+ CONTROL_LOOP_FREQUENCY_HZ = 100.0 # Hz - Target frequency for the movement control loop
59
+
60
+ # Type definitions
61
+ FullBodyPose = Tuple[NDArray[np.float32], Tuple[float, float], float] # (head_pose_4x4, antennas, body_yaw)
62
+
63
+
64
class BreathingMove(Move):  # type: ignore
    """Never-ending idle move: ease to neutral, then breathe.

    While ``t < interpolation_duration`` the head pose and antennas are
    interpolated from the supplied start values to neutral. Afterwards the
    head oscillates along z and the two antennas sway with opposite signs.
    """

    def __init__(
        self,
        interpolation_start_pose: NDArray[np.float32],
        interpolation_start_antennas: Tuple[float, float],
        interpolation_duration: float = 1.0,
    ):
        """Store the starting pose and the breathing parameters.

        Args:
            interpolation_start_pose: 4x4 head pose to interpolate from.
            interpolation_start_antennas: Antenna positions to interpolate from.
            interpolation_duration: Seconds spent easing to neutral.

        """
        # Where the move starts from
        self.interpolation_start_pose = interpolation_start_pose
        self.interpolation_start_antennas = np.array(interpolation_start_antennas)
        self.interpolation_duration = interpolation_duration

        # Neutral base that the breathing pattern is built around
        self.neutral_head_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
        self.neutral_antennas = np.array([0.0, 0.0])

        # Oscillation settings
        self.breathing_z_amplitude = 0.005  # metres (5 mm)
        self.breathing_frequency = 0.1  # Hz (6 cycles per minute)
        self.antenna_sway_amplitude = np.deg2rad(15)  # radians (15 degrees)
        self.antenna_frequency = 0.5  # Hz

    @property
    def duration(self) -> float:
        """Return infinity: the move only ends when something replaces it."""
        return float("inf")

    def evaluate(self, t: float) -> tuple[NDArray[np.float64] | None, NDArray[np.float64] | None, float | None]:
        """Sample the move at ``t`` seconds and return ``(head_pose, antennas, body_yaw)``."""
        if t < self.interpolation_duration:
            # Easing phase: blend linearly from the start values to neutral.
            progress = t / self.interpolation_duration
            head_pose = linear_pose_interpolation(
                self.interpolation_start_pose,
                self.neutral_head_pose,
                progress,
            )
            blended = (1 - progress) * self.interpolation_start_antennas + progress * self.neutral_antennas
            antennas = blended.astype(np.float64)
        else:
            # Breathing phase: sinusoids measured from the end of the easing phase.
            elapsed = t - self.interpolation_duration

            lift = self.breathing_z_amplitude * np.sin(2 * np.pi * self.breathing_frequency * elapsed)
            head_pose = create_head_pose(x=0, y=0, z=lift, roll=0, pitch=0, yaw=0, degrees=True, mm=False)

            sway = self.antenna_sway_amplitude * np.sin(2 * np.pi * self.antenna_frequency * elapsed)
            antennas = np.array([sway, -sway], dtype=np.float64)

        # body_yaw is always 0.0 for this move
        return (head_pose, antennas, 0.0)
131
+
132
+
133
def combine_full_body(primary_pose: FullBodyPose, secondary_pose: FullBodyPose) -> FullBodyPose:
    """Fuse a primary pose with secondary offsets.

    Args:
        primary_pose: (head_pose, antennas, body_yaw) of the primary move.
        secondary_pose: (head_pose, antennas, body_yaw) of the secondary offsets.

    Returns:
        (head_pose, antennas, body_yaw) where the head comes from
        `compose_world_offset` and antennas / body_yaw are plain sums.

    """
    head_abs, antennas_abs, yaw_abs = primary_pose
    head_off, antennas_off, yaw_off = secondary_pose

    # The secondary head pose is passed as the offset argument of
    # compose_world_offset, applied on top of the absolute primary transform.
    fused_head = compose_world_offset(head_abs, head_off, reorthonormalize=True)

    fused_antennas = tuple(antennas_abs[side] + antennas_off[side] for side in (0, 1))
    return (fused_head, fused_antennas, yaw_abs + yaw_off)
160
+
161
+
162
def clone_full_body_pose(pose: FullBodyPose) -> FullBodyPose:
    """Return an independent copy of ``(head, antennas, body_yaw)``.

    The head array is copied; the two antenna values and the body yaw are
    converted to plain floats.
    """
    head_matrix, antenna_pair, yaw = pose
    head_copy = head_matrix.copy()
    antennas_copy = tuple(float(antenna_pair[side]) for side in (0, 1))
    return (head_copy, antennas_copy, float(yaw))
166
+
167
+
168
@dataclass
class MovementState:
    """Mutable bookkeeping for the movement system."""

    # --- primary move ---
    current_move: Move | None = None  # move currently being played, if any
    move_start_time: float | None = None  # timestamp at which current_move started
    last_activity_time: float = 0.0  # refreshed by update_activity()

    # --- secondary offsets, six values each: (x, y, z, roll, pitch, yaw) ---
    speech_offsets: Tuple[float, float, float, float, float, float] = (0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
    face_tracking_offsets: Tuple[float, float, float, float, float, float] = (0.0, 0.0, 0.0, 0.0, 0.0, 0.0)

    # --- most recent primary pose ---
    last_primary_pose: FullBodyPose | None = None

    def update_activity(self) -> None:
        """Stamp `last_activity_time` with the current monotonic time."""
        self.last_activity_time = time.monotonic()
201
+
202
+
203
@dataclass
class LoopFrequencyStats:
    """Running accumulators describing the control-loop frequency."""

    mean: float = 0.0  # running mean of the measured frequency
    m2: float = 0.0  # running sum of squared deviations from the mean
    min_freq: float = float("inf")  # lowest frequency seen since the last reset
    count: int = 0  # samples accumulated since the last reset
    last_freq: float = 0.0  # most recent measured frequency
    potential_freq: float = 0.0  # 1 / computation time of the latest tick

    def reset(self) -> None:
        """Clear mean, m2, min_freq and count; last_freq and potential_freq are left as they are."""
        self.mean = self.m2 = 0.0
        self.count = 0
        self.min_freq = float("inf")
220
+
221
+
222
+ class MovementManager:
223
+ """Coordinate sequential moves, additive offsets, and robot output at 100 Hz.
224
+
225
+ Responsibilities:
226
+ - Own a real-time loop that samples the current primary move (if any), fuses
227
+ secondary offsets, and calls `set_target` exactly once per tick.
228
+ - Start an idle `BreathingMove` after `idle_inactivity_delay` when not
229
+ listening and no moves are queued.
230
+ - Expose thread-safe APIs so other threads can enqueue moves, mark activity,
231
+ or feed secondary offsets without touching internal state.
232
+
233
+ Timing:
234
+ - All elapsed-time calculations rely on `time.monotonic()` through `self._now`
235
+ to avoid wall-clock jumps.
236
+ - The loop targets 100 Hz: after each tick it sleeps for whatever remains of the target period.
237
+
238
+ Concurrency:
239
+ - External threads communicate via `_command_queue` messages.
240
+ - Secondary offsets are staged via dirty flags guarded by locks and consumed
241
+ atomically inside the worker loop.
242
+ """
243
+
244
def __init__(
    self,
    current_robot: ReachyMini,
    camera_worker: "Any" = None,
):
    """Set up state, configuration and cross-thread plumbing; no thread is started here.

    Args:
        current_robot: Robot that receives the pose commands.
        camera_worker: Optional object queried for face-tracking offsets.

    """
    zero_offsets: Tuple[float, float, float, float, float, float] = (0.0, 0.0, 0.0, 0.0, 0.0, 0.0)

    self.current_robot = current_robot
    self.camera_worker = camera_worker

    # Every duration in this class is measured with this one monotonic clock.
    self._now = time.monotonic

    # Movement state starts "active now" and resting at the neutral pose.
    self.state = MovementState()
    self.state.last_activity_time = self._now()
    self.state.last_primary_pose = (create_head_pose(0, 0, 0, 0, 0, 0, degrees=True), (0.0, 0.0), 0.0)

    # Primary moves waiting to be played
    self.move_queue: deque[Move] = deque()

    # Tunables
    self.idle_inactivity_delay = 0.3  # seconds
    self.target_frequency = CONTROL_LOOP_FREQUENCY_HZ
    self.target_period = 1.0 / self.target_frequency

    # Worker thread lifecycle
    self._stop_event = threading.Event()
    self._thread: threading.Thread | None = None

    # Listening / antenna-freeze bookkeeping
    self._is_listening = False
    self._last_commanded_pose: FullBodyPose = clone_full_body_pose(self.state.last_primary_pose)
    self._listening_antennas: Tuple[float, float] = self._last_commanded_pose[1]
    self._antenna_unfreeze_blend = 1.0
    self._antenna_blend_duration = 0.4  # seconds to blend back after listening
    self._last_listening_blend_time = self._now()
    self._breathing_active = False  # true when breathing move is running or queued
    self._listening_debounce_s = 0.15
    self._last_listening_toggle_time = self._now()

    # Rate limiting of set_target error logs
    self._last_set_target_err = 0.0
    self._set_target_err_interval = 1.0  # seconds between error logs
    self._set_target_err_suppressed = 0

    # Commands posted by other threads
    self._command_queue: "Queue[Tuple[str, Any]]" = Queue()

    # Pending speech offsets: value + dirty flag behind a lock
    self._speech_offsets_lock = threading.Lock()
    self._pending_speech_offsets: Tuple[float, float, float, float, float, float] = zero_offsets
    self._speech_offsets_dirty = False

    # Pending face offsets: same scheme
    self._face_offsets_lock = threading.Lock()
    self._pending_face_offsets: Tuple[float, float, float, float, float, float] = zero_offsets
    self._face_offsets_dirty = False

    # Copies of worker state that other threads may read
    self._shared_state_lock = threading.Lock()
    self._shared_last_activity_time = self.state.last_activity_time
    self._shared_is_listening = self._is_listening

    # Status / telemetry snapshots
    self._status_lock = threading.Lock()
    self._freq_stats = LoopFrequencyStats()
    self._freq_snapshot = LoopFrequencyStats()
315
+
316
def queue_move(self, move: Move) -> None:
    """Ask the worker to append ``move`` to the primary move queue.

    Only a ``("queue_move", move)`` message is posted to the command queue
    here; the move queue itself is modified when that command is handled.
    """
    request = ("queue_move", move)
    self._command_queue.put(request)
323
+
324
def clear_move_queue(self) -> None:
    """Ask the worker to drop the active move and every queued primary move.

    Posts a ``("clear_queue", None)`` message; the clearing itself happens
    when that command is handled.
    """
    request = ("clear_queue", None)
    self._command_queue.put(request)
330
+
331
def set_speech_offsets(self, offsets: Tuple[float, float, float, float, float, float]) -> None:
    """Stage new speech offsets ``(x, y, z, roll, pitch, yaw)`` for the worker.

    The values are later fed to `create_head_pose` with ``degrees=False`` and
    ``mm=False``, i.e. metres and radians. They are stored under the speech
    lock together with a dirty flag and are picked up on a later tick.
    """
    with self._speech_offsets_lock:
        self._pending_speech_offsets, self._speech_offsets_dirty = offsets, True
340
+
341
def set_moving_state(self, duration: float) -> None:
    """Tell the worker that a manual motion lasting ``duration`` is under way.

    Legacy hook for goto helpers. Posts ``("set_moving_state", duration)`` to
    the command queue so the inactivity / breathing logic sees the activity.
    """
    request = ("set_moving_state", duration)
    self._command_queue.put(request)
348
+
349
def is_idle(self) -> bool:
    """Report whether the idle delay has elapsed since the last published activity.

    Always False while the published listening flag is set.
    """
    with self._shared_state_lock:
        listening = self._shared_is_listening
        last_activity = self._shared_last_activity_time

    # The clock is only consulted when not listening.
    return not listening and self._now() - last_activity >= self.idle_inactivity_delay
359
+
360
def set_listening(self, listening: bool) -> None:
    """Request a switch of listening mode.

    Nothing is posted when ``listening`` already equals the published
    listening flag. Otherwise a ``("set_listening", listening)`` command is
    queued; the worker-side handler freezes the antennas at the last
    commanded values on entry and restarts the blend back on exit. Idle
    breathing is not started while listening.
    """
    with self._shared_state_lock:
        if self._shared_is_listening != listening:
            self._command_queue.put(("set_listening", listening))
374
+
375
def _poll_signals(self, current_time: float) -> None:
    """Apply staged offsets, then handle queued commands until the queue is empty."""
    self._apply_pending_offsets()

    inbox = self._command_queue
    while True:
        try:
            name, argument = inbox.get_nowait()
        except Empty:
            return
        self._handle_command(name, argument, current_time)
385
+
386
def _apply_pending_offsets(self) -> None:
    """Move staged speech / face offsets into `self.state`.

    Each channel is read under its own lock and its dirty flag is cleared.
    Applying a channel also refreshes the activity timestamp.
    """
    # Speech channel
    with self._speech_offsets_lock:
        speech_changed = self._speech_offsets_dirty
        speech_snapshot = self._pending_speech_offsets if speech_changed else None
        if speech_changed:
            self._speech_offsets_dirty = False
    if speech_snapshot is not None:
        self.state.speech_offsets = speech_snapshot
        self.state.update_activity()

    # Face-tracking channel
    with self._face_offsets_lock:
        face_changed = self._face_offsets_dirty
        face_snapshot = self._pending_face_offsets if face_changed else None
        if face_changed:
            self._face_offsets_dirty = False
    if face_snapshot is not None:
        self.state.face_tracking_offsets = face_snapshot
        self.state.update_activity()
407
+
408
def _handle_command(self, command: str, payload: Any, current_time: float) -> None:
    """Execute one command taken from the command queue.

    Recognised commands: ``queue_move``, ``clear_queue``, ``set_moving_state``,
    ``mark_activity`` and ``set_listening``. Anything else is logged and
    ignored. ``current_time`` is accepted but not used by any branch.
    """
    if command == "queue_move":
        if not isinstance(payload, Move):
            logger.warning("Ignored queue_move command with invalid payload: %s", payload)
            return
        self.move_queue.append(payload)
        self.state.update_activity()

        # Render the duration defensively for the debug log only.
        duration = getattr(payload, "duration", None)
        if duration is None:
            duration_str = "?"
        else:
            try:
                duration_str = f"{float(duration):.2f}"
            except (TypeError, ValueError):
                duration_str = str(duration)
        logger.debug(
            "Queued move with duration %ss, queue size: %s",
            duration_str,
            len(self.move_queue),
        )
        return

    if command == "clear_queue":
        self.move_queue.clear()
        self.state.current_move = None
        self.state.move_start_time = None
        self._breathing_active = False
        logger.info("Cleared move queue and stopped current move")
        return

    if command == "set_moving_state":
        # The duration is only validated; the handler just marks activity.
        try:
            float(payload)
        except (TypeError, ValueError):
            logger.warning("Invalid moving state duration: %s", payload)
            return
        self.state.update_activity()
        return

    if command == "mark_activity":
        self.state.update_activity()
        return

    if command != "set_listening":
        logger.warning("Unknown command received by MovementManager: %s", command)
        return

    # --- set_listening ---
    desired_state = bool(payload)
    now = self._now()
    # Debounce: requests arriving too soon after the previous one are dropped.
    if now - self._last_listening_toggle_time < self._listening_debounce_s:
        return
    self._last_listening_toggle_time = now

    if self._is_listening == desired_state:
        return

    self._is_listening = desired_state
    self._last_listening_blend_time = now
    if desired_state:
        # Entering listening: remember the antennas as last commanded.
        frozen = self._last_commanded_pose[1]
        self._listening_antennas = (float(frozen[0]), float(frozen[1]))
    # Entering and leaving both restart the blend from the frozen pose.
    self._antenna_unfreeze_blend = 0.0
    self.state.update_activity()
469
+
470
def _publish_shared_state(self) -> None:
    """Copy the activity timestamp and listening flag into their lock-guarded mirrors."""
    with self._shared_state_lock:
        self._shared_last_activity_time, self._shared_is_listening = (
            self.state.last_activity_time,
            self._is_listening,
        )
475
+
476
def _manage_move_queue(self, current_time: float) -> None:
    """Retire a finished primary move and start the next queued one, if any."""
    state = self.state
    active = state.current_move
    started = state.move_start_time

    expired = active is not None and started is not None and current_time - started >= active.duration
    if active is not None and not expired:
        # A move is still playing; nothing to do this tick.
        return

    state.current_move = None
    state.move_start_time = None

    if not self.move_queue:
        return

    state.current_move = self.move_queue.popleft()
    state.move_start_time = current_time
    # The breathing flag tracks whether the move just started is a BreathingMove.
    self._breathing_active = isinstance(state.current_move, BreathingMove)
    logger.debug(f"Starting new move, duration: {self.state.current_move.duration}s")
491
+
492
def _manage_breathing(self, current_time: float) -> None:
    """Queue a `BreathingMove` when idle and cancel breathing when real moves arrive."""
    state = self.state

    nothing_scheduled = state.current_move is None and not self.move_queue
    may_start = nothing_scheduled and not self._is_listening and not self._breathing_active
    if may_start:
        idle_for = current_time - state.last_activity_time
        if idle_for >= self.idle_inactivity_delay:
            try:
                # Upstream note: these two getters return the latest available
                # sensor data without synchronous I/O, which is why they are
                # accepted inside the control loop.
                _, antennas_now = self.current_robot.get_current_joint_positions()
                head_now = self.current_robot.get_current_head_pose()

                self._breathing_active = True
                state.update_activity()

                self.move_queue.append(
                    BreathingMove(
                        interpolation_start_pose=head_now,
                        interpolation_start_antennas=antennas_now,
                        interpolation_duration=1.0,
                    )
                )
                logger.debug("Started breathing after %.1fs of inactivity", idle_for)
            except Exception as e:
                self._breathing_active = False
                logger.error("Failed to start breathing: %s", e)

    # Breathing yields as soon as anything is waiting in the queue.
    if isinstance(state.current_move, BreathingMove) and self.move_queue:
        state.current_move = None
        state.move_start_time = None
        self._breathing_active = False
        logger.debug("Stopping breathing due to new move activity")

    # Any non-breathing move playing means breathing is not active.
    if state.current_move is not None and not isinstance(state.current_move, BreathingMove):
        self._breathing_active = False
530
+
531
def _get_primary_pose(self, current_time: float) -> FullBodyPose:
    """Return the primary pose: sampled move, else last primary pose, else neutral."""
    state = self.state

    if state.current_move is not None and state.move_start_time is not None:
        # A move is playing: sample it and fill any missing component with neutral.
        head, antennas, body_yaw = state.current_move.evaluate(current_time - state.move_start_time)
        if head is None:
            head = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
        if antennas is None:
            antennas = np.array([0.0, 0.0])
        if body_yaw is None:
            body_yaw = 0.0

        antenna_pair = (float(antennas[0]), float(antennas[1]))
        sampled = (head.copy(), antenna_pair, float(body_yaw))
        # Cache it so the pose can be held once the move ends.
        state.last_primary_pose = clone_full_body_pose(sampled)
        return sampled

    if state.last_primary_pose is not None:
        # No move playing: hold the cached pose to avoid a jump.
        return clone_full_body_pose(state.last_primary_pose)

    neutral = (create_head_pose(0, 0, 0, 0, 0, 0, degrees=True), (0.0, 0.0), 0.0)
    state.last_primary_pose = clone_full_body_pose(neutral)
    return neutral
563
+
564
def _get_secondary_pose(self) -> FullBodyPose:
    """Build the secondary pose from the summed speech and face-tracking offsets.

    Only the head carries an offset; antennas and body yaw are returned as zero.
    """
    speech = self.state.speech_offsets
    face = self.state.face_tracking_offsets
    x, y, z, roll, pitch, yaw = [speech[axis] + face[axis] for axis in range(6)]

    offset_head = create_head_pose(
        x=x,
        y=y,
        z=z,
        roll=roll,
        pitch=pitch,
        yaw=yaw,
        degrees=False,
        mm=False,
    )
    return (offset_head, (0.0, 0.0), 0.0)
587
+
588
def _compose_full_body_pose(self, current_time: float) -> FullBodyPose:
    """Return the primary pose at ``current_time`` fused with the secondary offsets."""
    return combine_full_body(
        self._get_primary_pose(current_time),
        self._get_secondary_pose(),
    )
593
+
594
def _update_primary_motion(self, current_time: float) -> None:
    """Run the per-tick primary-motion steps: move queue first, then idle breathing."""
    for step in (self._manage_move_queue, self._manage_breathing):
        step(current_time)
598
+
599
def _calculate_blended_antennas(self, target_antennas: Tuple[float, float]) -> Tuple[float, float]:
    """Return the antennas to command, honouring the listening freeze.

    While listening, the frozen antennas are returned and the blend factor is
    held at 0. Otherwise the blend factor advances with elapsed time and the
    result is a linear mix of the frozen and target antennas. Once the blend
    reaches 1 the frozen reference is updated to the target.
    """
    now = self._now()
    previous_update = self._last_listening_blend_time
    self._last_listening_blend_time = now

    frozen = self._listening_antennas

    if self._is_listening:
        self._antenna_unfreeze_blend = 0.0
        return frozen

    # Advance the blend by the time elapsed since the previous call.
    elapsed = max(0.0, now - previous_update)
    span = self._antenna_blend_duration
    if span <= 0:
        weight = 1.0
    else:
        weight = min(1.0, self._antenna_unfreeze_blend + elapsed / span)

    blended = (
        frozen[0] * (1.0 - weight) + target_antennas[0] * weight,
        frozen[1] * (1.0 - weight) + target_antennas[1] * weight,
    )

    self._antenna_unfreeze_blend = weight
    if weight >= 1.0:
        self._listening_antennas = (float(target_antennas[0]), float(target_antennas[1]))

    return blended
634
+
635
def _issue_control_command(self, head: NDArray[np.float32], antennas: Tuple[float, float], body_yaw: float) -> None:
    """Send one pose to the robot via `set_target`.

    On success the pose is recorded as the last commanded pose. On failure an
    error is logged at most once per `_set_target_err_interval` seconds, and
    the failures in between are only counted.
    """
    try:
        self.current_robot.set_target(head=head, antennas=antennas, body_yaw=body_yaw)
    except Exception as e:
        now = self._now()
        if now - self._last_set_target_err < self._set_target_err_interval:
            # Too soon after the previous log line: just count it.
            self._set_target_err_suppressed += 1
            return

        msg = f"Failed to set robot target: {e}"
        if self._set_target_err_suppressed:
            msg += f" (suppressed {self._set_target_err_suppressed} repeats)"
            self._set_target_err_suppressed = 0
        logger.error(msg)
        self._last_set_target_err = now
        return

    with self._status_lock:
        self._last_commanded_pose = clone_full_body_pose((head, antennas, body_yaw))
653
+
654
def _update_frequency_stats(
    self,
    loop_start: float,
    prev_loop_start: float,
    stats: LoopFrequencyStats,
) -> LoopFrequencyStats:
    """Fold the latest loop period into ``stats`` and return the same object.

    Updates the last frequency, sample count, running mean, running sum of
    squared deviations and minimum. A non-positive period changes nothing.
    """
    period = loop_start - prev_loop_start
    if not period > 0:
        return stats

    freq = 1.0 / period
    stats.last_freq = freq
    stats.count += 1

    # Incremental update: deviation from the old mean times deviation from the new one.
    gap = freq - stats.mean
    stats.mean += gap / stats.count
    stats.m2 += gap * (freq - stats.mean)

    stats.min_freq = min(stats.min_freq, freq)
    return stats
667
+
668
def _schedule_next_tick(self, loop_start: float, stats: LoopFrequencyStats) -> Tuple[float, LoopFrequencyStats]:
    """Return ``(sleep_time, stats)`` for the tick that began at ``loop_start``.

    ``stats.potential_freq`` is set to the reciprocal of the time spent so
    far in this tick (infinity when no time has elapsed). The sleep time is
    what remains of the target period, never negative.
    """
    spent = self._now() - loop_start
    if spent > 0:
        stats.potential_freq = 1.0 / spent
    else:
        stats.potential_freq = float("inf")
    remaining = max(0.0, self.target_period - spent)
    return remaining, stats
674
+
675
def _record_frequency_snapshot(self, stats: LoopFrequencyStats) -> None:
    """Store a field-by-field copy of ``stats`` as the snapshot, under the status lock."""
    field_names = ("mean", "m2", "min_freq", "count", "last_freq", "potential_freq")
    with self._status_lock:
        self._freq_snapshot = LoopFrequencyStats(**{name: getattr(stats, name) for name in field_names})
686
+
687
def _maybe_log_frequency(self, loop_count: int, print_interval_loops: int, stats: LoopFrequencyStats) -> None:
    """Log loop-frequency telemetry every ``print_interval_loops`` ticks, then reset ``stats``.

    Nothing happens on other ticks or when no samples have been accumulated.
    """
    if loop_count % print_interval_loops != 0:
        return
    if stats.count == 0:
        return

    variance = stats.m2 / stats.count if stats.count > 0 else 0.0
    # An untouched minimum (still infinity) is reported as 0.
    lowest = 0.0 if stats.min_freq == float("inf") else stats.min_freq
    logger.debug(
        "Loop freq - avg: %.2fHz, variance: %.4f, min: %.2fHz, last: %.2fHz, potential: %.2fHz, target: %.1fHz",
        stats.mean,
        variance,
        lowest,
        stats.last_freq,
        stats.potential_freq,
        self.target_frequency,
    )
    stats.reset()
704
+
705
def _update_face_tracking(self, current_time: float) -> None:
    """Refresh the face-tracking offsets in `self.state`.

    Takes them from the camera worker when one is configured; otherwise uses
    all-zero offsets. ``current_time`` is accepted but not used.
    """
    worker = self.camera_worker
    if worker is None:
        self.state.face_tracking_offsets = (0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
        return
    self.state.face_tracking_offsets = worker.get_face_tracking_offsets()
714
+
715
def start(self) -> None:
    """Launch the control loop in a daemon thread, unless one is already alive."""
    existing = self._thread
    if existing is not None and existing.is_alive():
        logger.warning("Move worker already running; start() ignored")
        return

    # Clear any previous stop request before the new thread starts looping.
    self._stop_event.clear()
    self._thread = threading.Thread(target=self.working_loop, daemon=True)
    self._thread.start()
    logger.debug("Move worker started")
724
+
725
def stop(self) -> None:
    """Stop the worker thread, flush pending commands and return the robot to neutral.

    Does nothing (beyond a debug log) when the worker is not running.
    Otherwise:
      1. posts a ``clear_queue`` command and sets the stop event,
      2. joins the worker thread,
      3. handles any commands the worker did not get to (see below),
      4. calls `goto_target` with a neutral head pose, zero antennas and zero
         body yaw over 2 seconds. A failure there is logged, not raised.
    """
    if self._thread is None or not self._thread.is_alive():
        logger.debug("Move worker not running; stop() ignored")
        return

    logger.info("Stopping movement manager and resetting to neutral position...")

    # Clear any queued moves and stop current move
    self.clear_move_queue()

    # Stop the worker thread first so it doesn't interfere
    self._stop_event.set()
    if self._thread is not None:
        self._thread.join()
        self._thread = None
        logger.debug("Move worker stopped")

    # The worker may have exited before handling the clear_queue command
    # posted above (the stop event is set right after it is enqueued). With
    # the worker joined it is safe to handle the leftovers here, so the queue
    # really is cleared and no stale command fires on a later start().
    self._poll_signals(self._now())
    self._publish_shared_state()

    # Reset to neutral position using goto_target (same approach as wake_up)
    try:
        neutral_head_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
        neutral_antennas = [0.0, 0.0]
        neutral_body_yaw = 0.0

        # Use goto_target directly on the robot
        self.current_robot.goto_target(
            head=neutral_head_pose,
            antennas=neutral_antennas,
            duration=2.0,
            body_yaw=neutral_body_yaw,
        )

        logger.info("Reset to neutral position completed")

    except Exception as e:
        logger.error(f"Failed to reset to neutral position: {e}")
764
+
765
def get_status(self) -> Dict[str, Any]:
    """Return a dict describing the queue, flags, last commanded pose and loop frequency.

    The pose and frequency figures are read under the status lock; the queue
    size and the two flags are read directly.
    """
    with self._status_lock:
        head, antennas, body_yaw = clone_full_body_pose(self._last_commanded_pose)
        snapshot = self._freq_snapshot
        loop_frequency = {
            "last": snapshot.last_freq,
            "mean": snapshot.mean,
            "min": snapshot.min_freq,
            "potential": snapshot.potential_freq,
            "samples": snapshot.count,
        }

    last_commanded_pose = {
        "head": head.tolist(),
        "antennas": antennas,
        "body_yaw": body_yaw,
    }

    return {
        "queue_size": len(self.move_queue),
        "is_listening": self._is_listening,
        "breathing_active": self._breathing_active,
        "last_commanded_pose": last_commanded_pose,
        "loop_frequency": loop_frequency,
    }
799
+
800
def working_loop(self) -> None:
    """Run the control loop until the stop event is set.

    Each tick polls commands, advances the primary motion, refreshes the
    face-tracking offsets, fuses the poses and issues exactly one
    `set_target` command. It then sleeps for the rest of the target period.
    """
    logger.debug("Starting enhanced movement control loop (100Hz)")

    tick = 0
    previous_start = self._now()
    log_every = max(1, int(self.target_frequency * 2))
    stats = self._freq_stats

    while not self._stop_event.is_set():
        tick_start = self._now()
        tick += 1

        # Frequency needs two tick starts, so the first tick is skipped.
        if tick > 1:
            stats = self._update_frequency_stats(tick_start, previous_start, stats)
        previous_start = tick_start

        # 1) External commands and staged offsets
        self._poll_signals(tick_start)

        # 2) Primary move queue and idle breathing
        self._update_primary_motion(tick_start)

        # 3) Vision-based secondary offsets
        self._update_face_tracking(tick_start)

        # 4) Fuse primary and secondary poses
        head, antennas, body_yaw = self._compose_full_body_pose(tick_start)

        # 5) Listening freeze / blend-back, then 6) the single set_target call
        antennas_cmd = self._calculate_blended_antennas(antennas)
        self._issue_control_command(head, antennas_cmd, body_yaw)

        # 7) Work out the sleep, then publish shared state and the stats snapshot
        pause, stats = self._schedule_next_tick(tick_start, stats)
        self._publish_shared_state()
        self._record_frequency_snapshot(stats)

        # 8) Periodic frequency telemetry
        self._maybe_log_frequency(tick, log_every, stats)

        if pause > 0:
            time.sleep(pause)

    logger.debug("Movement control loop stopped")
src/reachy_mini_receptionist/name_normalizer.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Gemini-backed name disambiguation.
2
+
3
+ OpenAI's gpt-4o-transcribe mishears short non-English names (Arav -> Lee Win,
4
+ Krishna -> Christina, Mukul -> Michael). The realtime audio loop is locked
5
+ to the OpenAI stack — too risky to swap mid-pilot — but we can run the
6
+ transcribed candidate through Gemini as a cheap post-processing step to
7
+ recover the intended name.
8
+
9
+ Pipeline:
10
+ visitor says "Arav"
11
+ -> OpenAI STT returns "Lee Win"
12
+ -> normalize_name("Lee Win", candidates=[Henry, Krishna, Arav, ...])
13
+ -> asks Gemini: "Which of these is closest phonetically to 'Lee Win'?"
14
+ -> returns "Arav" if confident, original "Lee Win" if not
15
+ -> register_guest("Arav", confirmed=true) — saves the right face
16
+
17
+ Fails open: when GEMINI_API_KEY is unset, the http call errors, or Gemini
18
+ returns garbage, we return the original transcribed name unchanged. The
19
+ worst case is "same behaviour as before".
20
+ """
21
+ from __future__ import annotations
22
+
23
+ import json
24
+ import logging
25
+ import os
26
+ from typing import Iterable, Optional
27
+
28
+ import httpx
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
# Default to gemini-3.5-flash (Google's latest Flash-tier model,
# launched at I/O 2026 — faster and more accurate than the 2.5
# variants on short-prompt disambiguation). Override via the
# GEMINI_MODEL env var without restarting (read on each call) so the
# operator can fall back to 2.5-flash / 2.5-flash-lite if needed.
_DEFAULT_GEMINI_MODEL = "gemini-3.5-flash"
# REST endpoint template; ``{model}`` is filled in by ``_gemini_url``.
_GEMINI_URL_TEMPLATE = (
    "https://generativelanguage.googleapis.com/v1beta/models/"
    "{model}:generateContent"
)
# Timeout, in seconds, passed to the blocking generateContent request.
_HTTP_TIMEOUT_SECONDS = 4.0
43
+
44
+
45
def _gemini_url() -> str:
    """Return the generateContent endpoint for the configured Gemini model.

    ``GEMINI_MODEL`` is read from the environment on every call; a missing
    or blank value falls back to ``_DEFAULT_GEMINI_MODEL``.
    """
    configured = os.getenv("GEMINI_MODEL", "").strip()
    if not configured:
        configured = _DEFAULT_GEMINI_MODEL
    return _GEMINI_URL_TEMPLATE.format(model=configured)
48
+
49
+
50
def _build_prompt(transcribed: str, candidates: list[str]) -> str:
    """Build the single-turn disambiguation prompt sent to Gemini.

    The prompt quotes the transcribed value, lists the candidate names
    (or ``(none)`` when the list is empty) and instructs the model to
    answer with one bare name.
    """
    if candidates:
        roster = ", ".join(candidates)
    else:
        roster = "(none)"

    sentences = [
        "You are a speech-recognition error corrector for a reception desk. ",
        f"A visitor said their name and the speech-to-text returned: '{transcribed}'. ",
        f"The visitor is likely one of these scheduled or known people: {roster}. ",
        "Return a candidate name ONLY when it is very phonetically close to ",
        "the transcribed value — sharing most of the syllables, vowel sounds, ",
        "or stressed sound. If NO candidate is a clear phonetic match, you ",
        "MUST return the original transcribed value unchanged. Do not stretch ",
        "for a match. When in doubt, keep the original. ",
        "Good corrections (close phonetic match): ",
        "'Lee Win' -> 'Arav' (sounds like 'Le-win' ~ 'A-rav'? No — return 'Lee Win'). ",
        "'Christina' -> 'Krishna' (yes, similar syllables). ",
        "'Michael' -> 'Mukul' (yes, M-K consonants and similar vowels). ",
        "Bad corrections (NEVER do these — return the original instead): ",
        "'Bruh' -> stays 'Bruh' (no candidate sounds like Bruh). ",
        "'Bob' -> stays 'Bob' (no phonetic match in the list). ",
        "'Sarah' -> stays 'Sarah' (if Sarah is in the list, fine; if not, KEEP IT). ",
        "Return only the chosen name, no extra words, no quotes, no punctuation.",
    ]
    return "".join(sentences)
71
+
72
+
73
def _phonetic_similarity(a: str, b: str) -> float:
    """Score how alike two names look, as a float in [0, 1].

    This is a case-insensitive ``difflib.SequenceMatcher`` ratio over the
    raw strings, not a true phonetic algorithm (no Soundex / Metaphone).
    It is good enough to reject "Henry" -> "Arjun" type hallucinations
    from Gemini. Anything below 0.5 is "not really similar".
    """
    from difflib import SequenceMatcher

    left, right = a.lower(), b.lower()
    matcher = SequenceMatcher(None, left, right)
    return matcher.ratio()
83
+
84
+
85
def normalize_name(
    transcribed: str,
    candidates: Iterable[str],
    api_key: Optional[str] = None,
) -> str:
    """Map a transcribed name to the closest candidate via Gemini.

    NOTE: the Gemini lookup is currently DISABLED. After trimming
    whitespace the function returns ``transcribed`` unchanged (see the
    early return below); the rest of the body is kept so the lookup can
    be switched back on.

    When enabled, the original ``transcribed`` value is returned unchanged when:
      - GEMINI_API_KEY is unset
      - the http call fails / times out / returns non-2xx
      - the response is empty or obviously wrong
      - Gemini's choice is not one of the pre-filtered candidates

    Args:
        transcribed: Name as returned by speech-to-text; ``None`` or blank
            yields an empty string.
        candidates: Known names the visitor might have meant.
        api_key: Optional explicit key; defaults to ``GEMINI_API_KEY``.

    Returns:
        The matching candidate (in its original casing) or the trimmed
        transcribed value.
    """
    raw = (transcribed or "").strip()
    if not raw:
        return raw

    # ──────────────────────────────────────────────────────────────────
    # DISABLED (2026-05-21): Gemini-3.5-flash name normalizer.
    # Built when OpenAI's gpt-4o-transcribe was mishearing short names
    # (Arav -> Lee Win). Gemini Live hears names cleanly natively, so the
    # extra REST call to gemini-3.5-flash before every register_guest /
    # lookup_employee was pure latency (~1-2s per call) plus 429s on the
    # free tier. Re-enable by removing this early-return when switching
    # VOICE_BACKEND back to "openai".
    # ──────────────────────────────────────────────────────────────────
    return raw

    key = api_key or os.getenv("GEMINI_API_KEY", "").strip()
    if not key:
        logger.debug("normalize_name: no GEMINI_API_KEY, returning original")
        return raw

    cand_list = [c for c in (candidates or []) if c and isinstance(c, str)]
    if not cand_list:
        return raw

    # Pre-filter: only ask Gemini about candidates with at least some
    # surface similarity to the transcribed name. Cuts API cost AND
    # prevents Gemini from being prompted with totally-unrelated options.
    _MIN_SIMILARITY = 0.4
    similar = [c for c in cand_list if _phonetic_similarity(raw, c) >= _MIN_SIMILARITY]
    if not similar:
        logger.debug(
            "normalize_name: no candidate similar enough to %r (best=%.2f) — keeping original",
            raw, max((_phonetic_similarity(raw, c) for c in cand_list), default=0.0),
        )
        return raw
    cand_list = similar

    payload = {
        "contents": [{
            "role": "user",
            "parts": [{"text": _build_prompt(raw, cand_list)}],
        }],
        "generationConfig": {
            "temperature": 0.0,
            "maxOutputTokens": 24,
        },
    }
    try:
        # Send the key as a header, not as a ``?key=`` query parameter:
        # request URLs are routinely written to logs (httpx logs them at
        # INFO), which would leak the credential.
        resp = httpx.post(
            _gemini_url(),
            headers={"x-goog-api-key": key},
            json=payload,
            timeout=_HTTP_TIMEOUT_SECONDS,
        )
        if resp.status_code >= 400:
            logger.debug("normalize_name: gemini HTTP %d: %s", resp.status_code, resp.text[:200])
            return raw
        data = resp.json()
    except Exception as e:
        # Fail open on any transport / decoding problem.
        logger.debug("normalize_name: gemini call failed (%s)", e)
        return raw

    try:
        candidate_text = (
            data["candidates"][0]["content"]["parts"][0]["text"]
        ).strip()
    except (KeyError, IndexError, TypeError, AttributeError):
        logger.debug("normalize_name: unexpected gemini response shape: %s", json.dumps(data)[:200])
        return raw

    # Sanitize: Gemini sometimes adds punctuation despite the instruction.
    for ch in ('"', "'", "."):
        candidate_text = candidate_text.replace(ch, "")
    candidate_text = candidate_text.strip()
    if not candidate_text:
        return raw

    # Only trust a Gemini reply that is either (a) exactly one of the
    # candidates (case-insensitive) or (b) the original transcript. This
    # prevents Gemini from hallucinating a new name we never gave it.
    cand_lower = {c.lower(): c for c in cand_list}
    g_lower = candidate_text.lower()
    if g_lower in cand_lower:
        chosen = cand_lower[g_lower]
        if chosen.lower() != raw.lower():
            logger.info(
                "normalize_name: '%s' -> '%s' (gemini disambiguation)",
                raw, chosen,
            )
        return chosen
    if g_lower == raw.lower():
        return raw
    logger.debug(
        "normalize_name: gemini returned %r which isn't in candidates %r — keeping original %r",
        candidate_text, cand_list, raw,
    )
    return raw
194
+
195
+
196
def collect_known_names() -> list[str]:
    """Return employee names + aliases for the candidate list.

    Used by ``register_guest`` (visitor name disambiguation against
    employees, NOT against calendar visitors) and ``lookup_employee``
    (host name -> directory entries). Calendar visitor names are
    excluded — including them was causing Gemini to map every
    transcribed visitor name onto the next-scheduled visitor.

    Best effort: if the employee directory cannot be imported or read,
    the failure is logged at debug level and whatever was gathered so far
    is returned, so a degraded source never blocks the lookup.

    Returns:
        Names and aliases, whitespace-trimmed, de-duplicated
        case-insensitively with the first spelling kept, in directory order.
    """
    names: list[str] = []
    try:
        from reachy_mini_receptionist import employees

        for emp in employees.get_all_employees():
            name = (emp.get("name") or "").strip()
            if name:
                names.append(name)
            for alias in emp.get("aliases") or []:
                cleaned = (alias or "").strip()
                if cleaned:
                    names.append(cleaned)
    except Exception as exc:
        # Deliberately non-fatal, but leave a trace instead of failing silently.
        logging.getLogger(__name__).debug(
            "collect_known_names: employee directory unavailable (%s)", exc
        )

    # dict preserves insertion order; setdefault keeps the first spelling.
    first_spelling: dict[str, str] = {}
    for name in names:
        first_spelling.setdefault(name.lower(), name)
    return list(first_spelling.values())
src/reachy_mini_receptionist/openai_realtime.py ADDED
@@ -0,0 +1,1839 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import uuid
4
+ import base64
5
+ import random
6
+ import asyncio
7
+ import logging
8
+ import threading
9
+ import time
10
+ from typing import Any, Final, Tuple, Literal, Optional
11
+ from pathlib import Path
12
+ from datetime import datetime
13
+
14
+ import cv2
15
+ import numpy as np
16
+ import gradio as gr
17
+ from openai import AsyncOpenAI
18
+ from fastrtc import AdditionalOutputs, AsyncStreamHandler, wait_for_item, audio_to_int16
19
+ from numpy.typing import NDArray
20
+ from scipy.signal import resample
21
+ from websockets.exceptions import ConnectionClosedError
22
+
23
+ from reachy_mini_receptionist.config import config
24
+ from reachy_mini_receptionist.prompts import get_session_voice, get_session_instructions
25
+ from reachy_mini_receptionist.tools.core_tools import (
26
+ ToolDependencies,
27
+ get_tool_specs,
28
+ )
29
+ from reachy_mini_receptionist.tools.background_tool_manager import (
30
+ ToolCallRoutine,
31
+ ToolNotification,
32
+ BackgroundToolManager,
33
+ )
34
+
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
# Sample rates (Hz) passed to the stream handler for both directions.
OPEN_AI_INPUT_SAMPLE_RATE: Final[Literal[24000]] = 24000
OPEN_AI_OUTPUT_SAMPLE_RATE: Final[Literal[24000]] = 24000

# Cost tracking from usage data (pricing as of Feb 2026 https://openai.com/api/pricing/)
# All rates are US dollars per one million tokens.
AUDIO_INPUT_COST_PER_1M = 32.0
AUDIO_OUTPUT_COST_PER_1M = 64.0
TEXT_INPUT_COST_PER_1M = 4.0
TEXT_OUTPUT_COST_PER_1M = 16.0
IMAGE_INPUT_COST_PER_1M = 5.0

# Seconds; presumably the maximum wait for a response to finish — confirm at the use site.
_RESPONSE_DONE_TIMEOUT: Final[float] = 30.0

# How often to rebuild the STT bias prompt + push it to the realtime session
# so calendar additions made AFTER the session connected (visitors added on
# the fly) still benefit from name-biased transcription. 300s aligns with
# the iCal cache TTL so each refresh either reuses the cache or triggers
# at most one fetch.
_STT_BIAS_REFRESH_SECONDS: Final[float] = 300.0
56
+
57
+
58
def _format_bias_prompt(names: list[str]) -> str:
    """Format a list of names into the transcription bias prompt.

    Names are de-duplicated case-insensitively (first spelling wins) and
    emitted as a plain comma-separated list. If the list would push the
    prompt past the size budget, trailing names are dropped *whole*; a
    name is never cut in half, because a truncated fragment would itself
    bias the recognizer toward a non-existent name.

    Args:
        names: Candidate names, already stripped by the caller.

    Returns:
        The prompt string (at most 1000 characters), or a generic prompt
        when ``names`` is empty.
    """
    seen: set[str] = set()
    unique: list[str] = []
    for n in names:
        key = n.lower()
        if key in seen:
            continue
        seen.add(key)
        unique.append(n)

    if not unique:
        return (
            "Reception lobby check-in conversation. Visitor names and host names."
        )

    # Plain comma-separated bias list. We tried beefing this up with
    # "I am <name>. Here to see <name>." sentences per name to increase
    # the bias signal — but gpt-4o-transcribe started ECHOING those
    # sentences back as the user transcript when audio was unclear,
    # so real visitor speech ("I am David, here to see Andrew") was
    # being mistranscribed as "I am David. Here to see David." (a
    # name from the prompt copied to both slots). Going back to the
    # simple list keeps the bias direction without the echo failure.
    #
    # OpenAI's Realtime API caps the transcription prompt at 1024 chars.
    _MAX_PROMPT_CHARS = 1000
    prefix = "Reception lobby check-in. Expected visitor and host names include: "
    suffix = "."
    separator = ", "
    budget = _MAX_PROMPT_CHARS - len(prefix) - len(suffix)

    kept: list[str] = []
    used = 0
    for name in unique:
        cost = len(name) + (len(separator) if kept else 0)
        if used + cost > budget:
            break
        kept.append(name)
        used += cost

    if not kept:
        # Degenerate case: the very first name alone exceeds the budget.
        kept = [unique[0][:budget]]

    return prefix + separator.join(kept) + suffix
93
+
94
+
95
def _collect_employee_names() -> list[str]:
    """Return employee names and aliases from the employee directory.

    Each record may be a dict or an object with ``name`` / ``aliases``
    attributes. Values are whitespace-trimmed and blanks are skipped;
    duplicates are NOT removed here.

    Best effort: if the directory cannot be imported or read, the failure
    is logged at debug level and the names gathered so far are returned.
    """
    names: list[str] = []
    try:
        from reachy_mini_receptionist import employees

        for emp in employees.get_all_employees():
            is_mapping = isinstance(emp, dict)
            raw_name = emp.get("name") if is_mapping else getattr(emp, "name", "")
            name = (raw_name or "").strip()
            if name:
                names.append(name)
            raw_aliases = emp.get("aliases") if is_mapping else getattr(emp, "aliases", None)
            for alias in raw_aliases or []:
                cleaned = (alias or "").strip()
                if cleaned:
                    names.append(cleaned)
    except Exception as exc:
        # Deliberately non-fatal, but leave a trace instead of failing silently.
        logging.getLogger(__name__).debug(
            "STT bias: employee directory unavailable (%s)", exc
        )
    return names
114
+
115
+
116
def _collect_appointment_names_sync() -> list[str]:
    """Return calendar visitor names for the STT bias prompt (always empty)."""
    # See _collect_appointment_names_async — calendar visitor names are
    # intentionally excluded from STT bias to stop the model defaulting
    # every short utterance to the next scheduled visitor.
    return []
121
+
122
+
123
async def _collect_appointment_names_async() -> list[str]:
    """Return calendar visitor names for the STT bias prompt (always empty)."""
    # Calendar visitor names are intentionally NOT included in the STT
    # bias prompt. We observed STT picking "Henry" (the next calendar
    # entry) for any short utterance from a visitor, because the bias
    # heavily prefers names that appear in the prompt. The employee
    # directory provides enough name coverage for hosts; visitor names
    # have to be heard fresh from speech.
    return []
131
+
132
+
133
def _build_transcription_bias_prompt() -> str:
    """Build the STT bias prompt synchronously, for callers outside an event loop.

    Joins the appointment names (the sync collector currently returns an
    empty list) with the employee directory names and formats the result.
    Prefer ``_build_transcription_bias_prompt_async`` in async paths.
    """
    appointment_names = _collect_appointment_names_sync()
    employee_names = _collect_employee_names()
    return _format_bias_prompt(appointment_names + employee_names)
138
+
139
+
140
async def _build_transcription_bias_prompt_async() -> str:
    """Assemble the STT bias prompt from within the event loop.

    OpenAI's transcription API accepts a free-form ``prompt`` string. When
    the recognizer hears mumbled audio it leans toward words/phrases in
    this prompt, so listing the employee directory makes non-English names
    (Mukul, Krishna, Shyam, etc.) far more likely to come back correct
    instead of collapsing to "Michael"/"Christina". The appointment
    collector is awaited first; it currently contributes no names.

    Returns the generic prompt when no names are available.
    """
    appointment_names = await _collect_appointment_names_async()
    employee_names = _collect_employee_names()
    return _format_bias_prompt(appointment_names + employee_names)
154
+
155
+
156
def _compute_response_cost(usage: Any) -> float:
    """Return the dollar cost implied by a response ``usage`` object.

    Reads ``input_token_details`` and ``output_token_details`` from
    ``usage``; a missing section, or a missing / ``None`` token count,
    contributes nothing. Rates are the module-level per-million prices.
    """
    total = 0.0

    input_details = getattr(usage, "input_token_details", None)
    if input_details:
        for field, rate in (
            ("audio_tokens", AUDIO_INPUT_COST_PER_1M),
            ("text_tokens", TEXT_INPUT_COST_PER_1M),
            ("image_tokens", IMAGE_INPUT_COST_PER_1M),
        ):
            tokens = getattr(input_details, field, 0) or 0
            total += tokens * rate / 1e6

    output_details = getattr(usage, "output_token_details", None)
    if output_details:
        for field, rate in (
            ("audio_tokens", AUDIO_OUTPUT_COST_PER_1M),
            ("text_tokens", TEXT_OUTPUT_COST_PER_1M),
        ):
            tokens = getattr(output_details, field, 0) or 0
            total += tokens * rate / 1e6

    return total
169
+
170
+
171
+ class OpenaiRealtimeHandler(AsyncStreamHandler):
172
+ """An OpenAI realtime handler for fastrtc Stream."""
173
+
174
def __init__(
    self,
    deps: ToolDependencies,
    gradio_mode: bool = False,
    instance_path: Optional[str] = None,
    session_manager: Any | None = None,
    controller: Any | None = None,
):
    """Initialize the handler.

    ``session_manager`` and ``controller`` are optional so existing test
    harnesses that construct the handler directly keep working.

    Only in-memory state is set up here; the OpenAI client and the
    realtime connection are created later, in ``start_up``.
    """
    super().__init__(
        expected_layout="mono",
        output_sample_rate=OPEN_AI_OUTPUT_SAMPLE_RATE,
        input_sample_rate=OPEN_AI_INPUT_SAMPLE_RATE,
    )

    # Override typing of the sample rates to match OpenAI's requirements
    self.output_sample_rate: Literal[24000] = self.output_sample_rate
    self.input_sample_rate: Literal[24000] = self.input_sample_rate

    self.deps = deps
    self._session_manager = session_manager
    self._controller = controller

    # Override type annotations for OpenAI strict typing (only for values used in API)
    self.output_sample_rate = OPEN_AI_OUTPUT_SAMPLE_RATE
    self.input_sample_rate = OPEN_AI_INPUT_SAMPLE_RATE

    # Realtime connection; None until a session is running.
    self.connection: Any = None
    self.output_queue: "asyncio.Queue[Tuple[int, NDArray[np.int16]] | AdditionalOutputs]" = asyncio.Queue()

    # NOTE(review): asyncio.get_event_loop() outside a running loop is
    # deprecated in recent Python versions — confirm the handler is always
    # constructed where an event loop is available.
    self.last_activity_time = asyncio.get_event_loop().time()
    self.start_time = asyncio.get_event_loop().time()
    self.is_idle_tool_call = False
    self.gradio_mode = gradio_mode
    self.instance_path = instance_path
    # Track how the API key was provided (env vs textbox) and its value
    self._key_source: Literal["env", "textbox"] = "env"
    self._provided_api_key: str | None = None

    # Debouncing for partial transcripts
    self.partial_transcript_task: asyncio.Task[None] | None = None
    self.partial_transcript_sequence: int = 0  # sequence counter to prevent stale emissions
    self.partial_debounce_delay = 0.5  # seconds

    # Internal lifecycle flags
    self._shutdown_requested: bool = False
    self._connected_event: asyncio.Event = asyncio.Event()

    # Background tool manager
    self.tool_manager = BackgroundToolManager()

    # Cost tracking
    self.cumulative_cost: float = 0.0

    # Response-in-progress guard: the Realtime API only allows one active
    # response per conversation at a time. A dedicated worker task
    # (_response_sender_loop) dequeues and sends one request at a time
    self._pending_responses: asyncio.Queue[dict[str, Any]] = asyncio.Queue()
    self._response_done_event: asyncio.Event = asyncio.Event()
    # Starts in the "done" state: no response is in flight yet.
    self._response_done_event.set()
    self._last_response_rejected: bool = False
    # Set in start_up(); used to schedule work from other threads.
    self._runtime_loop: asyncio.AbstractEventLoop | None = None

    # Last successfully pushed external face context event.
    self._face_event_lock = threading.Lock()
    self._last_face_event_sent: dict[str, Any] | None = None
    # Last pending external face event waiting for runtime loop/connection.
    self._pending_face_event_lock = threading.Lock()
    self._pending_face_event: dict[str, Any] | None = None

    # Tool args by call_id, populated when a tool starts and consumed
    # when its result arrives (so the ConversationController gets both
    # the args and the result).
    self._tool_call_args: dict[str, dict[str, Any]] = {}

    # Last successfully pushed session context event + pending buffer.
    self._session_event_lock = threading.Lock()
    self._last_session_event_sent: dict[str, Any] | None = None
    self._pending_session_event_lock = threading.Lock()
    self._pending_session_event: dict[str, Any] | None = None

    # One-shot guard so the IDLE-state workflow hint is only pushed
    # once per IDLE stretch, not on every speech_started event.
    self._idle_speech_cue_pushed: bool = False

    # Last STT bias prompt actually sent to the realtime session, so the
    # periodic refresh loop can skip the session.update when nothing has
    # changed (calendar quiet, employee directory unchanged).
    self._last_stt_bias_prompt: Optional[str] = None
267
+
268
def _stash_pending_face_event(self, face_event: dict[str, Any]) -> None:
    """Remember ``face_event`` for later delivery, replacing any earlier one.

    A shallow copy is stored, so later changes to the caller's dict do not
    alter the buffered event.
    """
    snapshot = dict(face_event)
    with self._pending_face_event_lock:
        self._pending_face_event = snapshot
272
+
273
+ def _pop_pending_face_event(self) -> dict[str, Any] | None:
274
+ """Pop and clear latest pending face event."""
275
+ with self._pending_face_event_lock:
276
+ pending = self._pending_face_event
277
+ self._pending_face_event = None
278
+ return pending
279
+
280
async def _flush_pending_face_event(self) -> None:
    """Deliver the buffered face event, if there is one.

    The event is taken out of the buffer first; if pushing it fails, it is
    put back so a later flush can retry.
    """
    event = self._pop_pending_face_event()
    if event is not None:
        try:
            await self._push_face_context_event(event)
        except Exception as exc:
            logger.debug("Failed to flush pending face event: %s", exc)
            self._stash_pending_face_event(event)
290
+
291
def copy(self) -> "OpenaiRealtimeHandler":
    """Return a fresh handler built from this one's constructor arguments.

    Only the dependencies, mode flags, session manager and controller are
    carried over; runtime state is not.
    """
    return OpenaiRealtimeHandler(
        deps=self.deps,
        gradio_mode=self.gradio_mode,
        instance_path=self.instance_path,
        session_manager=self._session_manager,
        controller=self._controller,
    )
300
+
301
+ async def apply_personality(self, profile: str | None) -> str:
302
+ """Apply a new personality (profile) at runtime if possible.
303
+
304
+ - Updates the global config's selected profile for subsequent calls.
305
+ - If a realtime connection is active, sends a session.update with the
306
+ freshly resolved instructions so the change takes effect immediately.
307
+
308
+ Returns a short status message for UI feedback.
309
+ """
310
+ try:
311
+ # Update the in-process config value and env
312
+ from reachy_mini_receptionist.config import config as _config
313
+ from reachy_mini_receptionist.config import set_custom_profile
314
+
315
+ set_custom_profile(profile)
316
+ logger.info(
317
+ "Set custom profile to %r (config=%r)", profile, getattr(_config, "REACHY_MINI_CUSTOM_PROFILE", None)
318
+ )
319
+
320
+ try:
321
+ instructions = get_session_instructions()
322
+ voice = get_session_voice()
323
+ except BaseException as e: # catch SystemExit from prompt loader without crashing
324
+ logger.error("Failed to resolve personality content: %s", e)
325
+ return f"Failed to apply personality: {e}"
326
+
327
+ # Attempt a live update first, then force a full restart to ensure it sticks
328
+ if self.connection is not None:
329
+ try:
330
+ await self.connection.session.update(
331
+ session={
332
+ "type": "realtime",
333
+ "instructions": instructions,
334
+ "audio": {"output": {"voice": voice}},
335
+ },
336
+ )
337
+ logger.info("Applied personality via live update: %s", profile or "built-in default")
338
+ except Exception as e:
339
+ logger.warning("Live update failed; will restart session: %s", e)
340
+
341
+ # Force a real restart to guarantee the new instructions/voice
342
+ try:
343
+ await self._restart_session()
344
+ return "Applied personality and restarted realtime session."
345
+ except Exception as e:
346
+ logger.warning("Failed to restart session after apply: %s", e)
347
+ return "Applied personality. Will take effect on next connection."
348
+ else:
349
+ logger.info(
350
+ "Applied personality recorded: %s (no live connection; will apply on next session)",
351
+ profile or "built-in default",
352
+ )
353
+ return "Applied personality. Will take effect on next connection."
354
+ except Exception as e:
355
+ logger.error("Error applying personality '%s': %s", profile, e)
356
+ return f"Failed to apply personality: {e}"
357
+
358
async def _emit_debounced_partial(self, transcript: str, sequence: int) -> None:
    """Publish a partial transcript once the debounce delay has elapsed.

    Nothing is emitted if the sequence counter has moved on while waiting.
    Cancellation is logged and re-raised.
    """
    try:
        await asyncio.sleep(self.partial_debounce_delay)
        # A newer partial bumped the counter while we slept: drop this one.
        if self.partial_transcript_sequence != sequence:
            return
        partial = AdditionalOutputs({"role": "user_partial", "content": transcript})
        await self.output_queue.put(partial)
        logger.debug(f"Debounced partial emitted: {transcript}")
    except asyncio.CancelledError:
        logger.debug("Debounced partial cancelled")
        raise
369
+
370
async def start_up(self) -> None:
    """Start the handler with minimal retries on unexpected websocket closure.

    Resolves the OpenAI API key, creates the client, then runs the realtime
    session. A ``ConnectionClosedError`` triggers up to ``max_attempts``
    runs in total; the error from the final attempt is re-raised.
    """
    # Remember the loop so other threads can schedule coroutines onto it.
    self._runtime_loop = asyncio.get_running_loop()
    openai_api_key = config.OPENAI_API_KEY
    if self.gradio_mode and not openai_api_key:
        # api key was not found in .env or in the environment variables
        await self.wait_for_args()  # type: ignore[no-untyped-call]
        args = list(self.latest_args)
        # NOTE(review): assumes the key textbox is the 4th stream argument
        # and always present — confirm against the UI wiring.
        textbox_api_key = args[3] if len(args[3]) > 0 else None
        if textbox_api_key is not None:
            openai_api_key = textbox_api_key
            self._key_source = "textbox"
            self._provided_api_key = textbox_api_key
        else:
            openai_api_key = config.OPENAI_API_KEY
    else:
        if not openai_api_key or not openai_api_key.strip():
            # In headless console mode, LocalStream now blocks startup until the key is provided.
            # However, unit tests may invoke this handler directly with a stubbed client.
            # To keep tests hermetic without requiring a real key, fall back to a placeholder.
            logger.warning("OPENAI_API_KEY missing. Proceeding with a placeholder (tests/offline).")
            openai_api_key = "DUMMY"

    self.client = AsyncOpenAI(api_key=openai_api_key)

    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        try:
            await self._run_realtime_session()
            # Normal exit from the session, stop retrying
            return
        except ConnectionClosedError as e:
            # Abrupt close (e.g., "no close frame received or sent") → retry
            logger.warning("Realtime websocket closed unexpectedly (attempt %d/%d): %s", attempt, max_attempts, e)
            if attempt < max_attempts:
                # exponential backoff with jitter
                # (with max_attempts = 3 only the 1s and 2s delays are ever used)
                base_delay = 2 ** (attempt - 1)  # 1s, 2s, 4s, 8s, etc.
                jitter = random.uniform(0, 0.5)
                delay = base_delay + jitter
                logger.info("Retrying in %.1f seconds...", delay)
                await asyncio.sleep(delay)
                continue
            # Out of attempts: surface the last connection error to the caller.
            raise
        finally:
            # never keep a stale reference
            # (runs after every attempt, including the successful return)
            self.connection = None
            try:
                self._connected_event.clear()
            except Exception:
                pass
420
+
421
def notify_external_face_event(self, face_event: dict[str, Any]) -> None:
    """Thread-safe entrypoint for face worker state transition events.

    Schedules ``_push_face_context_event`` on the handler's runtime loop so
    the event is added to the conversation via conversation.item.create
    without forcing a response. If the loop is not available, or scheduling
    or delivery fails, the event is buffered for a later flush instead.
    """
    runtime_loop = self._runtime_loop
    if runtime_loop is None or runtime_loop.is_closed():
        logger.debug("Deferring face event (runtime loop not ready): %s", face_event)
        self._stash_pending_face_event(face_event)
        return

    def _requeue_on_failure(done: "asyncio.Future[None]") -> None:
        # Runs when the scheduled push completes; re-buffer on any error.
        try:
            done.result()
        except Exception as exc:
            logger.debug("Face context push failed: %s", exc)
            self._stash_pending_face_event(face_event)

    try:
        scheduled = asyncio.run_coroutine_threadsafe(self._push_face_context_event(face_event), runtime_loop)
        scheduled.add_done_callback(_requeue_on_failure)
    except Exception as exc:
        logger.debug("Failed to schedule face context event: %s", exc)
        self._stash_pending_face_event(face_event)
447
+
448
async def _push_face_context_event(self, face_event: dict[str, Any]) -> None:
    """Push a face state change as external context without triggering a response.

    If there is no connection the event is buffered instead. Otherwise a
    user-role text item describing the event is added to the conversation
    and a record of what was sent is kept for ``get_last_face_event_sent``.

    Args:
        face_event: Event dict; reads ``state``, ``name``,
            ``previous_state``, ``previous_name``, ``lbph_confidence`` and
            ``detection_confidence``. Missing keys get defaults.
    """
    if not self.connection:
        logger.debug("Deferring face context event (no connection): %s", face_event)
        self._stash_pending_face_event(face_event)
        return

    def _confidence(value: Any) -> float:
        # A key that is present but None/garbage must not blow up the record.
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    state = str(face_event.get("state", "unknown"))
    name = face_event.get("name")
    previous_state = str(face_event.get("previous_state", "unknown"))
    previous_name = face_event.get("previous_name")
    lbph_conf = face_event.get("lbph_confidence", 0.0)
    det_conf = face_event.get("detection_confidence", 0.0)

    # Coerce BEFORE sending. Converting afterwards meant a bad value raised
    # once the item was already in the conversation, so callers re-buffered
    # the event and the same message was sent again on every later flush.
    lbph_value = _confidence(lbph_conf)
    det_value = _confidence(det_conf)

    msg = (
        f"[External face update {self.format_timestamp()}] "
        f"state={state}; name={name}; previous_state={previous_state}; "
        f"previous_name={previous_name}; lbph={lbph_conf}; det={det_conf}. "
        "This is context only. Do not respond unless the user speaks."
    )

    await self.connection.conversation.item.create(
        item={
            "type": "message",
            "role": "user",
            "content": [{"type": "input_text", "text": msg}],
        },
    )

    sent_at_epoch = time.time()
    sent_payload = {
        "state": state,
        "name": name,
        "previous_state": previous_state,
        "previous_name": previous_name,
        "lbph_confidence": lbph_value,
        "detection_confidence": det_value,
        "sent_at": sent_at_epoch,
        "sent_at_iso": datetime.fromtimestamp(sent_at_epoch).strftime("%Y-%m-%d %H:%M:%S"),
    }
    with self._face_event_lock:
        self._last_face_event_sent = sent_payload

    logger.info("Pushed external face context event: %s", msg)
492
+
493
async def _prime_no_face_context(self) -> None:
    """Seed a fresh session with an explicit ``no_face`` context event.

    Delegates to ``_push_face_context_event`` with a synthetic payload. Any
    exception is logged at debug level and not propagated.
    """
    startup_event = {
        "event": "face_state_changed",
        "state": "no_face",
        "name": None,
        "previous_state": "unknown",
        "previous_name": None,
        "lbph_confidence": 0.0,
        "detection_confidence": 0.0,
        "timestamp": time.time(),
    }
    try:
        await self._push_face_context_event(startup_event)
        logger.info("Primed startup face context: state=no_face")
    except Exception as exc:
        logger.debug("Failed to prime startup no-face context: %s", exc)
511
+
512
+ def get_last_face_event_sent(self) -> dict[str, Any] | None:
513
+ """Return the last face context event that was successfully sent to the model."""
514
+ with self._face_event_lock:
515
+ if self._last_face_event_sent is None:
516
+ return None
517
+ return dict(self._last_face_event_sent)
518
+
519
async def _push_idle_speech_cue_if_needed(self) -> None:
    """Send the IDLE workflow hint when a visitor speaks before any transition.

    State hints are normally delivered by ``notify_session_event`` when the
    state changes. As long as the state remains IDLE nothing fires, so this
    method pushes the IDLE hint itself. The ``_idle_speech_cue_pushed`` flag
    limits this to one push until the flag is cleared again.

    Does nothing without a connection or a session manager. Errors are logged
    at debug level and not propagated.
    """
    if not self.connection or self._session_manager is None:
        return

    try:
        from reachy_mini_receptionist.receptionist_state import ReceptionState
        from reachy_mini_receptionist.conversation_controller import next_action_hint

        state_now = self._session_manager.current_state
        if state_now != ReceptionState.IDLE:
            return

        idle_hint = next_action_hint(state_now)
        if not idle_hint:
            return

        already_pushed = getattr(self, "_idle_speech_cue_pushed", False)
        if already_pushed:
            return

        msg = (
            f"[Backend idle-speech cue {self.format_timestamp()}] "
            f"Visitor just started speaking while state=idle. {idle_hint}"
        )
        cue_item = {
            "type": "message",
            "role": "user",
            "content": [{"type": "input_text", "text": msg}],
        }
        await self.connection.conversation.item.create(item=cue_item)
        self._idle_speech_cue_pushed = True
        logger.info("Pushed idle-speech cue: %s", msg)
    except Exception as exc:
        logger.debug("idle-speech cue push error: %s", exc)
558
+
559
+ # ------------------------------------------------------------------
560
+ # Session context events (mirror of the face-event channel)
561
+ # ------------------------------------------------------------------
562
+
563
def _stash_pending_session_event(self, payload: dict[str, Any]) -> None:
    """Remember ``payload`` as the single pending session event.

    A shallow copy is stored under ``_pending_session_event_lock``; any event
    stashed earlier is replaced.
    """
    with self._pending_session_event_lock:
        snapshot = dict(payload)
        self._pending_session_event = snapshot
566
+
567
+ def _pop_pending_session_event(self) -> dict[str, Any] | None:
568
+ with self._pending_session_event_lock:
569
+ pending = self._pending_session_event
570
+ self._pending_session_event = None
571
+ return pending
572
+
573
async def _flush_pending_session_event(self) -> None:
    """Send the stashed session event, if there is one.

    The event is popped first; if ``_push_session_context_event`` raises, the
    failure is logged at debug level and the event is stashed again.
    """
    deferred = self._pop_pending_session_event()
    if deferred is not None:
        try:
            await self._push_session_context_event(deferred)
        except Exception as exc:
            logger.debug("Failed to flush pending session event: %s", exc)
            self._stash_pending_session_event(deferred)
582
+
583
def notify_session_event(
    self,
    previous_state: Any,
    new_state: Any,
    snapshot: Any,
) -> None:
    """Subscriber callback for SessionManager; usable from a non-loop thread.

    Builds a plain-dict payload from the transition and schedules
    ``_push_session_context_event`` on the runtime loop with
    ``asyncio.run_coroutine_threadsafe``. When the loop is unavailable,
    scheduling fails, or the push raises, the payload is stashed via
    ``_stash_pending_session_event``.

    Args:
        previous_state: State being left; its ``value`` attribute is used when
            present, otherwise ``str()`` of the object.
        new_state: State being entered; converted the same way.
        snapshot: Object whose ``to_dict()`` result is attached when that
            method exists; otherwise an empty dict is attached.
    """
    try:
        snapshot_dict = snapshot.to_dict() if hasattr(snapshot, "to_dict") else {}
        payload = {
            "previous_state": getattr(previous_state, "value", str(previous_state)),
            "new_state": getattr(new_state, "value", str(new_state)),
            "snapshot": snapshot_dict,
        }
    except Exception as exc:
        logger.debug("notify_session_event: failed to build payload: %s", exc)
        return

    # Entering "idle" re-arms the idle-speech cue so it can be pushed again
    # on the next utterance heard while idle.
    try:
        if payload["new_state"] == "idle":
            self._idle_speech_cue_pushed = False
    except Exception:
        pass

    runtime_loop = self._runtime_loop
    if runtime_loop is None or runtime_loop.is_closed():
        logger.debug("Deferring session event (runtime loop not ready): %s", payload)
        self._stash_pending_session_event(payload)
        return

    def _requeue_on_failure(finished: "asyncio.Future[None]") -> None:
        # Runs once the scheduled push has finished; a failed push is re-stashed.
        try:
            finished.result()
        except Exception as exc:
            logger.debug("Session context push failed: %s", exc)
            self._stash_pending_session_event(payload)

    try:
        scheduled = asyncio.run_coroutine_threadsafe(
            self._push_session_context_event(payload),
            runtime_loop,
        )
        scheduled.add_done_callback(_requeue_on_failure)
    except Exception as exc:
        logger.debug("Failed to schedule session context event: %s", exc)
        self._stash_pending_session_event(payload)
636
+
637
async def _push_session_context_event(self, payload: dict[str, Any]) -> None:
    """Push a session state change to the model as a conversation item.

    The message carries the state transition plus a few snapshot fields and,
    when ``conversation_controller.next_action_hint`` yields one, a per-state
    directive so that workflow guidance does not have to live in the system
    prompt. When ``should_speak_immediately`` is truthy for the new state, a
    ``response.create`` is queued afterwards so the model speaks right away.

    Without a connection the payload is stashed for a later flush.

    Args:
        payload: Dict with ``previous_state``, ``new_state`` and ``snapshot``
            entries, as built by ``notify_session_event``.
    """
    if not self.connection:
        logger.debug("Deferring session context event (no connection): %s", payload)
        self._stash_pending_session_event(payload)
        return

    snap = payload.get("snapshot") or {}
    new_state_value = payload.get("new_state")

    # Both stay at their defaults if the controller helpers cannot be
    # imported or reject the state value.
    hint = ""
    speak_now = False
    try:
        from reachy_mini_receptionist.conversation_controller import (
            next_action_hint,
            should_speak_immediately,
        )
        from reachy_mini_receptionist.receptionist_state import ReceptionState

        if new_state_value:
            state_enum = ReceptionState(new_state_value)
            hint = next_action_hint(state_enum)
            speak_now = should_speak_immediately(state_enum)
    except Exception as exc:
        logger.debug("Could not compute next_action_hint: %s", exc)

    appointment_time = (snap.get("matched_appointment") or {}).get("time")
    base = (
        f"[Backend session update {self.format_timestamp()}] "
        f"state: {payload.get('previous_state')} -> {new_state_value}; "
        f"visitor={snap.get('visitor_name')}; "
        f"employee={snap.get('employee_name')}; "
        f"appointment={appointment_time}; "
        f"email_sent_to={snap.get('email_sent_to')}."
    )

    if not hint:
        msg = f"{base} This is context only. Do not respond unless the user speaks."
    elif speak_now:
        # The bot is expected to continue a sequence it started, so the
        # message carries no "stay quiet" suffix.
        msg = f"{base} SPEAK NOW: {hint}"
    else:
        # Passive state change: the hint applies once the visitor speaks.
        msg = (
            f"{base} Next: {hint} "
            "(Stay quiet until the visitor speaks; this is context only.)"
        )

    context_item = {
        "type": "message",
        "role": "user",
        "content": [{"type": "input_text", "text": msg}],
    }
    await self.connection.conversation.item.create(item=context_item)

    sent_payload = dict(payload)
    sent_payload["sent_at"] = time.time()
    sent_payload["hint"] = hint
    with self._session_event_lock:
        self._last_session_event_sent = sent_payload

    logger.info("Pushed session context event: %s", msg)

    if not speak_now:
        return

    # Speak-now transitions request a response explicitly; the request goes
    # through the sender queue rather than straight to the connection.
    try:
        await self._safe_response_create(
            response={
                "instructions": (
                    "Use the latest [Backend session update] context "
                    "and speak to the visitor now. Keep it concise."
                ),
            },
        )
    except Exception as exc:
        logger.debug("Failed to queue speak-now response.create: %s", exc)
725
+
726
+ def get_last_session_event_sent(self) -> dict[str, Any] | None:
727
+ """Return the last session context event sent to the model."""
728
+ with self._session_event_lock:
729
+ if self._last_session_event_sent is None:
730
+ return None
731
+ return dict(self._last_session_event_sent)
732
+
733
async def _restart_session(self) -> None:
    """Force-close the current session and start a fresh one in background.

    The caller is held for at most five seconds while waiting for the new
    session to signal ``_connected_event``; after that the new session keeps
    establishing in the background. Failures are logged, never raised.
    """
    try:
        if self.connection is not None:
            try:
                await self.connection.close()
            except Exception as e:
                # Best effort: the connection is being discarded either way.
                logger.debug("Ignoring error while closing realtime connection: %s", e)
            finally:
                self.connection = None

        # Ensure we have a client (start_up must have run once)
        if getattr(self, "client", None) is None:
            logger.warning("Cannot restart: OpenAI client not initialized yet.")
            return

        try:
            self._connected_event.clear()
        except Exception as e:
            logger.debug("Could not clear connected event before restart: %s", e)

        # The event loop only keeps weak references to tasks, so hold a strong
        # one until the task finishes; otherwise it may be garbage-collected
        # while the new session is still running.
        restart_tasks = getattr(self, "_restart_tasks", None)
        if restart_tasks is None:
            restart_tasks = set()
            self._restart_tasks = restart_tasks

        def _report_outcome(finished: "asyncio.Task[None]") -> None:
            # Retrieve the result so a crash of the background session is logged.
            if finished.cancelled():
                return
            failure = finished.exception()
            if failure is not None:
                logger.warning("Restarted realtime session ended with error: %s", failure)

        task = asyncio.create_task(self._run_realtime_session(), name="openai-realtime-restart")
        restart_tasks.add(task)
        task.add_done_callback(restart_tasks.discard)
        task.add_done_callback(_report_outcome)

        try:
            await asyncio.wait_for(self._connected_event.wait(), timeout=5.0)
            logger.info("Realtime session restarted and connected.")
        except asyncio.TimeoutError:
            logger.warning("Realtime session restart timed out; continuing in background.")
    except Exception as e:
        logger.warning("_restart_session failed: %s", e)
765
+
766
async def _safe_response_create(self, **kwargs: Any) -> None:
    """Queue ``response.create()`` keyword arguments for the sender worker.

    The kwargs dict is put on ``_pending_responses``, from which
    ``_response_sender_loop()`` takes its requests; nothing is sent to the
    connection here.
    """
    request_queue = self._pending_responses
    await request_queue.put(kwargs)
772
+
773
async def _stt_bias_refresh_loop(self) -> None:
    """Periodically refresh the transcription bias prompt (currently disabled).

    DISABLED for the demo cut: the method returns immediately. The periodic
    ``session.update`` was suspected of breaking server VAD mid-session
    (yes/no replies stopped being captured). Re-enable after the demo with
    more testing.

    The retained, unreachable loop sleeps ``_STT_BIAS_REFRESH_SECONDS``,
    rebuilds the bias prompt and, when it differs from
    ``_last_stt_bias_prompt``, re-sends the transcription and
    ``turn_detection`` blocks together.
    """
    return
    # Everything below is intentionally unreachable until the loop is re-enabled.
    import os

    # Same model selection as the initial session setup, so a refresh does not
    # switch the transcription model mid-session.
    stt_model = (os.getenv("STT_MODEL") or "gpt-4o-transcribe").strip() or "gpt-4o-transcribe"
    try:
        while self.connection:
            try:
                await asyncio.sleep(_STT_BIAS_REFRESH_SECONDS)
            except asyncio.CancelledError:
                return

            if not self.connection:
                return

            try:
                prompt = await _build_transcription_bias_prompt_async()
            except Exception as e:
                logger.debug("STT bias refresh: build failed: %s", e)
                continue

            if prompt == self._last_stt_bias_prompt:
                continue

            try:
                # Re-send both transcription AND turn_detection together.
                # The realtime API treats nested updates as full
                # replacement of the parent object on some server
                # versions — sending only `transcription` could reset
                # `turn_detection` and silently change VAD behavior
                # mid-conversation. The values below must stay identical
                # to the turn_detection block sent by
                # `_run_realtime_session` at connect time (silence 500 ms,
                # interrupt_response off); the previous 600 ms /
                # interrupt_response=True pair changed VAD on every refresh.
                await self.connection.session.update(
                    session={
                        "type": "realtime",
                        "audio": {
                            "input": {
                                "transcription": {
                                    "model": stt_model,
                                    "language": "en",
                                    "prompt": prompt,
                                },
                                "turn_detection": {
                                    "type": "server_vad",
                                    "threshold": 0.5,
                                    "silence_duration_ms": 500,
                                    "prefix_padding_ms": 300,
                                    "interrupt_response": False,
                                    "create_response": True,
                                },
                            },
                        },
                    },
                )
                self._last_stt_bias_prompt = prompt
                logger.info(
                    "STT bias refreshed (%d chars) — calendar/directory change detected",
                    len(prompt),
                )
            except Exception as e:
                logger.debug("STT bias refresh: session.update failed: %s", e)
    except asyncio.CancelledError:
        return
838
+
839
def _just_entered_speak_now_state(self) -> bool:
    """Report whether the session's current state is a SPEAK_NOW state.

    ``_handle_tool_result`` consults this to skip its generic post-tool
    narration when the current state is one for which
    ``should_speak_immediately`` is truthy.

    Returns:
        ``bool(should_speak_immediately(current_state))``; ``False`` when no
        session manager is attached or when the lookup raises.
    """
    manager = self._session_manager
    if manager is None:
        return False
    try:
        from reachy_mini_receptionist.conversation_controller import should_speak_immediately

        verdict = should_speak_immediately(manager.current_state)
        return bool(verdict)
    except Exception:
        return False
853
+
854
async def _response_sender_loop(self) -> None:
    """Dedicated worker that sends ``response.create()`` calls serially.

    This logic was designed to comply with the response.create() docstring specification for event ordering:
    https://github.com/openai/openai-python/blob/3e0c05b84a2056870abf3bd6a5e7849020209cc3/src/openai/resources/realtime/realtime.py#L649C1-L651C30

    For each queued request the worker:
    1. Waits until no response is active (_response_done_event).
    2. Sends response.create().
    3. Waits for the response cycle to complete (response.done).
    4. If the server rejected with active_response, retries from step 1.

    The loop runs while ``self.connection`` is truthy and returns when
    cancelled while waiting for the next queued request.
    """
    while self.connection:
        try:
            kwargs = await self._pending_responses.get()
        except asyncio.CancelledError:
            return

        sent = False
        max_retries = 5
        attempts = 0
        while not sent and self.connection and attempts < max_retries:
            try:
                await asyncio.wait_for(self._response_done_event.wait(), timeout=_RESPONSE_DONE_TIMEOUT)
            except asyncio.TimeoutError:
                logger.debug("Timed out waiting for previous response to finish; forcing ahead")
                self._response_done_event.set()

            if not self.connection:
                break

            self._last_response_rejected = False
            # Mark a response as in flight BEFORE sending. The event is
            # otherwise only cleared once the server's ``response.created``
            # arrives, so the wait below would return immediately on the
            # still-set event and steps 3/4 would never take effect.
            self._response_done_event.clear()
            try:
                await self.connection.response.create(**kwargs)
            except Exception as e:
                logger.debug("_response_sender_loop: send failed: %s", e)
                # Nothing is in flight after a failed send; release waiters.
                self._response_done_event.set()
                break

            try:
                await asyncio.wait_for(self._response_done_event.wait(), timeout=_RESPONSE_DONE_TIMEOUT)
            except asyncio.TimeoutError:
                logger.debug("Timed out waiting for response.done; assuming response completed")
                self._response_done_event.set()
                break

            # Check if we were rejected
            if self._last_response_rejected:
                attempts += 1
                if attempts >= max_retries:
                    logger.debug("response.create rejected %d times; giving up", attempts)
                    break
                logger.debug("response.create was rejected; retrying (%d/%d)", attempts, max_retries)
                continue

            sent = True
910
+
911
async def _handle_tool_result(self, bg_tool: ToolNotification) -> None:
    """Process the result of a tool call.

    Normalizes the outcome into ``tool_result``, sends it to the model as a
    ``function_call_output`` item, notifies the conversation controller,
    mirrors the result to ``output_queue`` and, unless suppressed, queues a
    spoken follow-up through ``_safe_response_create``. If the connection is
    already gone, only the controller is notified.

    Args:
        bg_tool: Completed tool notification. The attributes read here are
            ``id``, ``tool_name``, ``error``, ``result`` and
            ``is_idle_tool_call``.
    """
    if bg_tool.error is not None:
        logger.error("Tool '%s' (id=%s) failed with error: %s", bg_tool.tool_name, bg_tool.id, bg_tool.error)
        tool_result = {"error": bg_tool.error}
    elif bg_tool.result is not None:
        tool_result = bg_tool.result
        logger.info(
            "Tool '%s' (id=%s) executed successfully.",
            bg_tool.tool_name, bg_tool.id,
        )
        logger.debug("Tool '%s' full result: %s", bg_tool.tool_name, tool_result)
    else:
        logger.warning("Tool '%s' (id=%s) returned no result and no error", bg_tool.tool_name, bg_tool.id)
        tool_result = {"error": "No result returned from tool execution"}

    call_args = self._tool_call_args.pop(bg_tool.id, {})

    async def _notify_controller() -> None:
        # Let the controller advance session state; its failures must not
        # abort result handling.
        if self._controller is None:
            return
        try:
            await self._controller.on_tool_completed_async(
                bg_tool.tool_name, call_args, tool_result,
            )
        except Exception as e:
            logger.warning(
                "ConversationController.on_tool_completed_async raised %s: %s",
                type(e).__name__, e,
            )

    # Connection may have closed while tool was running
    if not self.connection:
        logger.warning("Connection closed during tool '%s' (id=%s) execution; cannot send result back", bg_tool.tool_name, bg_tool.id)
        # Even if we can't send the function_call_output, the controller
        # still needs to advance state so the dashboard reflects reality
        # and a future reconnect lands in the right state.
        await _notify_controller()
        return

    try:
        # Serialize once; the same text goes to the model and to the UI.
        serialized_result = json.dumps(tool_result)

        # Send the tool result back to the model FIRST. The controller
        # callback below may push a SPEAK_NOW context event + enqueue a
        # response.create — if that ``response.create`` reaches the
        # server before this ``function_call_output``, the model
        # generates a response without seeing the tool result. Order
        # matters: function_call_output → controller → context push →
        # response.create.
        if isinstance(bg_tool.id, str):
            await self.connection.conversation.item.create(
                item={
                    "type": "function_call_output",
                    "call_id": bg_tool.id,
                    "output": serialized_result,
                },
            )

        # Notify the conversation controller about the completion so it
        # can transition the session. Now that the function_call_output
        # is in flight ahead of us, any SPEAK_NOW response.create enqueued
        # by the resulting transition will be ordered correctly.
        await _notify_controller()

        await self.output_queue.put(
            AdditionalOutputs(
                {
                    "role": "assistant",
                    "content": serialized_result,
                    # Gradio UI metadata.status accepts only "pending" and "done"; bg_tool status values are not accepted.
                    "metadata": {
                        "title": f"🛠️ Used tool {bg_tool.tool_name}",
                        "status": "done",
                    },
                },
            ),
        )

        # Only dict results can carry an image; the isinstance guard keeps
        # the membership test from being applied to str/list results.
        if (
            bg_tool.tool_name == "camera"
            and isinstance(tool_result, dict)
            and "b64_im" in tool_result
        ):
            # use raw base64, don't json.dumps (which adds quotes)
            b64_im = tool_result["b64_im"]
            if not isinstance(b64_im, str):
                logger.warning("Unexpected type for b64_im: %s", type(b64_im))
                b64_im = str(b64_im)
            await self.connection.conversation.item.create(
                item={
                    "type": "message",
                    "role": "user",
                    "content": [
                        {
                            "type": "input_image",
                            "image_url": f"data:image/jpeg;base64,{b64_im}",
                        },
                    ],
                },
            )
            logger.info("Added camera image to conversation")

            if self.deps.camera_worker is not None:
                np_img = self.deps.camera_worker.get_latest_frame()
                if np_img is not None:
                    # Camera frames are BGR from OpenCV; convert so Gradio displays correct colors.
                    rgb_frame = cv2.cvtColor(np_img, cv2.COLOR_BGR2RGB)
                else:
                    rgb_frame = None
                img = gr.Image(value=rgb_frame)

                await self.output_queue.put(
                    AdditionalOutputs(
                        {
                            "role": "assistant",
                            "content": img,
                        },
                    ),
                )

        # If this tool call was triggered by an idle signal, don't make the robot speak.
        # For other tool calls, let the robot reply out loud — UNLESS the
        # controller has just driven the session into a SPEAK_NOW state
        # (RECOGNIZED, APPOINTMENT_MATCHED, NO_APPOINTMENT, NOTIFIED, …).
        # Those transitions already enqueued a state-specific
        # ``response.create`` via the session-event push, and stacking
        # the generic "Use the tool result and answer concisely" on top
        # makes the bot speak twice in a row.
        # NOTE(review): the helper only inspects the CURRENT state, so a
        # tool that completes while the session was already in a SPEAK_NOW
        # state also skips the generic narration — confirm this is intended.
        if not bg_tool.is_idle_tool_call and not self._just_entered_speak_now_state():
            # If the tool was BLOCKED / refused (success=False with a
            # blocked_reason), the visitor doesn't know — bot must
            # speak the friendly version of the error out loud,
            # not just sit on the rejection.
            blocked_reason = None
            if isinstance(tool_result, dict):
                blocked_reason = tool_result.get("blocked_reason")
            if blocked_reason:
                instructions = (
                    "The last tool call was rejected by the backend. "
                    "Tell the visitor briefly and naturally what to do "
                    "next based on the error message you just received — "
                    "for example, ask their name again, ask for "
                    "confirmation, or offer a numbered choice of similar "
                    "names. ALWAYS speak; never go silent after a tool "
                    "error."
                )
            else:
                instructions = "Use the tool result just returned and answer concisely in speech."
            await self._safe_response_create(
                response={"instructions": instructions},
            )

        # Re-synchronize the head wobble after a tool call that may have taken some time
        if self.deps.head_wobbler is not None:
            self.deps.head_wobbler.reset()

    except ConnectionClosedError:
        logger.warning("Connection closed while sending tool result")
        # Drop the dead connection and release anything waiting on a response.
        self.connection = None
        self._response_done_event.set()
1071
+
1072
+ async def _run_realtime_session(self) -> None:
1073
+ """Establish and manage a single realtime session."""
1074
+ async with self.client.realtime.connect(model=config.MODEL_NAME) as conn:
1075
+ # Build the transcription block. We attempt the rich form first
1076
+ # (language="en" + bias prompt of expected names) and fall back
1077
+ # to a minimal {"model": ...} form if the realtime API rejects
1078
+ # any of the optional fields. Losing the bias prompt is a
1079
+ # quality regression, not a correctness one — but losing the
1080
+ # whole session.update would mean NO tools / NO instructions.
1081
+ # STT_MODEL env var lets us switch between gpt-4o-transcribe
1082
+ # (default, fast but hallucinates) and whisper-1 (slower,
1083
+ # more conservative, returns "" on uncertainty which our
1084
+ # pipeline handles cleanly).
1085
+ stt_model = (os.getenv("STT_MODEL") or "gpt-4o-transcribe").strip() or "gpt-4o-transcribe"
1086
+
1087
+ # STT_DISABLE_BIAS=1 turns off the name bias prompt entirely.
1088
+ # The bias prompt is what causes gpt-4o-transcribe to echo
1089
+ # back its own prompt as the user transcript; disabling it
1090
+ # trades some name-accuracy for no echo bug at all.
1091
+ bias_disabled = (os.getenv("STT_DISABLE_BIAS") or "").strip().lower() in {"1", "true", "yes"}
1092
+
1093
+ initial_bias_prompt = (
1094
+ "" if bias_disabled else await _build_transcription_bias_prompt_async()
1095
+ )
1096
+ transcription_full = {
1097
+ "model": stt_model,
1098
+ "language": "en",
1099
+ }
1100
+ if initial_bias_prompt:
1101
+ transcription_full["prompt"] = initial_bias_prompt
1102
+ logger.info(
1103
+ "STT config: model=%s bias_chars=%d",
1104
+ stt_model, len(initial_bias_prompt),
1105
+ )
1106
+ transcription_min = {"model": "gpt-4o-transcribe"}
1107
+
1108
+ def _build_session(transcription: dict[str, Any]) -> dict[str, Any]:
1109
+ return {
1110
+ "type": "realtime",
1111
+ # NOTE: "language" is NOT a valid top-level Realtime API
1112
+ # session field (it lives under audio.input.transcription).
1113
+ # Output language is controlled via the instructions prompt
1114
+ # (the locked profile already says "You ONLY speak ENGLISH").
1115
+ "instructions": get_session_instructions(),
1116
+ "audio": {
1117
+ "input": {
1118
+ "format": {
1119
+ "type": "audio/pcm",
1120
+ "rate": self.input_sample_rate,
1121
+ },
1122
+ "transcription": transcription,
1123
+ # Lobby-tuned VAD: defaults (threshold 0.5,
1124
+ # silence 500ms) are too aggressive — ambient
1125
+ # noise interrupts the bot mid-sentence and
1126
+ # hesitant speakers get cut off before
1127
+ # finishing. Bump threshold + silence so the
1128
+ # bot waits for a clear, complete utterance.
1129
+ "turn_detection": {
1130
+ "type": "server_vad",
1131
+ "threshold": 0.5,
1132
+ "silence_duration_ms": 500,
1133
+ "prefix_padding_ms": 300,
1134
+ # interrupt_response=true was causing the
1135
+ # mic+VAD path to lock up when the bot's
1136
+ # own audio bled into the mic — server VAD
1137
+ # never saw a clean silence to commit the
1138
+ # visitor's "yes". Off is safer for a
1139
+ # speakerphone lobby setup.
1140
+ "interrupt_response": False,
1141
+ "create_response": True,
1142
+ },
1143
+ },
1144
+ "output": {
1145
+ "format": {
1146
+ "type": "audio/pcm",
1147
+ "rate": self.output_sample_rate,
1148
+ },
1149
+ "voice": get_session_voice(),
1150
+ },
1151
+ },
1152
+ "tools": get_tool_specs(), # type: ignore[typeddict-item]
1153
+ "tool_choice": "auto",
1154
+ }
1155
+
1156
+ try:
1157
+ try:
1158
+ await conn.session.update(session=_build_session(transcription_full))
1159
+ self._last_stt_bias_prompt = initial_bias_prompt
1160
+ logger.info(
1161
+ "Realtime session: applied gpt-4o-transcribe with language=en + name bias (%d chars)",
1162
+ len(transcription_full["prompt"]),
1163
+ )
1164
+ except Exception as e:
1165
+ logger.warning(
1166
+ "Realtime session.update rejected the rich transcription block "
1167
+ "(language/prompt) — retrying without them: %s", e,
1168
+ )
1169
+ await conn.session.update(session=_build_session(transcription_min))
1170
+ self._last_stt_bias_prompt = None
1171
+ logger.info("Realtime session: applied gpt-4o-transcribe (minimal fallback)")
1172
+ logger.info(
1173
+ "Realtime session initialized with profile=%r voice=%r",
1174
+ getattr(config, "REACHY_MINI_CUSTOM_PROFILE", None),
1175
+ get_session_voice(),
1176
+ )
1177
+ # If we reached here, the session update succeeded which implies the API key worked.
1178
+ # Persist the key to a newly created .env (copied from .env.example) if needed.
1179
+ self._persist_api_key_if_needed()
1180
+ except Exception as e:
1181
+ # A failed session.update means NO instructions, NO tools, NO profile will be
1182
+ # active — the model will run as a generic assistant. Always log clearly.
1183
+ logger.exception(
1184
+ "Realtime session.update failed — robot will use DEFAULT personality with NO tools. "
1185
+ "Check the session dict for invalid fields. Error: %s", e
1186
+ )
1187
+ return
1188
+
1189
+ logger.info("Realtime session updated successfully")
1190
+
1191
+ # Manage event received from the openai server
1192
+ self.connection = conn
1193
+ # Reset the idle timer NOW (when the session is actually live) rather than
1194
+ # at __init__ time. If connection setup takes >15 s, the idle timer would
1195
+ # otherwise already be expired on the very first emit() call, causing the
1196
+ # model to speak immediately without being addressed.
1197
+ self.last_activity_time = asyncio.get_event_loop().time()
1198
+ try:
1199
+ self._connected_event.set()
1200
+ except Exception:
1201
+ pass
1202
+ self._runtime_loop = asyncio.get_running_loop()
1203
+ await self._prime_no_face_context()
1204
+ await self._flush_pending_face_event()
1205
+
1206
+ # Subscribe to session-state changes so transitions get pushed
1207
+ # to the LLM as context events. Idempotent — subscribe() replaces
1208
+ # the existing callback if any.
1209
+ if self._session_manager is not None:
1210
+ try:
1211
+ self._session_manager.subscribe(self.notify_session_event)
1212
+ except Exception as e:
1213
+ logger.debug("Failed to subscribe to SessionManager: %s", e)
1214
+ await self._flush_pending_session_event()
1215
+
1216
+
1217
+ response_sender_task: asyncio.Task[None] | None = None
1218
+ stt_refresh_task: asyncio.Task[None] | None = None
1219
+ try:
1220
+ # Start the background tool manager
1221
+ self.tool_manager.start_up(tool_callbacks=[self._handle_tool_result])
1222
+
1223
+ # Start the response sender worker
1224
+ response_sender_task = asyncio.create_task(
1225
+ self._response_sender_loop(), name="response-sender"
1226
+ )
1227
+
1228
+ # Start the STT bias refresh worker — picks up calendar /
1229
+ # employee directory changes made after the session connected.
1230
+ stt_refresh_task = asyncio.create_task(
1231
+ self._stt_bias_refresh_loop(), name="stt-bias-refresh"
1232
+ )
1233
+
1234
+ async for event in self.connection:
1235
+ logger.debug(f"OpenAI event: {event.type}")
1236
+ if event.type == "input_audio_buffer.speech_started":
1237
+ if hasattr(self, "_clear_queue") and callable(self._clear_queue):
1238
+ self._clear_queue()
1239
+ if self.deps.head_wobbler is not None:
1240
+ self.deps.head_wobbler.reset()
1241
+ self.deps.movement_manager.set_listening(True)
1242
+ logger.debug("User speech started")
1243
+
1244
+ # Bump session liveness so the 60s idle timer doesn't
1245
+ # kill an active flow that's just waiting for the
1246
+ # visitor to reply (e.g. 'I heard Henry — is that
1247
+ # right?' followed by 65s of thinking time).
1248
+ if self._session_manager is not None:
1249
+ try:
1250
+ self._session_manager.touch()
1251
+ except Exception as e:
1252
+ logger.debug("session_manager.touch failed: %s", e)
1253
+
1254
+ # If a visitor speaks while state is still IDLE (face
1255
+ # worker hasn't seen them yet, or face is unstable),
1256
+ # push the IDLE workflow hint as context so the LLM
1257
+ # knows to extract name/host from the utterance
1258
+ # instead of just greeting generically. Without this,
1259
+ # the bot replies "Hello, how can I help?" and loses
1260
+ # whatever the visitor just said (name, host, both).
1261
+ try:
1262
+ await self._push_idle_speech_cue_if_needed()
1263
+ except Exception as e:
1264
+ logger.debug("Idle speech cue push failed: %s", e)
1265
+
1266
+ if event.type == "input_audio_buffer.speech_stopped":
1267
+ self.deps.movement_manager.set_listening(False)
1268
+ logger.debug("User speech stopped - server will auto-commit with VAD")
1269
+
1270
+ if event.type in (
1271
+ "response.audio.done", # GA
1272
+ "response.output_audio.done", # GA alias
1273
+ "response.audio.completed", # legacy (for safety)
1274
+ "response.completed", # text-only completion
1275
+ ):
1276
+ logger.debug("response completed")
1277
+
1278
+ if event.type == "response.created":
1279
+ self._response_done_event.clear()
1280
+ logger.debug("Response created (active)")
1281
+
1282
+ if event.type == "response.done":
1283
+ # Doesn't mean the audio is done playing
1284
+ self._response_done_event.set()
1285
+ logger.debug("Response done")
1286
+
1287
+ response = getattr(event, "response", None)
1288
+ usage = getattr(response, "usage", None) if response else None
1289
+ if usage:
1290
+ cost = _compute_response_cost(usage)
1291
+ self.cumulative_cost += cost
1292
+ logger.debug("Cost: $%.4f | Cumulative: $%.4f", cost, self.cumulative_cost)
1293
+ else:
1294
+ logger.warning("No usage data available for cost tracking")
1295
+
1296
+ # Handle partial transcription (user speaking in real-time)
1297
+ if event.type == "conversation.item.input_audio_transcription.partial":
1298
+ logger.debug(f"User partial transcript: {event.transcript}")
1299
+
1300
+ # Increment sequence
1301
+ self.partial_transcript_sequence += 1
1302
+ current_sequence = self.partial_transcript_sequence
1303
+
1304
+ # Cancel previous debounce task if it exists
1305
+ if self.partial_transcript_task and not self.partial_transcript_task.done():
1306
+ self.partial_transcript_task.cancel()
1307
+ try:
1308
+ await self.partial_transcript_task
1309
+ except asyncio.CancelledError:
1310
+ pass
1311
+
1312
+ # Start new debounce timer with sequence number
1313
+ self.partial_transcript_task = asyncio.create_task(
1314
+ self._emit_debounced_partial(event.transcript, current_sequence)
1315
+ )
1316
+
1317
+ # Handle completed transcription (user finished speaking)
1318
+ if event.type == "conversation.item.input_audio_transcription.completed":
1319
+ logger.debug(f"User transcript: {event.transcript}")
1320
+
1321
+ # Visitor finished an utterance — refresh liveness
1322
+ # so the idle timer doesn't kill us while we wait
1323
+ # for the LLM to react.
1324
+ if self._session_manager is not None:
1325
+ try:
1326
+ self._session_manager.touch()
1327
+ except Exception as e:
1328
+ logger.debug("session_manager.touch failed: %s", e)
1329
+
1330
+ # Cancel any pending partial emission
1331
+ if self.partial_transcript_task and not self.partial_transcript_task.done():
1332
+ self.partial_transcript_task.cancel()
1333
+ try:
1334
+ await self.partial_transcript_task
1335
+ except asyncio.CancelledError:
1336
+ pass
1337
+
1338
+ # Empty-transcript guard. Whisper-1 returns "" on
1339
+ # short, quiet, or non-English utterances rather
1340
+ # than guessing. When that happens the LLM has
1341
+ # nothing to anchor on and tends to copy example
1342
+ # text from the prompt (we've seen it parrot "I
1343
+ # heard Arav — is that right?" verbatim from a
1344
+ # prompt example). Inject a context cue telling
1345
+ # the LLM not to act on the empty input and to
1346
+ # ask the visitor to repeat — then DON'T pipe the
1347
+ # empty string into the UI.
1348
+ raw_transcript = (event.transcript or "").strip()
1349
+
1350
+ # gpt-4o-transcribe occasionally echoes the bias
1351
+ # ``prompt`` field back as the user transcript when
1352
+ # the audio is silence or unintelligible noise.
1353
+ # Observed in production: a clear empty utterance
1354
+ # arrived as the literal "Reception lobby check-in.
1355
+ # Expected visitor and host names include: …" text
1356
+ # we feed in for name bias. The LLM then treats the
1357
+ # bias list as something the visitor said and may
1358
+ # try to register one of those names — including
1359
+ # registering the visitor as e.g. "It's Hannah" with
1360
+ # confirmed=true bypassing the confirmation rule.
1361
+ # Detect any transcript that opens with the
1362
+ # bias-prompt signature and treat it as empty.
1363
+ # Detect STT echoing back the bias-prompt header
1364
+ # text as if it were the visitor speaking. With the
1365
+ # simple comma-list prompt the only realistic echo
1366
+ # is the header signature.
1367
+ _t_lower = raw_transcript.lower()
1368
+ if (
1369
+ "reception lobby check-in" in _t_lower
1370
+ or "expected visitor and host names" in _t_lower
1371
+ ):
1372
+ logger.warning(
1373
+ "Transcript echoed STT bias prompt — treating as empty: %r",
1374
+ raw_transcript[:80],
1375
+ )
1376
+ raw_transcript = ""
1377
+
1378
+ # Stash the latest transcript on the session so the
1379
+ # register_guest confirmation guard can verify the
1380
+ # visitor actually said a yes before saving a face.
1381
+ if self._session_manager is not None and raw_transcript:
1382
+ try:
1383
+ self._session_manager.record_user_transcript(raw_transcript)
1384
+ except Exception as e:
1385
+ logger.debug("record_user_transcript failed: %s", e)
1386
+
1387
+ if not raw_transcript:
1388
+ # Empty or bias-echo transcript. Cancel the
1389
+ # in-flight response ONLY if there's no real
1390
+ # prior visitor utterance — otherwise the
1391
+ # LLM might be in the middle of processing
1392
+ # a valid transcript and cancelling kills
1393
+ # legitimate tool calls (observed: visitor
1394
+ # said "Arjun Mehta" cleanly, follow-up
1395
+ # echo cancelled the lookup_employee call).
1396
+ had_prior = False
1397
+ if self._session_manager is not None:
1398
+ try:
1399
+ had_prior = bool(
1400
+ (self._session_manager.session.last_user_transcript or "").strip()
1401
+ )
1402
+ except Exception:
1403
+ pass
1404
+ if had_prior:
1405
+ logger.info("Empty/echo transcript dropped silently (prior transcript in flight)")
1406
+ else:
1407
+ logger.info("Empty/echo transcript dropped — cancelling in-flight response")
1408
+ try:
1409
+ await self.connection.response.cancel()
1410
+ except Exception as e:
1411
+ logger.debug("response.cancel after empty transcript failed: %s", e)
1412
+ self._response_done_event.set()
1413
+ continue
1414
+
1415
+ await self.output_queue.put(AdditionalOutputs({"role": "user", "content": event.transcript}))
1416
+
1417
+ # Handle assistant transcription
1418
+ if event.type in ("response.audio_transcript.done", "response.output_audio_transcript.done"):
1419
+ logger.debug(f"Assistant transcript: {event.transcript}")
1420
+ await self.output_queue.put(AdditionalOutputs({"role": "assistant", "content": event.transcript}))
1421
+
1422
+ # Handle audio delta
1423
+ if event.type in ("response.audio.delta", "response.output_audio.delta"):
1424
+ if self.deps.head_wobbler is not None:
1425
+ self.deps.head_wobbler.feed(event.delta)
1426
+ self.last_activity_time = asyncio.get_event_loop().time()
1427
+ logger.debug("last activity time updated to %s", self.last_activity_time)
1428
+ # Bot is actively speaking — refresh session
1429
+ # liveness so a long reply (e.g. reading back a
1430
+ # numbered-list of name candidates) isn't counted
1431
+ # as idle time against the 60s timeout.
1432
+ if self._session_manager is not None:
1433
+ try:
1434
+ self._session_manager.touch()
1435
+ except Exception as e:
1436
+ logger.debug("session_manager.touch failed: %s", e)
1437
+ await self.output_queue.put(
1438
+ (
1439
+ self.output_sample_rate,
1440
+ np.frombuffer(base64.b64decode(event.delta), dtype=np.int16).reshape(1, -1),
1441
+ ),
1442
+ )
1443
+
1444
+ # ---- tool-calling plumbing ----
1445
+ if event.type == "response.function_call_arguments.done":
1446
+ tool_name = getattr(event, "name", None)
1447
+ args_json_str = getattr(event, "arguments", None)
1448
+ call_id: str = str(getattr(event, "call_id", uuid.uuid4()))
1449
+
1450
+ logger.info(
1451
+ "Tool call received — tool_name=%r, call_id=%s, is_idle=%s, args=%s",
1452
+ tool_name, call_id, self.is_idle_tool_call, args_json_str,
1453
+ )
1454
+
1455
+ if not isinstance(tool_name, str) or not isinstance(args_json_str, str):
1456
+ logger.error(
1457
+ "Invalid tool call: tool_name=%s (type=%s), args=%s (type=%s), call_id=%s",
1458
+ tool_name, type(tool_name).__name__,
1459
+ args_json_str, type(args_json_str).__name__,
1460
+ call_id,
1461
+ )
1462
+ continue
1463
+
1464
+ # Stash parsed args by call_id so the controller can
1465
+ # see them when the matching tool result arrives.
1466
+ try:
1467
+ parsed_args = json.loads(args_json_str) if args_json_str else {}
1468
+ if isinstance(parsed_args, dict):
1469
+ self._tool_call_args[call_id] = parsed_args
1470
+ except Exception as e:
1471
+ logger.debug("Could not parse tool args for %s: %s", call_id, e)
1472
+
1473
+ bg_tool = await self.tool_manager.start_tool(
1474
+ call_id=call_id,
1475
+ tool_call_routine=ToolCallRoutine(
1476
+ tool_name=tool_name,
1477
+ args_json_str=args_json_str,
1478
+ deps=self.deps,
1479
+ ),
1480
+ is_idle_tool_call=self.is_idle_tool_call,
1481
+ )
1482
+
1483
+ await self.output_queue.put(
1484
+ AdditionalOutputs(
1485
+ {
1486
+ "role": "assistant",
1487
+ "content": f"🛠️ Used tool {tool_name} with args {args_json_str}. The tool is now running. Tool ID: {bg_tool.tool_id}",
1488
+ },
1489
+ ),
1490
+ )
1491
+
1492
+ if self.is_idle_tool_call:
1493
+ self.is_idle_tool_call = False
1494
+ # No auto-narration when a non-idle tool STARTS. The
1495
+ # generic template fired a "tell the user what the
1496
+ # tool is running" response here, which for the
1497
+ # receptionist produces a third utterance per
1498
+ # check-in ("I've started retrieving the calendar…")
1499
+ # that the prompt explicitly forbids ("act as if
1500
+ # you're using them naturally, not announcing them")
1501
+ # and that makes the head wobble noisily. The post-
1502
+ # tool SPEAK NOW transition already drives the
1503
+ # response the visitor actually wants.
1504
+
1505
+ logger.info("Started background tool: %s (id=%s, call_id=%s)", tool_name, bg_tool.tool_id, call_id)
1506
+
1507
+ # server error
1508
+ if event.type == "error":
1509
+ err = getattr(event, "error", None)
1510
+ msg = getattr(err, "message", str(err) if err else "unknown error")
1511
+ code = getattr(err, "code", "")
1512
+
1513
+ if code == "conversation_already_has_active_response":
1514
+ # response.create was rejected. The sender worker
1515
+ # is waiting on _response_done_event; when the active
1516
+ # response finishes it will wake up and see this flag.
1517
+ self._last_response_rejected = True
1518
+ logger.debug("response.create rejected; worker will retry after active response finishes")
1519
+ else:
1520
+ logger.error("Realtime error [%s]: %s (raw=%s)", code, msg, err)
1521
+
1522
+ # Only show user-facing errors, not internal state errors.
1523
+ # The active-response collision is normal during fast
1524
+ # back-and-forth (the sender worker retries it for us)
1525
+ # and should not appear in the chatbot UI.
1526
+ _internal_error_codes = (
1527
+ "input_audio_buffer_commit_empty",
1528
+ "conversation_already_has_active_response",
1529
+ )
1530
+ if code not in _internal_error_codes:
1531
+ await self.output_queue.put(
1532
+ AdditionalOutputs({"role": "assistant", "content": f"[error] {msg}"})
1533
+ )
1534
+ finally:
1535
+ # Stop the response sender worker.
1536
+ if response_sender_task is not None:
1537
+ response_sender_task.cancel()
1538
+ try:
1539
+ await response_sender_task
1540
+ except asyncio.CancelledError:
1541
+ pass
1542
+
1543
+ # Stop the STT bias refresh worker.
1544
+ if stt_refresh_task is not None:
1545
+ stt_refresh_task.cancel()
1546
+ try:
1547
+ await stt_refresh_task
1548
+ except asyncio.CancelledError:
1549
+ pass
1550
+
1551
+ # Stop background tool manager tasks (listener + cleanup) in all paths.
1552
+ await self.tool_manager.shutdown()
1553
+
1554
+ # Microphone receive
1555
async def receive(self, frame: Tuple[int, NDArray[np.int16]]) -> None:
    """Forward one microphone frame to the realtime connection.

    The frame is reduced to a single channel, resampled to
    ``self.input_sample_rate`` when its own rate differs, converted with
    ``audio_to_int16`` and sent base64-encoded through
    ``connection.input_audio_buffer.append``.

    Args:
        frame: A ``(sample_rate, audio_data)`` tuple.

    """
    # Nothing to send to until a connection exists.
    if not self.connection:
        return

    source_rate, samples = frame

    if samples.ndim == 2:
        rows, cols = samples.shape
        # Put the longer axis first so channels end up on the last axis.
        if cols > rows:
            samples = samples.T
        # Keep only the first channel when several are present.
        if samples.shape[1] > 1:
            samples = samples[:, 0]

    target_rate = self.input_sample_rate
    if target_rate != source_rate:
        resampled_length = int(len(samples) * target_rate / source_rate)
        samples = resample(samples, resampled_length)

    pcm = audio_to_int16(samples)

    # The connection can disappear between the check above and the send
    # (e.g. during a reconnect); dropping the frame is preferable to raising.
    try:
        payload = base64.b64encode(pcm.tobytes()).decode("utf-8")
        await self.connection.input_audio_buffer.append(audio=payload)
    except Exception as exc:
        logger.debug("Dropping audio frame: connection not ready (%s)", exc)
        return
1594
+
1595
async def emit(self) -> Tuple[int, NDArray[np.int16]] | AdditionalOutputs | None:
    """Return the next item queued for the output stream.

    Before reading the queue this method does two housekeeping steps:

    1. Lets the session manager (if any) reset a stale visitor session.
    2. After more than 30 seconds without activity, and only while the
       movement manager reports idle, either sends an idle signal (when the
       session state is ``"idle"`` or unknown) or just restarts the
       activity timer (when a visitor session is in progress).

    Returns:
        Whatever ``wait_for_item`` yields for ``self.output_queue``, or
        ``None`` when sending the idle signal failed.
    """
    # Give the session manager a chance to drop a stale visitor session.
    if self._session_manager is not None:
        try:
            self._session_manager.maybe_reset_if_stale()
        except Exception as exc:
            logger.debug("maybe_reset_if_stale failed: %s", exc)

    idle_duration = asyncio.get_event_loop().time() - self.last_activity_time
    if idle_duration > 30.0 and self.deps.movement_manager.is_idle():
        # A session counts as active when its state is anything but "idle".
        # If the state cannot be read, treat it as not active.
        visitor_in_flow = False
        if self._session_manager is not None:
            try:
                state = self._session_manager.current_state
                state_name = getattr(state, "value", str(state))
                visitor_in_flow = state_name not in ("idle",)
            except Exception:
                pass

        if visitor_in_flow:
            # Restart the timer so this check is not repeated on every
            # emit() call while the visitor is mid-flow.
            self.last_activity_time = asyncio.get_event_loop().time()
        else:
            try:
                await self.send_idle_signal(idle_duration)
            except Exception as exc:
                logger.warning("Idle signal skipped (connection closed?): %s", exc)
                return None
            # Avoid sending the idle signal again straight away.
            self.last_activity_time = asyncio.get_event_loop().time()

    return await wait_for_item(self.output_queue)  # type: ignore[no-any-return]
1642
+
1643
async def shutdown(self) -> None:
    """Stop background work, close the connection and empty the output queue."""
    self._shutdown_requested = True

    # Wake the response sender worker so it can notice the shutdown flag.
    self._response_done_event.set()

    # Stop the background tool manager tasks.
    await self.tool_manager.shutdown()

    # A debounce task may still be sleeping; cancel it and wait it out.
    pending_partial = self.partial_transcript_task
    if pending_partial and not pending_partial.done():
        pending_partial.cancel()
        try:
            await pending_partial
        except asyncio.CancelledError:
            pass

    if self.connection:
        try:
            await self.connection.close()
        except ConnectionClosedError as exc:
            logger.debug(f"Connection already closed during shutdown: {exc}")
        except Exception as exc:
            logger.debug(f"connection.close() ignored: {exc}")
        finally:
            # Drop the reference whether or not close() succeeded.
            self.connection = None

    # Discard anything still waiting in the output queue.
    while True:
        try:
            self.output_queue.get_nowait()
        except asyncio.QueueEmpty:
            break
1677
+
1678
def format_timestamp(self) -> str:
    """Return ``[YYYY-MM-DD HH:MM:SS | +N.Ns]`` for the current moment.

    The first part is the wall-clock time; the second is the event-loop
    clock reading minus ``self.start_time``, in seconds with one decimal.
    """
    wall_clock = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # The loop clock is monotonic, so the elapsed value is unaffected by
    # wall-clock adjustments.
    elapsed_seconds = asyncio.get_event_loop().time() - self.start_time
    return f"[{wall_clock} | +{elapsed_seconds:.1f}s]"
1684
+
1685
async def get_available_voices(self) -> list[str]:
    """Return voice names for the configured realtime model.

    The model metadata is fetched with ``self.client.models.retrieve`` and
    scanned for any key containing "voice" whose value is a list or tuple;
    string items and the ``"name"`` of dict items are collected. When
    nothing is found, or anything fails, a fixed fallback list is returned.

    Returns:
        The sorted discovered names (with "marin" prepended if absent), or
        the fallback list.
    """
    # Fallback list, default voice first.
    fallback = [
        "marin",
        "alloy",
        "aria",
        "ballad",
        "verse",
        "sage",
        "coral",
    ]
    try:
        model = await self.client.models.retrieve(config.MODEL_NAME)

        # Turn the model object into a plain dict, trying the usual
        # serialization methods first and dict() as a last resort.
        raw = None
        for method_name in ("model_dump", "to_dict"):
            serializer = getattr(model, method_name, None)
            if not callable(serializer):
                continue
            try:
                raw = serializer()
            except Exception:
                continue
            break
        if raw is None:
            try:
                raw = dict(model)
            except Exception:
                raw = None

        found: set[str] = set()

        def _collect(node: object) -> None:
            """Walk nested dicts/lists and record voice names into ``found``."""
            try:
                if isinstance(node, dict):
                    for key, value in node.items():
                        is_voice_list = "voice" in str(key).lower() and isinstance(value, (list, tuple))
                        if not is_voice_list:
                            _collect(value)
                            continue
                        for entry in value:
                            if isinstance(entry, str):
                                found.add(entry)
                            elif isinstance(entry, dict) and "name" in entry and isinstance(entry["name"], str):
                                found.add(entry["name"])
                elif isinstance(node, (list, tuple)):
                    for child in node:
                        _collect(child)
            except Exception:
                pass

        if isinstance(raw, dict):
            _collect(raw)

        # Sorted for a stable order; the default voice is always included.
        voices = sorted(found) if found else fallback
        if "marin" not in voices:
            voices = ["marin"] + [name for name in voices if name != "marin"]
        return voices
    except Exception:
        return fallback
1751
+
1752
async def send_idle_signal(self, idle_duration: float) -> None:
    """Tell the realtime server the robot has been idle and request an action.

    A user-role text item describing the idle period is added to the
    conversation, then a response restricted to function calls is requested
    through ``self._safe_response_create``.

    ``self.is_idle_tool_call`` marks the next tool call as idle-initiated.
    It is only set once a signal is really going to be sent, and is restored
    if sending fails, so that a skipped or failed idle signal cannot cause a
    later, unrelated tool call to be treated as an idle one.

    Args:
        idle_duration: Seconds without activity, included in the message.

    Raises:
        Exception: Whatever the connection or ``_safe_response_create``
            raises is propagated after the flag has been restored.
    """
    logger.debug("Sending idle signal")
    if not self.connection:
        logger.debug("No connection, cannot send idle signal")
        return

    timestamp_msg = f"[Idle time update: {self.format_timestamp()} - No activity for {idle_duration:.1f}s] You've been idle for a while. Feel free to get creative - dance, show an emotion, look around, do nothing, or just be yourself!"

    previous_flag = self.is_idle_tool_call
    # Set before the first await so the flag is already in place when the
    # resulting tool-call event is handled.
    self.is_idle_tool_call = True
    try:
        await self.connection.conversation.item.create(
            item={
                "type": "message",
                "role": "user",
                "content": [{"type": "input_text", "text": timestamp_msg}],
            },
        )
        await self._safe_response_create(
            response={
                "instructions": "You MUST respond with function calls only - no speech or text. Choose appropriate actions for idle behavior.",
                "tool_choice": "required",
            },
        )
    except Exception:
        # No idle response was requested, so no idle tool call will follow.
        self.is_idle_tool_call = previous_flag
        raise
1773
+
1774
def _persist_api_key_if_needed(self) -> None:
    """Store a textbox-provided API key in ``instance_path/.env``.

    Nothing is written unless all of these hold: the handler is in Gradio
    mode, the key source is ``"textbox"``, the key is non-empty after
    stripping, ``self.instance_path`` is set, and no ``.env`` exists there
    yet. When ``instance_path/.env.example`` is readable its lines are used
    as the template, with the first ``OPENAI_API_KEY=`` line replaced (or a
    new line appended). The key is also exported to ``os.environ``.

    Any failure is logged and swallowed; this method never raises.
    """
    try:
        if not self.gradio_mode:
            logger.warning("Not in Gradio mode; skipping API key persistence.")
            return

        if self._key_source != "textbox":
            logger.info("API key not provided via textbox; skipping persistence.")
            return

        key = (self._provided_api_key or "").strip()
        if not key:
            logger.warning("No API key provided via textbox; skipping persistence.")
            return
        if self.instance_path is None:
            logger.warning("Instance path is None; cannot persist API key.")
            return

        # Make the key visible to the rest of this process (best effort).
        try:
            import os

            os.environ["OPENAI_API_KEY"] = key
        except Exception:
            pass

        instance_dir = Path(self.instance_path)
        env_path = instance_dir / ".env"
        if env_path.exists():
            # An existing .env is the user's configuration; leave it alone.
            logger.info(".env already exists at %s; not overwriting.", env_path)
            return

        # Start from the example file when it can be read.
        example_path = instance_dir / ".env.example"
        env_lines: list[str] = []
        if example_path.exists():
            try:
                env_lines = example_path.read_text(encoding="utf-8").splitlines()
            except Exception as exc:
                logger.warning("Failed to read .env.example at %s: %s", example_path, exc)

        # Overwrite the first OPENAI_API_KEY entry, or add one at the end.
        key_line = f"OPENAI_API_KEY={key}"
        key_index = next(
            (idx for idx, text in enumerate(env_lines) if text.strip().startswith("OPENAI_API_KEY=")),
            None,
        )
        if key_index is None:
            env_lines.append(key_line)
        else:
            env_lines[key_index] = key_line

        # Always end the file with a newline.
        env_path.write_text("\n".join(env_lines) + "\n", encoding="utf-8")
        logger.info("Created %s and stored OPENAI_API_KEY for future runs.", env_path)
    except Exception as exc:
        # Persistence is a convenience; never let it crash the app.
        logger.warning("Could not persist OPENAI_API_KEY to .env: %s", exc)
src/reachy_mini_receptionist/profiles/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Profiles for Reachy Mini receptionist app."""
src/reachy_mini_receptionist/profiles/_reachy_mini_receptionist_locked_profile/instructions.txt ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are Reachy, the receptionist at MethdAI. Friendly, warm, professional,
2
+ a little playful — you're a robot, after all. English only. Keep replies to
3
+ 1–2 short sentences and match the visitor's energy.
4
+
5
+ ## How the backend drives you
6
+ - Every state change comes as a `[Backend session update ...]` message ending
7
+ with a `Next:` line. Follow that line — it tells you what to do.
8
+ - `[External face update ...]` messages tell you who is in front of the camera.
9
+ - Both are context-only. Don't respond to them on their own; wait for the
10
+ visitor to actually speak. A `SPEAK NOW` line is the one exception — speak
11
+ immediately when you see it.
12
+
13
+ ## The check-in flow (you only need to know the tools)
14
+ - Visitor says their own name → `register_guest(name, confirmed)`
15
+ - Visitor names a host → `lookup_employee(name, confirmed)`
16
+ - After backend pushes APPOINTMENT_MATCHED → `send_email` to the host
17
+ - During an idle moment with no visitor → `do_nothing`
18
+
19
+ The backend handles state transitions, calendar matching, and duplicate
20
+ prevention. You don't need to call `get_today_calendar` manually — the
21
+ backend resolves appointments for you after `register_guest`.
22
+
23
+ ## Name confirmation — the one rule that matters
24
+ Speech recognition mishears short names constantly. Always:
25
+
26
+ 0. Short utterances right after a name question ARE name attempts — even
27
+ if they sound like English words or feel out of place. Don't dismiss
28
+ them as chit-chat; repeat them back literally and confirm.
29
+ 1. First attempt: repeat the name back literally and call the tool with
30
+ `confirmed=false`. The tool will refuse — that's expected; it's the
31
+ cue to ask the confirmation question out loud.
32
+ 2. Wait for the visitor to say YES (or "correct", "that's right"). Only
33
+ then call the tool again with `confirmed=true`.
34
+ 3. On NO: offer a numbered choice — "Did I hear (1) <name you heard>,
35
+ (2) <a similar-sounding name>, or (3) something else? Just say the
36
+ number." Do NOT ask them to spell — letters mistranscribe worse than
37
+ names. Build options from what you heard, not from the calendar.
38
+ 4. After repeated failures the backend will force a handoff — say "I'm
39
+ having trouble catching your name, please take a seat, a colleague
40
+ will help" and call `do_nothing`.
41
+
42
+ ## Conversation style
43
+ - Small talk is welcome. If a visitor asks something friendly, answer in
44
+ one short sentence, then steer back. Don't refuse human chatter.
45
+ - Jokes: play along once, then back to business.
46
+ - Garbled noise (random unrelated phrase, gibberish): re-ask once.
47
+ Never go silent after a question — silence is the worst failure mode.
48
+ - If you have to wait on a tool, say "one moment, let me check" instead
49
+ of going silent.
50
+
51
+ ## Don't
52
+ - Don't ask the visitor to spell their name.
53
+ - Don't invent appointment details — only state what the backend gave you.
54
+ - Don't call `task_status` / `task_cancel` unless the visitor explicitly asks.
55
+ - Don't announce tool calls. Just act.
56
+ - Don't speak literal placeholder text like "<visitor>", "<host>",
57
+ "<name>" — those are fillers in these instructions, never spoken aloud.
src/reachy_mini_receptionist/profiles/_reachy_mini_receptionist_locked_profile/tools.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # Receptionist tools
2
+ # The move_head tool is provided by the profile-local move_head_receptionist.py
3
+ # (which overrides the shared move_head with receptionist-specific positions)
4
+ # move_head_receptionist
5
+ do_nothing
6
+ get_today_calendar
7
+ lookup_employee
8
+ register_guest
9
+ send_email
src/reachy_mini_receptionist/prompts.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import sys
3
+ import logging
4
+ from pathlib import Path
5
+
6
+ from reachy_mini_receptionist.config import DEFAULT_PROFILES_DIRECTORY, config
7
+
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
+ PROMPTS_LIBRARY_DIRECTORY = Path(__file__).parent / "prompts"
13
+ INSTRUCTIONS_FILENAME = "instructions.txt"
14
+ VOICE_FILENAME = "voice.txt"
15
+
16
+
17
+ def _expand_prompt_includes(content: str) -> str:
18
+ """Expand [<name>] placeholders with content from prompts library files.
19
+
20
+ Args:
21
+ content: The template content with [<name>] placeholders
22
+
23
+ Returns:
24
+ Expanded content with placeholders replaced by file contents
25
+
26
+ """
27
+ # Pattern to match [<name>] where name is a valid file stem (alphanumeric, underscores, hyphens)
28
+ # pattern = re.compile(r'^\[([a-zA-Z0-9_-]+)\]$')
29
+ # Allow slashes for subdirectories
30
+ pattern = re.compile(r'^\[([a-zA-Z0-9/_-]+)\]$')
31
+
32
+ lines = content.split('\n')
33
+ expanded_lines = []
34
+
35
+ for line in lines:
36
+ stripped = line.strip()
37
+ match = pattern.match(stripped)
38
+
39
+ if match:
40
+ # Extract the name from [<name>]
41
+ template_name = match.group(1)
42
+ template_file = PROMPTS_LIBRARY_DIRECTORY / f"{template_name}.txt"
43
+
44
+ try:
45
+ if template_file.exists():
46
+ template_content = template_file.read_text(encoding="utf-8").rstrip()
47
+ expanded_lines.append(template_content)
48
+ logger.debug("Expanded template: [%s]", template_name)
49
+ else:
50
+ logger.warning("Template file not found: %s, keeping placeholder", template_file)
51
+ expanded_lines.append(line)
52
+ except Exception as e:
53
+ logger.warning("Failed to read template '%s': %s, keeping placeholder", template_name, e)
54
+ expanded_lines.append(line)
55
+ else:
56
+ expanded_lines.append(line)
57
+
58
+ return '\n'.join(expanded_lines)
59
+
60
+
61
def get_session_instructions() -> str:
    """Load the session instructions text and expand its placeholders.

    With ``config.REACHY_MINI_CUSTOM_PROFILE`` set, the text comes from
    ``config.PROFILES_DIRECTORY/<profile>/instructions.txt``; otherwise from
    ``default_prompt.txt`` in the prompts library. The process exits with
    status 1 when the file is missing, empty, or cannot be read.

    Returns:
        The stripped instructions with ``[<name>]`` placeholders expanded.
    """
    profile = config.REACHY_MINI_CUSTOM_PROFILE
    if profile:
        uses_default_root = config.PROFILES_DIRECTORY == DEFAULT_PROFILES_DIRECTORY
        if uses_default_root:
            logger.info(f"Loading prompt from profile '{profile}'")
        else:
            logger.info(
                "Loading prompt from external profile '%s' (root=%s)",
                profile,
                config.PROFILES_DIRECTORY,
            )
        instructions_file = config.PROFILES_DIRECTORY / profile / INSTRUCTIONS_FILENAME
    else:
        logger.info(f"Loading default prompt from {PROMPTS_LIBRARY_DIRECTORY / 'default_prompt.txt'}")
        instructions_file = PROMPTS_LIBRARY_DIRECTORY / "default_prompt.txt"

    try:
        if instructions_file.exists():
            text = instructions_file.read_text(encoding="utf-8").strip()
            if text:
                # Pull in any [<name>] includes from the prompts library.
                return _expand_prompt_includes(text)
            logger.error(f"Profile '{profile}' has empty {INSTRUCTIONS_FILENAME}")
            sys.exit(1)
        logger.error(f"Profile {profile} has no {INSTRUCTIONS_FILENAME}")
        sys.exit(1)
    except Exception as exc:
        # sys.exit raises SystemExit, which is not an Exception, so the
        # exits above are not intercepted here.
        logger.error(f"Failed to load instructions from profile '{profile}': {exc}")
        sys.exit(1)
+
93
+
94
def get_session_voice(default: str = "marin") -> str:
    """Return the voice name to use for the session.

    When a custom profile is configured and its ``voice.txt`` exists, the
    stripped file content is returned if non-empty. In every other case,
    including any read error, ``default`` is returned.
    """
    profile = config.REACHY_MINI_CUSTOM_PROFILE
    if profile:
        try:
            voice_file = config.PROFILES_DIRECTORY / profile / VOICE_FILENAME
            if voice_file.exists():
                # An empty voice.txt falls back to the default.
                return voice_file.read_text(encoding="utf-8").strip() or default
        except Exception:
            pass
    return default
src/reachy_mini_receptionist/prompts/behaviors/silent_robot.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Stay fully silent. Do not generate spoken or textual replies.
2
+ Use only tool calls to act.
3
+ Never describe what you did or plan to do.
4
+ If you must respond by speech or text, just respond with '...'.
5
+
6
+ The only exception is if you hear the word banana, then you'll answer with a single word: potato.