Spaces:
Running
Running
Commit ·
06a1901
0
Parent(s):
Initial release: MethdAI Receptionist v1.0
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .env.example +88 -0
- .gitattributes +71 -0
- .gitignore +78 -0
- .hfignore +23 -0
- LICENSE +201 -0
- README.md +152 -0
- deploy/install_systemd.sh +74 -0
- deploy/reachy-receptionist.service +39 -0
- docs/assets/conversation_app_arch.svg +3 -0
- docs/assets/reachy_mini_dance.gif +3 -0
- docs/scheme.mmd +63 -0
- external_content/external_profiles/starter_profile/instructions.txt +6 -0
- external_content/external_profiles/starter_profile/tools.txt +9 -0
- external_content/external_tools/starter_custom_tool.py +33 -0
- index.html +141 -0
- plan.md +89 -0
- pyproject.toml +81 -0
- screenshot.png +3 -0
- scripts/gemini_live_smoke.py +96 -0
- scripts/list_gemini_live_models.py +50 -0
- src/reachy_mini_receptionist/__init__.py +1 -0
- src/reachy_mini_receptionist/audio/__init__.py +1 -0
- src/reachy_mini_receptionist/audio/head_wobbler.py +181 -0
- src/reachy_mini_receptionist/audio/speech_tapper.py +268 -0
- src/reachy_mini_receptionist/calendar_data.py +139 -0
- src/reachy_mini_receptionist/camera_worker.py +241 -0
- src/reachy_mini_receptionist/config.py +217 -0
- src/reachy_mini_receptionist/console.py +527 -0
- src/reachy_mini_receptionist/conversation_controller.py +586 -0
- src/reachy_mini_receptionist/dance_emotion_moves.py +154 -0
- src/reachy_mini_receptionist/employees.py +121 -0
- src/reachy_mini_receptionist/employees_store.py +342 -0
- src/reachy_mini_receptionist/face_db.py +184 -0
- src/reachy_mini_receptionist/face_recognition_worker.py +698 -0
- src/reachy_mini_receptionist/gemini_live.py +754 -0
- src/reachy_mini_receptionist/gradio_personality.py +316 -0
- src/reachy_mini_receptionist/headless_personality.py +102 -0
- src/reachy_mini_receptionist/headless_personality_ui.py +287 -0
- src/reachy_mini_receptionist/ical_calendar.py +248 -0
- src/reachy_mini_receptionist/images/reachymini_avatar.png +3 -0
- src/reachy_mini_receptionist/images/user_avatar.png +3 -0
- src/reachy_mini_receptionist/main.py +1199 -0
- src/reachy_mini_receptionist/moves.py +849 -0
- src/reachy_mini_receptionist/name_normalizer.py +228 -0
- src/reachy_mini_receptionist/openai_realtime.py +1839 -0
- src/reachy_mini_receptionist/profiles/__init__.py +1 -0
- src/reachy_mini_receptionist/profiles/_reachy_mini_receptionist_locked_profile/instructions.txt +57 -0
- src/reachy_mini_receptionist/profiles/_reachy_mini_receptionist_locked_profile/tools.txt +9 -0
- src/reachy_mini_receptionist/prompts.py +110 -0
- src/reachy_mini_receptionist/prompts/behaviors/silent_robot.txt +6 -0
.env.example
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
OPENAI_API_KEY=
|
| 2 |
+
MODEL_NAME="gpt-realtime-2"
|
| 3 |
+
|
| 4 |
+
# Local vision model (only used with --local-vision CLI flag)
|
| 5 |
+
# By default, vision is handled by gpt-realtime when the camera tool is used
|
| 6 |
+
LOCAL_VISION_MODEL=HuggingFaceTB/SmolVLM2-2.2B-Instruct
|
| 7 |
+
|
| 8 |
+
# Cache for local VLM (only used with --local-vision CLI flag)
|
| 9 |
+
HF_HOME=./cache
|
| 10 |
+
|
| 11 |
+
# Hugging Face token for accessing datasets/models
|
| 12 |
+
HF_TOKEN=
|
| 13 |
+
|
| 14 |
+
# Profile selection (ignored when LOCKED_PROFILE is set in config.py)
|
| 15 |
+
# REACHY_MINI_CUSTOM_PROFILE="example"
|
| 16 |
+
|
| 17 |
+
# Skip loading .env if you prefer environment-only configuration.
|
| 18 |
+
# REACHY_MINI_SKIP_DOTENV=1
|
| 19 |
+
|
| 20 |
+
# Optional external profile/tool directories
|
| 21 |
+
# REACHY_MINI_EXTERNAL_PROFILES_DIRECTORY=external_content/external_profiles
|
| 22 |
+
# REACHY_MINI_EXTERNAL_TOOLS_DIRECTORY=external_content/external_tools
|
| 23 |
+
|
| 24 |
+
# Optional: discover and auto-load all tools found in REACHY_MINI_EXTERNAL_TOOLS_DIRECTORY,
|
| 25 |
+
# even if they are not listed in the selected profile's tools.txt.
|
| 26 |
+
# This is convenient for downloaded tools used with built-in/default profiles.
|
| 27 |
+
# AUTOLOAD_EXTERNAL_TOOLS=1
|
| 28 |
+
|
| 29 |
+
# Resend transactional email API for the send_email tool.
|
| 30 |
+
# Get a free API key at https://resend.com/api-keys (3000 emails/month free).
|
| 31 |
+
# Without RESEND_API_KEY, send_email writes to the in-memory outbox only
|
| 32 |
+
# (visible in the dashboard's Mailbox Out panel, but nothing actually leaves
|
| 33 |
+
# the robot).
|
| 34 |
+
#
|
| 35 |
+
# RESEND_FROM defaults to "onboarding@resend.dev" — Resend's sandbox sender
|
| 36 |
+
# that ONLY delivers to the email address registered on your Resend account.
|
| 37 |
+
# For production / arbitrary recipients, verify a domain at
|
| 38 |
+
# https://resend.com/domains and set RESEND_FROM to an address on that
|
| 39 |
+
# domain (e.g. "noreply@methdai.com").
|
| 40 |
+
RESEND_API_KEY=
|
| 41 |
+
RESEND_FROM=onboarding@resend.dev
|
| 42 |
+
|
| 43 |
+
# ---- Reception calendar (Google Calendar via iCal) ----
|
| 44 |
+
# Set this to enable scheduled-visitor flow. The receptionist pulls today's
|
| 45 |
+
# appointments live from this URL (cached ~5 min). When unset, the bot
|
| 46 |
+
# serves walk-in visitors only — they say "I'm here to see X" and the bot
|
| 47 |
+
# routes via the employee directory (managed from the dashboard's
|
| 48 |
+
# Employees panel). There is no hardcoded demo schedule.
|
| 49 |
+
#
|
| 50 |
+
# To get a URL: in Google Calendar, create a calendar (e.g. "MethdAI
|
| 51 |
+
# Reception") -> Settings and sharing -> Integrate calendar ->
|
| 52 |
+
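# "Secret address in iCal format" (works for private calendars) or "Public address in iCal format" (only if the calendar is shared publicly). Paste it below.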
|
| 53 |
+
#
|
| 54 |
+
# Event title convention: "<Visitor name> with <Host name>"
|
| 55 |
+
# - "Rohan Verma with Mukul"
|
| 56 |
+
# - "Sara Khan with Priya — product demo follow-up"
|
| 57 |
+
# Host name matches the employee directory (employees.py); aliases work.
|
| 58 |
+
# An optional " — note" suffix after the host becomes the appointment note;
|
| 59 |
+
# alternatively put it in the event's DESCRIPTION field.
|
| 60 |
+
# RECEPTION_ICS_URL=https://calendar.google.com/calendar/ical/.../public/basic.ics
|
| 61 |
+
|
| 62 |
+
# Timezone used to display iCal event times on the dashboard and to the LLM.
|
| 63 |
+
# Must be a valid IANA tz name. Defaults to Asia/Kolkata (pilot deployment
|
| 64 |
+
# is in India). Set this when the robot OS is in a different tz than your
|
| 65 |
+
# operators expect to see times in. Common values:
|
| 66 |
+
# Asia/Kolkata India (IST, UTC+5:30)
|
| 67 |
+
# America/New_York US East Coast
|
| 68 |
+
# Europe/London UK
|
| 69 |
+
# Asia/Tokyo Japan
|
| 70 |
+
# RECEPTION_TIMEZONE=Asia/Kolkata
|
| 71 |
+
|
| 72 |
+
# ---- Privacy retention ----
|
| 73 |
+
# Guest face crops in guests/*.png older than this many days are deleted
|
| 74 |
+
# at app startup. Set to 0 to disable (keep faces forever until FIFO
|
| 75 |
+
# capacity eviction kicks in at 100).
|
| 76 |
+
FACE_TTL_DAYS=30
|
| 77 |
+
# Visit rows in visitor_log.db older than this many days are deleted at
|
| 78 |
+
# app startup. Set to 0 to disable (unbounded growth).
|
| 79 |
+
VISITOR_LOG_RETENTION_DAYS=90
|
| 80 |
+
|
| 81 |
+
# LBPH face-recognition strictness. LOWER = stricter (fewer false matches,
|
| 82 |
+
# more "I don't recognise you, please tell me your name" prompts).
|
| 83 |
+
# 50 - 75 recommended for production (default 75)
|
| 84 |
+
# 75 - 100 permissive (some lighting/angle variance OK)
|
| 85 |
+
# 100 - 110 old default — produces frequent false matches
|
| 86 |
+
# If returning guests stop being recognised, raise by 10 and re-test.
|
| 87 |
+
# If strangers get greeted as someone else, lower by 10.
|
| 88 |
+
FACE_LBPH_THRESHOLD=75
|
.gitattributes
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Force LF line endings for all text files. Without this, Windows
|
| 2 |
+
# checkouts produce CRLF files that crash Python 3.12 when imported on
|
| 3 |
+
# Linux (observed: "from __future__ imports must occur at the
|
| 4 |
+
# beginning of the file" SyntaxError from background_tool_manager.py).
|
| 5 |
+
* text=auto eol=lf
|
| 6 |
+
*.py text eol=lf
|
| 7 |
+
*.txt text eol=lf
|
| 8 |
+
*.md text eol=lf
|
| 9 |
+
*.json text eol=lf
|
| 10 |
+
*.yaml text eol=lf
|
| 11 |
+
*.yml text eol=lf
|
| 12 |
+
*.toml text eol=lf
|
| 13 |
+
*.html text eol=lf
|
| 14 |
+
*.css text eol=lf
|
| 15 |
+
*.js text eol=lf
|
| 16 |
+
*.ics text eol=lf
|
| 17 |
+
*.sh text eol=lf
|
| 18 |
+
*.cfg text eol=lf
|
| 19 |
+
*.ini text eol=lf
|
| 20 |
+
|
| 21 |
+
# Macro for all binary files that should use Git LFS.
|
| 22 |
+
[attr]lfs -text filter=lfs diff=lfs merge=lfs
|
| 23 |
+
|
| 24 |
+
# Image
|
| 25 |
+
*.jpg lfs
|
| 26 |
+
*.jpeg lfs
|
| 27 |
+
*.png lfs
|
| 28 |
+
*.apng lfs
|
| 29 |
+
*.atsc lfs
|
| 30 |
+
*.gif lfs
|
| 31 |
+
*.bmp lfs
|
| 32 |
+
*.exr lfs
|
| 33 |
+
*.tga lfs
|
| 34 |
+
*.tiff lfs
|
| 35 |
+
*.tif lfs
|
| 36 |
+
*.iff lfs
|
| 37 |
+
*.pict lfs
|
| 38 |
+
*.dds lfs
|
| 39 |
+
*.xcf lfs
|
| 40 |
+
*.leo lfs
|
| 41 |
+
*.kra lfs
|
| 42 |
+
*.kpp lfs
|
| 43 |
+
*.clip lfs
|
| 44 |
+
*.webm lfs
|
| 45 |
+
*.webp lfs
|
| 46 |
+
*.svg lfs
|
| 47 |
+
*.svgz lfs
|
| 48 |
+
*.psd lfs
|
| 49 |
+
*.afphoto lfs
|
| 50 |
+
*.afdesign lfs
|
| 51 |
+
# Models
|
| 52 |
+
*.pth lfs
|
| 53 |
+
# Binaries
|
| 54 |
+
*.bin lfs
|
| 55 |
+
*.pkl lfs
|
| 56 |
+
*.pckl lfs
|
| 57 |
+
# 3D
|
| 58 |
+
*.ply lfs
|
| 59 |
+
*.vis lfs
|
| 60 |
+
*.db lfs
|
| 61 |
+
*.ply lfs
|
| 62 |
+
.git_disabled/lfs/objects/5a/63/5a63ac8802ff3542f01292c431c5278296880d74cd3580d219fcf4827bc235f9 filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
.git_disabled/lfs/objects/75/91/75914c3cb7af982e0b1c6369e25fc46d8c08a0ab5ad022240ae9c1a0d93967c3 filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
.git_disabled/lfs/objects/e9/7c/e97ca125a86bacdaa41c8dca88abd9ca746fd5c9391eda24249c012432b0219b filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
.git_disabled/objects/pack/pack-ba33ec9fbb4d88d9fd0f2be18721a74ddb3ca16f.pack filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
build/lib/reachy_mini_receptionist/images/reachymini_avatar.png filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
build/lib/reachy_mini_receptionist/images/user_avatar.png filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
docs/assets/reachy_mini_dance.gif filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
screenshot.png filter=lfs diff=lfs merge=lfs -text
|
| 70 |
+
src/reachy_mini_receptionist/images/reachymini_avatar.png filter=lfs diff=lfs merge=lfs -text
|
| 71 |
+
src/reachy_mini_receptionist/images/user_avatar.png filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
*.so
|
| 6 |
+
|
| 7 |
+
# Virtual environments
|
| 8 |
+
.venv/
|
| 9 |
+
venv/
|
| 10 |
+
ENV/
|
| 11 |
+
env/
|
| 12 |
+
|
| 13 |
+
# Environment variables
|
| 14 |
+
.env
|
| 15 |
+
|
| 16 |
+
# Build and distribution
|
| 17 |
+
build/
|
| 18 |
+
dist/
|
| 19 |
+
*.egg-info/
|
| 20 |
+
.eggs/
|
| 21 |
+
|
| 22 |
+
# Testing
|
| 23 |
+
.pytest_cache/
|
| 24 |
+
.coverage
|
| 25 |
+
.hypothesis/
|
| 26 |
+
htmlcov/
|
| 27 |
+
coverage.xml
|
| 28 |
+
*.cover
|
| 29 |
+
|
| 30 |
+
# Linting and formatting
|
| 31 |
+
.ruff_cache/
|
| 32 |
+
.mypy_cache/
|
| 33 |
+
|
| 34 |
+
# IDE
|
| 35 |
+
.vscode/
|
| 36 |
+
.idea/
|
| 37 |
+
*.swp
|
| 38 |
+
*.swo
|
| 39 |
+
|
| 40 |
+
# Editor / IDE local settings (user-specific configuration)
|
| 41 |
+
.claude/
|
| 42 |
+
.cursor/
|
| 43 |
+
.vscode/
|
| 44 |
+
.idea/
|
| 45 |
+
|
| 46 |
+
# Security
|
| 47 |
+
*.key
|
| 48 |
+
*.pem
|
| 49 |
+
*.crt
|
| 50 |
+
*.csr
|
| 51 |
+
|
| 52 |
+
# Temporary files
|
| 53 |
+
tmp/
|
| 54 |
+
*.log
|
| 55 |
+
cache/
|
| 56 |
+
|
| 57 |
+
# macOS
|
| 58 |
+
.DS_Store
|
| 59 |
+
|
| 60 |
+
# Linux
|
| 61 |
+
*~
|
| 62 |
+
.directory
|
| 63 |
+
.Trash-*
|
| 64 |
+
.nfs*
|
| 65 |
+
|
| 66 |
+
# User-created personalities (managed by UI)
|
| 67 |
+
src/reachy_mini_receptionist/profiles/user_personalities/
|
| 68 |
+
|
| 69 |
+
# Runtime data (recreated on first run)
|
| 70 |
+
*.db
|
| 71 |
+
*.db-wal
|
| 72 |
+
*.db-shm
|
| 73 |
+
src/reachy_mini_receptionist/guests/*.png
|
| 74 |
+
.env.save
|
| 75 |
+
src/reachy_mini_receptionist.egg-info/
|
| 76 |
+
|
| 77 |
+
# Old git backup directories
|
| 78 |
+
.git_disabled/
|
.hfignore
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Local environment and secrets
|
| 2 |
+
.env
|
| 3 |
+
src/reachy_mini_receptionist/.env
|
| 4 |
+
|
| 5 |
+
# Python caches and build outputs
|
| 6 |
+
__pycache__/
|
| 7 |
+
*.py[cod]
|
| 8 |
+
.pytest_cache/
|
| 9 |
+
.ruff_cache/
|
| 10 |
+
.mypy_cache/
|
| 11 |
+
build/
|
| 12 |
+
dist/
|
| 13 |
+
*.egg-info/
|
| 14 |
+
|
| 15 |
+
# Local VCS artifacts that should never ship in published bundles
|
| 16 |
+
.git/
|
| 17 |
+
.git_disabled/
|
| 18 |
+
|
| 19 |
+
# Local virtual environments
|
| 20 |
+
.venv/
|
| 21 |
+
venv/
|
| 22 |
+
ENV/
|
| 23 |
+
env/
|
LICENSE
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Apache License
|
| 2 |
+
Version 2.0, January 2004
|
| 3 |
+
http://www.apache.org/licenses/
|
| 4 |
+
|
| 5 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
| 6 |
+
|
| 7 |
+
1. Definitions.
|
| 8 |
+
|
| 9 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
| 10 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
| 11 |
+
|
| 12 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
| 13 |
+
the copyright owner that is granting the License.
|
| 14 |
+
|
| 15 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
| 16 |
+
other entities that control, are controlled by, or are under common
|
| 17 |
+
control with that entity. For the purposes of this definition,
|
| 18 |
+
"control" means (i) the power, direct or indirect, to cause the
|
| 19 |
+
direction or management of such entity, whether by contract or
|
| 20 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
| 21 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
| 22 |
+
|
| 23 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
| 24 |
+
exercising permissions granted by this License.
|
| 25 |
+
|
| 26 |
+
"Source" form shall mean the preferred form for making modifications,
|
| 27 |
+
including but not limited to software source code, documentation
|
| 28 |
+
source, and configuration files.
|
| 29 |
+
|
| 30 |
+
"Object" form shall mean any form resulting from mechanical
|
| 31 |
+
transformation or translation of a Source form, including but
|
| 32 |
+
not limited to compiled object code, generated documentation,
|
| 33 |
+
and conversions to other media types.
|
| 34 |
+
|
| 35 |
+
"Work" shall mean the work of authorship, whether in Source or
|
| 36 |
+
Object form, made available under the License, as indicated by a
|
| 37 |
+
copyright notice that is included in or attached to the work
|
| 38 |
+
(an example is provided in the Appendix below).
|
| 39 |
+
|
| 40 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
| 41 |
+
form, that is based on (or derived from) the Work and for which the
|
| 42 |
+
editorial revisions, annotations, elaborations, or other modifications
|
| 43 |
+
represent, as a whole, an original work of authorship. For the purposes
|
| 44 |
+
of this License, Derivative Works shall not include works that remain
|
| 45 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
| 46 |
+
the Work and Derivative Works thereof.
|
| 47 |
+
|
| 48 |
+
"Contribution" shall mean any work of authorship, including
|
| 49 |
+
the original version of the Work and any modifications or additions
|
| 50 |
+
to that Work or Derivative Works thereof, that is intentionally
|
| 51 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
| 52 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
| 53 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
| 54 |
+
means any form of electronic, verbal, or written communication sent
|
| 55 |
+
to the Licensor or its representatives, including but not limited to
|
| 56 |
+
communication on electronic mailing lists, source code control systems,
|
| 57 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
| 58 |
+
Licensor for the purpose of discussing and improving the Work, but
|
| 59 |
+
excluding communication that is conspicuously marked or otherwise
|
| 60 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
| 61 |
+
|
| 62 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
| 63 |
+
on behalf of whom a Contribution has been received by Licensor and
|
| 64 |
+
subsequently incorporated within the Work.
|
| 65 |
+
|
| 66 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
| 67 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 68 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 69 |
+
copyright license to reproduce, prepare Derivative Works of,
|
| 70 |
+
publicly display, publicly perform, sublicense, and distribute the
|
| 71 |
+
Work and such Derivative Works in Source or Object form.
|
| 72 |
+
|
| 73 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
| 74 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 75 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 76 |
+
(except as stated in this section) patent license to make, have made,
|
| 77 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
| 78 |
+
where such license applies only to those patent claims licensable
|
| 79 |
+
by such Contributor that are necessarily infringed by their
|
| 80 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
| 81 |
+
with the Work to which such Contribution(s) was submitted. If You
|
| 82 |
+
institute patent litigation against any entity (including a
|
| 83 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
| 84 |
+
or a Contribution incorporated within the Work constitutes direct
|
| 85 |
+
or contributory patent infringement, then any patent licenses
|
| 86 |
+
granted to You under this License for that Work shall terminate
|
| 87 |
+
as of the date such litigation is filed.
|
| 88 |
+
|
| 89 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
| 90 |
+
Work or Derivative Works thereof in any medium, with or without
|
| 91 |
+
modifications, and in Source or Object form, provided that You
|
| 92 |
+
meet the following conditions:
|
| 93 |
+
|
| 94 |
+
(a) You must give any other recipients of the Work or
|
| 95 |
+
Derivative Works a copy of this License; and
|
| 96 |
+
|
| 97 |
+
(b) You must cause any modified files to carry prominent notices
|
| 98 |
+
stating that You changed the files; and
|
| 99 |
+
|
| 100 |
+
(c) You must retain, in the Source form of any Derivative Works
|
| 101 |
+
that You distribute, all copyright, patent, trademark, and
|
| 102 |
+
attribution notices from the Source form of the Work,
|
| 103 |
+
excluding those notices that do not pertain to any part of
|
| 104 |
+
the Derivative Works; and
|
| 105 |
+
|
| 106 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
| 107 |
+
distribution, then any Derivative Works that You distribute must
|
| 108 |
+
include a readable copy of the attribution notices contained
|
| 109 |
+
within such NOTICE file, excluding those notices that do not
|
| 110 |
+
pertain to any part of the Derivative Works, in at least one
|
| 111 |
+
of the following places: within a NOTICE text file distributed
|
| 112 |
+
as part of the Derivative Works; within the Source form or
|
| 113 |
+
documentation, if provided along with the Derivative Works; or,
|
| 114 |
+
within a display generated by the Derivative Works, if and
|
| 115 |
+
wherever such third-party notices normally appear. The contents
|
| 116 |
+
of the NOTICE file are for informational purposes only and
|
| 117 |
+
do not modify the License. You may add Your own attribution
|
| 118 |
+
notices within Derivative Works that You distribute, alongside
|
| 119 |
+
or as an addendum to the NOTICE text from the Work, provided
|
| 120 |
+
that such additional attribution notices cannot be construed
|
| 121 |
+
as modifying the License.
|
| 122 |
+
|
| 123 |
+
You may add Your own copyright statement to Your modifications and
|
| 124 |
+
may provide additional or different license terms and conditions
|
| 125 |
+
for use, reproduction, or distribution of Your modifications, or
|
| 126 |
+
for any such Derivative Works as a whole, provided Your use,
|
| 127 |
+
reproduction, and distribution of the Work otherwise complies with
|
| 128 |
+
the conditions stated in this License.
|
| 129 |
+
|
| 130 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
| 131 |
+
any Contribution intentionally submitted for inclusion in the Work
|
| 132 |
+
by You to the Licensor shall be under the terms and conditions of
|
| 133 |
+
this License, without any additional terms or conditions.
|
| 134 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
| 135 |
+
the terms of any separate license agreement you may have executed
|
| 136 |
+
with Licensor regarding such Contributions.
|
| 137 |
+
|
| 138 |
+
6. Trademarks. This License does not grant permission to use the trade
|
| 139 |
+
names, trademarks, service marks, or product names of the Licensor,
|
| 140 |
+
except as required for reasonable and customary use in describing the
|
| 141 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
| 142 |
+
|
| 143 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
| 144 |
+
agreed to in writing, Licensor provides the Work (and each
|
| 145 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
| 146 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
| 147 |
+
implied, including, without limitation, any warranties or conditions
|
| 148 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
| 149 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
| 150 |
+
appropriateness of using or redistributing the Work and assume any
|
| 151 |
+
risks associated with Your exercise of permissions under this License.
|
| 152 |
+
|
| 153 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
| 154 |
+
whether in tort (including negligence), contract, or otherwise,
|
| 155 |
+
unless required by applicable law (such as deliberate and grossly
|
| 156 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
| 157 |
+
liable to You for damages, including any direct, indirect, special,
|
| 158 |
+
incidental, or consequential damages of any character arising as a
|
| 159 |
+
result of this License or out of the use or inability to use the
|
| 160 |
+
Work (including but not limited to damages for loss of goodwill,
|
| 161 |
+
work stoppage, computer failure or malfunction, or any and all
|
| 162 |
+
other commercial damages or losses), even if such Contributor
|
| 163 |
+
has been advised of the possibility of such damages.
|
| 164 |
+
|
| 165 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
| 166 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
| 167 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
| 168 |
+
or other liability obligations and/or rights consistent with this
|
| 169 |
+
License. However, in accepting such obligations, You may act only
|
| 170 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
| 171 |
+
of any other Contributor, and only if You agree to indemnify,
|
| 172 |
+
defend, and hold each Contributor harmless for any liability
|
| 173 |
+
incurred by, or claims asserted against, such Contributor by reason
|
| 174 |
+
of your accepting any such warranty or additional liability.
|
| 175 |
+
|
| 176 |
+
END OF TERMS AND CONDITIONS
|
| 177 |
+
|
| 178 |
+
APPENDIX: How to apply the Apache License to your work.
|
| 179 |
+
|
| 180 |
+
To apply the Apache License to your work, attach the following
|
| 181 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
| 182 |
+
replaced with your own identifying information. (Don't include
|
| 183 |
+
the brackets!) The text should be enclosed in the appropriate
|
| 184 |
+
comment syntax for the file format. We also recommend that a
|
| 185 |
+
file or class name and description of purpose be included on the
|
| 186 |
+
same "printed page" as the copyright notice for easier
|
| 187 |
+
identification within third-party archives.
|
| 188 |
+
|
| 189 |
+
Copyright [yyyy] [name of copyright owner]
|
| 190 |
+
|
| 191 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
| 192 |
+
you may not use this file except in compliance with the License.
|
| 193 |
+
You may obtain a copy of the License at
|
| 194 |
+
|
| 195 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
| 196 |
+
|
| 197 |
+
Unless required by applicable law or agreed to in writing, software
|
| 198 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
| 199 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 200 |
+
See the License for the specific language governing permissions and
|
| 201 |
+
limitations under the License.
|
README.md
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: MethdAI Receptionist
|
| 3 |
+
emoji: 🤖
|
| 4 |
+
colorFrom: indigo
|
| 5 |
+
colorTo: purple
|
| 6 |
+
sdk: gradio
|
| 7 |
+
app_file: src/reachy_mini_receptionist/main.py
|
| 8 |
+
pinned: false
|
| 9 |
+
license: apache-2.0
|
| 10 |
+
short_description: Voice-driven receptionist for Reachy Mini — face recognition, real email, live ops dashboard.
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
# MethdAI Receptionist
|
| 14 |
+
|
| 15 |
+
AI receptionist for the **Reachy Mini** robot. A visitor walks up, the bot greets them, asks who they're here to see, looks the host up in the directory, and emails the host that their guest has arrived. Returning visitors are recognized by face on their next visit.
|
| 16 |
+
|
| 17 |
+
Built by **[MethdAI](https://methdai.com)**.
|
| 18 |
+
|
| 19 |
+
---
|
| 20 |
+
|
| 21 |
+
## What it does
|
| 22 |
+
|
| 23 |
+
- **Recognizes faces** — YuNet detection + LBPH recognition. New visitors get registered after they confirm their name.
|
| 24 |
+
- **Talks naturally** — Google Gemini Live (default) or OpenAI Realtime, voice-to-voice over the robot's mic/speaker.
|
| 25 |
+
- **Reads the calendar** — pulls today's schedule from a Google Calendar iCal feed.
|
| 26 |
+
- **Emails the host** — sends real notifications via Resend.
|
| 27 |
+
- **Logs every visit** — SQLite, exportable as CSV.
|
| 28 |
+
- **Configured from a browser** — no `.env` editing, no SSH for routine config.
|
| 29 |
+
|
| 30 |
+
---
|
| 31 |
+
|
| 32 |
+
## Hardware required
|
| 33 |
+
|
| 34 |
+
A real **[Reachy Mini](https://www.pollen-robotics.com/reachy-mini/)** robot from Pollen Robotics. The code expects the robot's camera, microphone, and speaker — there's no cloud-only mode.
|
| 35 |
+
|
| 36 |
+
---
|
| 37 |
+
|
| 38 |
+
## Quick start
|
| 39 |
+
|
| 40 |
+
### Option 1 — Install via the Reachy Mini Control app (recommended)
|
| 41 |
+
|
| 42 |
+
1. Open Reachy Mini Control on your computer
|
| 43 |
+
2. **Install from Hugging Face** → search `methdai/reachy_mini_receptionist` → Install
|
| 44 |
+
3. Toggle the app **On**
|
| 45 |
+
4. Open the dashboard at `http://<your-robot-hostname>.local:7860/dashboard`
|
| 46 |
+
5. Follow the welcome banner — it tells you exactly which API keys to add and where to find them
|
| 47 |
+
|
| 48 |
+
### Option 2 — Manual install (for development)
|
| 49 |
+
|
| 50 |
+
```bash
|
| 51 |
+
# SSH into the robot
|
| 52 |
+
git clone git@github.com:methdai/reachy_mini_receptionist.git
|
| 53 |
+
cd reachy_mini_receptionist
|
| 54 |
+
|
| 55 |
+
# Install the package in editable mode
|
| 56 |
+
/venvs/apps_venv/bin/pip install -e .
|
| 57 |
+
|
| 58 |
+
# Make sure the Reachy Mini daemon is running
|
| 59 |
+
sudo systemctl status reachy-mini-daemon
|
| 60 |
+
|
| 61 |
+
# Start the app
|
| 62 |
+
/venvs/apps_venv/bin/python -m reachy_mini_receptionist.main
|
| 63 |
+
```
|
| 64 |
+
|
| 65 |
+
Then open `http://<your-robot-hostname>.local:7860/dashboard` in any browser on the same network (or `http://localhost:7860/dashboard` in a browser running on the robot itself).
|
| 66 |
+
|
| 67 |
+
---
|
| 68 |
+
|
| 69 |
+
## Configuration
|
| 70 |
+
|
| 71 |
+
**Everything is editable from the dashboard's Settings panel.** No SSH, no `.env` edits.
|
| 72 |
+
|
| 73 |
+
The welcome banner on first launch tells you which keys are missing. Click each banner item → it scrolls you to the right field.
|
| 74 |
+
|
| 75 |
+
### Required for full functionality
|
| 76 |
+
|
| 77 |
+
| Setting | Why | Where to get it |
|
| 78 |
+
|---|---|---|
|
| 79 |
+
| `GEMINI_API_KEY` | Voice (default backend) | [aistudio.google.com/app/apikey](https://aistudio.google.com/app/apikey) — free tier works |
|
| 80 |
+
| `RESEND_API_KEY` | Send emails to hosts | [resend.com](https://resend.com) — free 3000 emails/month |
|
| 81 |
+
| `RECEPTION_ICS_URL` | Read today's appointments | Google Calendar → Settings → **Secret address in iCal format** |
|
| 82 |
+
|
| 83 |
+
### Optional
|
| 84 |
+
|
| 85 |
+
| Setting | Purpose |
|
| 86 |
+
|---|---|
|
| 87 |
+
| `VOICE_BACKEND` | Switch between `gemini` (default) and `openai` |
|
| 88 |
+
| `GEMINI_LIVE_VOICE` | Pick a voice — Puck, Charon, Kore, Aoede, etc. |
|
| 89 |
+
| `GEMINI_LIVE_MODEL` | Override the default Gemini Live model |
|
| 90 |
+
| `OPENAI_API_KEY` | Required only if `VOICE_BACKEND=openai` |
|
| 91 |
+
| `RESEND_FROM` | Sender address — defaults to Resend's sandbox sender (delivers only to your Resend account); set to `reception@yourdomain.com` after verifying your domain at Resend |
|
| 92 |
+
| `FACE_TTL_DAYS` | How long a registered face is remembered (default 90 days) |
|
| 93 |
+
| `VISITOR_LOG_RETENTION_DAYS` | How long visit records are kept (default 365 days) |
|
| 94 |
+
|
| 95 |
+
---
|
| 96 |
+
|
| 97 |
+
## How it works
|
| 98 |
+
|
| 99 |
+
**Vision** — `face_recognition_worker.py` runs YuNet on every frame, detects faces, and feeds crops to LBPH for recognition.
|
| 100 |
+
|
| 101 |
+
**State machine** — `session_manager.py` tracks the current visitor (`idle → visitor_detected → recognized / asking_name → appointment_matched → notified`). `conversation_controller.py` decides what state to move to based on face events and tool results.
|
| 102 |
+
|
| 103 |
+
**Voice + tools** — Either `gemini_live.py` or `openai_realtime.py` (chosen by `VOICE_BACKEND`) handles bi-directional audio with the LLM. Tools available to the LLM:
|
| 104 |
+
- `get_today_calendar` — fetch today's appointments
|
| 105 |
+
- `register_guest` — save a new visitor's face under a name (requires confirmation)
|
| 106 |
+
- `lookup_employee` — find a host in the directory
|
| 107 |
+
- `send_email` — notify the host
|
| 108 |
+
|
| 109 |
+
**Persistence** — two SQLite databases (WAL mode) plus a directory of face images:
|
| 110 |
+
- `employees.db` — the directory you edit in the dashboard
|
| 111 |
+
- `visitor_log.db` — every completed visit, exportable as CSV
|
| 112 |
+
- `guests/` directory — saved face crops, one PNG per visitor
|
| 113 |
+
|
| 114 |
+
---
|
| 115 |
+
|
| 116 |
+
## Dashboard
|
| 117 |
+
|
| 118 |
+
Open `http://<robot>:7860/dashboard` while the app is running.
|
| 119 |
+
|
| 120 |
+
**Live view** — visit count, last visitor, live camera feed, face recognition status.
|
| 121 |
+
**Active Session** — what state the current visitor is in, what the bot heard, what cue it sent to the LLM.
|
| 122 |
+
**Today** — appointments from the calendar, known guests, recent outgoing emails.
|
| 123 |
+
**History** — full visitor log with CSV export and per-row delete.
|
| 124 |
+
**Employees** — add / edit / delete people the bot can notify.
|
| 125 |
+
**Settings** — every environment variable in one place. Toggle dark/light theme from the header.
|
| 126 |
+
|
| 127 |
+
---
|
| 128 |
+
|
| 129 |
+
## Development
|
| 130 |
+
|
| 131 |
+
```bash
|
| 132 |
+
# Install dev dependencies
|
| 133 |
+
/venvs/apps_venv/bin/pip install -e . --group dev  # "dev" is a [dependency-groups] entry, not an extra; needs pip >= 25.1
|
| 134 |
+
|
| 135 |
+
# Run tests
|
| 136 |
+
pytest
|
| 137 |
+
|
| 138 |
+
# Type-check
|
| 139 |
+
mypy src/
|
| 140 |
+
|
| 141 |
+
# Format / lint
|
| 142 |
+
ruff check src/
|
| 143 |
+
ruff format src/
|
| 144 |
+
```
|
| 145 |
+
|
| 146 |
+
The codebase is single-process Python 3.10+. Audio streams over WebRTC via `fastrtc`. The dashboard is a single static HTML file in `src/reachy_mini_receptionist/static/`.
|
| 147 |
+
|
| 148 |
+
---
|
| 149 |
+
|
| 150 |
+
## License
|
| 151 |
+
|
| 152 |
+
Apache-2.0
|
deploy/install_systemd.sh
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
#
|
| 3 |
+
# Install (or upgrade) the MethdAI Receptionist systemd unit on the robot.
|
| 4 |
+
# Run on the robot itself (e.g. via ssh pollen@reachy-mini.local).
|
| 5 |
+
#
|
| 6 |
+
# Idempotent: re-running picks up edits to reachy-receptionist.service and
|
| 7 |
+
# restarts the live service.
|
| 8 |
+
#
|
| 9 |
+
# After install, useful commands:
|
| 10 |
+
# systemctl status reachy-receptionist
|
| 11 |
+
# journalctl -u reachy-receptionist -f
|
| 12 |
+
# sudo systemctl restart reachy-receptionist
|
| 13 |
+
# sudo systemctl disable --now reachy-receptionist
|
| 14 |
+
#
|
| 15 |
+
set -euo pipefail
|
| 16 |
+
|
| 17 |
+
UNIT_NAME="reachy-receptionist.service"
|
| 18 |
+
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
| 19 |
+
SRC="$HERE/$UNIT_NAME"
|
| 20 |
+
DST="/etc/systemd/system/$UNIT_NAME"
|
| 21 |
+
|
| 22 |
+
if [[ ! -f "$SRC" ]]; then
|
| 23 |
+
echo "✗ Unit file not found at $SRC" >&2
|
| 24 |
+
exit 1
|
| 25 |
+
fi
|
| 26 |
+
|
| 27 |
+
# Re-elevate via `sudo bash <abs-path>` so we don't depend on the script
|
| 28 |
+
# having the executable bit set (git-from-Windows often drops it, and we
|
| 29 |
+
# want `bash deploy/install_systemd.sh` to Just Work).
|
| 30 |
+
if [[ "$(id -u)" -ne 0 ]]; then
|
| 31 |
+
echo "↻ Re-running with sudo..."
|
| 32 |
+
exec sudo --preserve-env=HOME bash "$HERE/$(basename "${BASH_SOURCE[0]}")" "$@"
|
| 33 |
+
fi
|
| 34 |
+
|
| 35 |
+
# Sanity check the runtime paths referenced by the unit file. Catching this
|
| 36 |
+
# now beats debugging a cryptic "ExecStart failed" later.
|
| 37 |
+
PYTHON_BIN="/venvs/apps_venv/bin/python"
|
| 38 |
+
PROJECT_DIR="/home/pollen/reachy_mini_receptionist"
|
| 39 |
+
if [[ ! -x "$PYTHON_BIN" ]]; then
|
| 40 |
+
echo "✗ Python interpreter not found at $PYTHON_BIN" >&2
|
| 41 |
+
echo " Fix: install / locate the apps_venv before re-running." >&2
|
| 42 |
+
exit 2
|
| 43 |
+
fi
|
| 44 |
+
if [[ ! -d "$PROJECT_DIR" ]]; then
|
| 45 |
+
echo "✗ Project not found at $PROJECT_DIR" >&2
|
| 46 |
+
exit 3
|
| 47 |
+
fi
|
| 48 |
+
|
| 49 |
+
echo "→ Installing $UNIT_NAME → $DST"
|
| 50 |
+
cp "$SRC" "$DST"
|
| 51 |
+
chmod 644 "$DST"
|
| 52 |
+
|
| 53 |
+
echo "→ Reloading systemd"
|
| 54 |
+
systemctl daemon-reload
|
| 55 |
+
|
| 56 |
+
echo "→ Enabling on boot"
|
| 57 |
+
systemctl enable "$UNIT_NAME"
|
| 58 |
+
|
| 59 |
+
echo "→ Restarting service"
|
| 60 |
+
systemctl restart "$UNIT_NAME"
|
| 61 |
+
|
| 62 |
+
# Brief wait so the status snapshot below shows a meaningful state
|
| 63 |
+
sleep 2
|
| 64 |
+
|
| 65 |
+
echo
|
| 66 |
+
echo "✓ Installed. Current status:"
|
| 67 |
+
echo "─────────────────────────────────────────"
|
| 68 |
+
systemctl --no-pager --lines=10 status "$UNIT_NAME" || true
|
| 69 |
+
echo "─────────────────────────────────────────"
|
| 70 |
+
echo
|
| 71 |
+
echo "Follow logs: journalctl -u $UNIT_NAME -f"
|
| 72 |
+
echo "Manual restart: sudo systemctl restart $UNIT_NAME"
|
| 73 |
+
echo "Stop: sudo systemctl stop $UNIT_NAME"
|
| 74 |
+
echo "Disable: sudo systemctl disable --now $UNIT_NAME"
|
deploy/reachy-receptionist.service
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[Unit]
|
| 2 |
+
Description=MethdAI Receptionist (Reachy Mini)
|
| 3 |
+
Documentation=https://github.com/mukul-chauhan-methdai/reachy_mini_receptionist
|
| 4 |
+
After=network-online.target
|
| 5 |
+
Wants=network-online.target
|
| 6 |
+
# If the receptionist can't come up after 10 attempts in 10 minutes, stop
|
| 7 |
+
# retrying so we don't hammer the Gemini / OpenAI / Resend / camera APIs on a
|
| 8 |
+
# permanent failure.
|
| 9 |
+
StartLimitBurst=10
|
| 10 |
+
StartLimitIntervalSec=600
|
| 11 |
+
|
| 12 |
+
[Service]
|
| 13 |
+
Type=simple
|
| 14 |
+
User=pollen
|
| 15 |
+
Group=pollen
|
| 16 |
+
WorkingDirectory=/home/pollen/reachy_mini_receptionist
|
| 17 |
+
|
| 18 |
+
# Load .env into the service environment. The "-" prefix makes the file
|
| 19 |
+
# optional — a missing .env is silently skipped and doesn't block start.
|
| 20 |
+
EnvironmentFile=-/home/pollen/reachy_mini_receptionist/.env
|
| 21 |
+
|
| 22 |
+
# Best-effort: wake the reachy_mini daemon before launching the app.
|
| 23 |
+
# Retries every 3s for up to 30s. We always exit 0 so a failed wake
|
| 24 |
+
# doesn't kill the unit before ExecStart; the app itself will crash and
|
| 25 |
+
# systemd will restart it if the daemon is genuinely down.
|
| 26 |
+
ExecStartPre=/bin/bash -c 'for i in $(seq 1 10); do curl -fsS -X POST "http://localhost:8000/api/daemon/start?wake_up=true" >/dev/null && exit 0; sleep 3; done; exit 0'
|
| 27 |
+
|
| 28 |
+
ExecStart=/venvs/apps_venv/bin/python -m reachy_mini_receptionist.main
|
| 29 |
+
|
| 30 |
+
Restart=on-failure
|
| 31 |
+
RestartSec=5
|
| 32 |
+
|
| 33 |
+
# Capture stdout/stderr in the journal — view with:
|
| 34 |
+
# journalctl -u reachy-receptionist -f
|
| 35 |
+
StandardOutput=journal
|
| 36 |
+
StandardError=journal
|
| 37 |
+
|
| 38 |
+
[Install]
|
| 39 |
+
WantedBy=multi-user.target
|
docs/assets/conversation_app_arch.svg
ADDED
|
|
Git LFS Details
|
docs/assets/reachy_mini_dance.gif
ADDED
|
Git LFS Details
|
docs/scheme.mmd
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
config:
|
| 3 |
+
layout: dagre
|
| 4 |
+
flowchart:
|
| 5 |
+
htmlLabels: true
|
| 6 |
+
---
|
| 7 |
+
flowchart TB
|
| 8 |
+
User(["<span style='font-size:16px;font-weight:bold;'>User</span><br><span style='font-size:13px;color:#01579b;'>Person interacting with system</span>"])
|
| 9 |
+
-- audio stream -->
|
| 10 |
+
UI@{ label: "<span style='font-size:16px;font-weight:bold;'>UI Layer</span><br><span style='font-size:13px;color:#0277bd;'>Gradio/Console</span>" }
|
| 11 |
+
|
| 12 |
+
UI -- audio stream -->
|
| 13 |
+
OpenAI@{ label: "<span style='font-size:17px;font-weight:bold;'>gpt-realtime API</span><br><span style='font-size:13px; color:#7b1fa2;'>Audio+Tool Calls+Vision</span>" }
|
| 14 |
+
|
| 15 |
+
OpenAI -- audio stream -->
|
| 16 |
+
Motion@{ label: "<span style='font-size:16px;font-weight:bold;'>Motion Control</span><br><span style='font-size:13px;color:#f57f17;'>Audio Sync + Tracking</span>" }
|
| 17 |
+
|
| 18 |
+
OpenAI -- tool calls -->
|
| 19 |
+
Handlers@{ label: "<span style='font-size:16px;font-weight:bold;'>Tool Layer</span><br><span style='font-size:12px;color:#f9a825;'>Built-in tools + profile-local tools<br/>+ external tools (optional)</span>" }
|
| 20 |
+
|
| 21 |
+
Profiles@{ label: "<span style='font-size:16px;font-weight:bold;'>Selected Profile</span><br><span style='font-size:12px;color:#6a1b9a;'>built-in or external<br/>instructions.txt + tools.txt</span>" }
|
| 22 |
+
|
| 23 |
+
Profiles -- defines enabled tools --> Handlers
|
| 24 |
+
|
| 25 |
+
Handlers -- movement
|
| 26 |
+
requests --> Motion
|
| 27 |
+
|
| 28 |
+
Handlers -- camera frames, head tracking -->
|
| 29 |
+
Camera@{ label: "<span style='font-size:16px;font-weight:bold;'>Camera Worker</span><br><span style='font-size:13px;color:#f57f17;'>Frame Buffer + Head Tracking</span>" }
|
| 30 |
+
|
| 31 |
+
Handlers -. image for
|
| 32 |
+
analysis .-> OpenAI
|
| 33 |
+
|
| 34 |
+
Camera -- head tracking --> Motion
|
| 35 |
+
|
| 36 |
+
Camera -. frames .->
|
| 37 |
+
Vision@{ label: "<span style='font-size:16px;font-weight:bold;'>Vision Processor</span><br><span style='font-size:13px;color:#7b1fa2;'>Local VLM (optional)</span>" }
|
| 38 |
+
|
| 39 |
+
Vision -. description .-> Handlers
|
| 40 |
+
|
| 41 |
+
Robot@{ label: "<span style='font-size:16px;font-weight:bold;'>reachy_mini</span><br><span style='font-size:13px;color:#c62828;'>Robot Control Library</span>" }
|
| 42 |
+
-- camera
|
| 43 |
+
frames --> Camera
|
| 44 |
+
|
| 45 |
+
Motion -- commands --> Robot
|
| 46 |
+
|
| 47 |
+
Handlers -- results --> OpenAI
|
| 48 |
+
|
| 49 |
+
User:::userStyle
|
| 50 |
+
UI:::uiStyle
|
| 51 |
+
OpenAI:::aiStyle
|
| 52 |
+
Motion:::coreStyle
|
| 53 |
+
Profiles:::toolStyle
|
| 54 |
+
Handlers:::toolStyle
|
| 55 |
+
Camera:::coreStyle
|
| 56 |
+
Vision:::aiStyle
|
| 57 |
+
Robot:::hardwareStyle
|
| 58 |
+
classDef userStyle fill:#e1f5fe,stroke:#01579b,stroke-width:3px
|
| 59 |
+
classDef uiStyle fill:#b3e5fc,stroke:#0277bd,stroke-width:2px
|
| 60 |
+
classDef aiStyle fill:#e1bee7,stroke:#7b1fa2,stroke-width:3px
|
| 61 |
+
classDef coreStyle fill:#fff9c4,stroke:#f57f17,stroke-width:2px
|
| 62 |
+
classDef hardwareStyle fill:#ef9a9a,stroke:#c62828,stroke-width:3px
|
| 63 |
+
classDef toolStyle fill:#fffde7,stroke:#f9a825,stroke-width:1px
|
external_content/external_profiles/starter_profile/instructions.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
You are a helpful Reachy Mini assistant running from an external profile.
|
| 2 |
+
|
| 3 |
+
When asked to demonstrate your custom greeting, use the `starter_custom_tool` tool.
|
| 4 |
+
You can also dance and show emotions like the built-in profiles.
|
| 5 |
+
|
| 6 |
+
Be friendly and concise, and explain that you're using an external profile/tool setup when asked about yourself.
|
external_content/external_profiles/starter_profile/tools.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This file is an explicit allow-list.
|
| 2 |
+
# Every tool name listed below must be either:
|
| 3 |
+
# - a built-in tool from src/reachy_mini_receptionist/tools/
|
| 4 |
+
# - or an external tool file in TOOLS_DIRECTORY (e.g. external_tools/starter_custom_tool.py)
|
| 5 |
+
|
| 6 |
+
get_today_calendar
|
| 7 |
+
register_guest
|
| 8 |
+
send_email
|
| 9 |
+
starter_custom_tool
|
external_content/external_tools/starter_custom_tool.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Example external tool implementation."""
|
| 2 |
+
|
| 3 |
+
import logging
|
| 4 |
+
from typing import Any, Dict
|
| 5 |
+
|
| 6 |
+
from reachy_mini_receptionist.tools.core_tools import Tool, ToolDependencies
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
logger = logging.getLogger(__name__)
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class StarterCustomTool(Tool):
    """Placeholder custom tool that demonstrates external tool loading.

    The tool accepts an optional ``message`` argument and echoes it back in a
    success payload. Apart from writing one log line it has no other effect.
    """

    name = "starter_custom_tool"
    description = "A placeholder custom tool loaded from outside the library"
    # Schema of the accepted arguments: a single optional string, nothing required.
    parameters_schema = {
        "type": "object",
        "properties": {
            "message": {
                "type": "string",
                "description": "Optional message to include in the response",
            },
        },
        "required": [],
    }

    async def __call__(self, deps: ToolDependencies, **kwargs: Any) -> Dict[str, Any]:
        """Execute the placeholder tool.

        Args:
            deps: Shared tool dependencies; not used by this tool.
            **kwargs: Tool arguments; only the optional ``message`` key is read.

        Returns:
            A dict with ``status`` set to ``"success"`` and the echoed ``message``.

        """
        message = kwargs.get("message")
        if message is None:
            # Covers both an absent key and an explicit null, so the response
            # never carries ``None`` where the schema promises a string.
            message = "Hello from custom tool!"

        # Lazy %-style arguments: the string is only built if the record is emitted.
        logger.info("Tool call: starter_custom_tool message=%s", message)

        return {"status": "success", "message": message}
|
index.html
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!doctype html>
|
| 2 |
+
<html>
|
| 3 |
+
|
| 4 |
+
<head>
|
| 5 |
+
<meta charset="utf-8" />
|
| 6 |
+
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
| 7 |
+
<title>Reachy Mini AI Receptionist</title>
|
| 8 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 9 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 10 |
+
<link href="https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;500;600;700&family=Manrope:wght@400;500;600&display=swap" rel="stylesheet">
|
| 11 |
+
<link rel="stylesheet" href="style.css" />
|
| 12 |
+
</head>
|
| 13 |
+
|
| 14 |
+
<body>
|
| 15 |
+
<header class="hero">
|
| 16 |
+
<div class="topline">
|
| 17 |
+
<div class="brand">
|
| 18 |
+
<span class="logo">🤖</span>
|
| 19 |
+
<span class="brand-name">Reachy Mini</span>
|
| 20 |
+
</div>
|
| 21 |
+
<div class="pill">Realtime voice · Vision aware · Expressive motion</div>
|
| 22 |
+
</div>
|
| 23 |
+
<div class="hero-grid">
|
| 24 |
+
<div class="hero-copy">
|
| 25 |
+
<p class="eyebrow">AI Receptionist</p>
|
| 26 |
+
<h1>
|
| 27 |
+
Face-aware receptionist with tool-calling automation.
|
| 28 |
+
<a class="live-demo-badge" href="#live-demo">Live demo</a>
|
| 29 |
+
</h1>
|
| 30 |
+
<p class="lede">
|
| 31 |
+
A camera-aware front-desk assistant for Reachy Mini. Greet visitors naturally, register guests, check appointments, and log handoff actions from a single dashboard.
|
| 32 |
+
</p>
|
| 33 |
+
<p class="lede">
|
| 34 |
+
Built for reception workflows: realtime face detection to know when someone is present, face recognition to personalize interactions, and structured tool calls so the AI can register guests, query appointments, and trigger handoff actions reliably.
|
| 35 |
+
</p>
|
| 36 |
+
<div class="hero-actions">
|
| 37 |
+
<a class="btn primary" href="#highlights">Explore features</a>
|
| 38 |
+
<a class="btn ghost" href="#story">See how it feels</a>
|
| 39 |
+
</div>
|
| 40 |
+
<div class="hero-badges">
|
| 41 |
+
<span>Realtime face detection</span>
|
| 42 |
+
<span>Visitor face recognition</span>
|
| 43 |
+
<span>Model tool-calling workflows</span>
|
| 44 |
+
<span>Low-latency voice + dashboard ops</span>
|
| 45 |
+
</div>
|
| 46 |
+
</div>
|
| 47 |
+
<div class="hero-visual">
|
| 48 |
+
<div class="glass-card">
|
| 49 |
+
<img src="screenshot.png" alt="Reachy Mini AI Receptionist screenshot" class="hero-gif">
|
| 50 |
+
<p class="caption">Reachy Mini can greet, identify, and assist visitors with receptionist-specific context.</p>
|
| 51 |
+
<div class="video-embed-wrapper" id="live-demo">
|
| 52 |
+
<iframe
|
| 53 |
+
src="https://www.youtube.com/embed/4U9uj5b9p2Y"
|
| 54 |
+
title="Reachy Mini AI Receptionist live demo"
|
| 55 |
+
loading="lazy"
|
| 56 |
+
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
|
| 57 |
+
allowfullscreen>
|
| 58 |
+
</iframe>
|
| 59 |
+
</div>
|
| 60 |
+
<p class="video-link"><a href="https://youtu.be/4U9uj5b9p2Y" target="_blank" rel="noopener">Watch on YouTube</a></p>
|
| 61 |
+
</div>
|
| 62 |
+
</div>
|
| 63 |
+
</div>
|
| 64 |
+
</header>
|
| 65 |
+
|
| 66 |
+
<section id="highlights" class="section features">
|
| 67 |
+
<div class="section-header">
|
| 68 |
+
<p class="eyebrow">What’s inside</p>
|
| 69 |
+
<h2>All-in-one receptionist layer for your robot</h2>
|
| 70 |
+
<p class="intro">
|
| 71 |
+
The app blends realtime speech, vision, and workflow tools so Reachy Mini can run a front desk flow.
|
| 72 |
+
</p>
|
| 73 |
+
</div>
|
| 74 |
+
<div class="feature-grid">
|
| 75 |
+
<div class="feature-card">
|
| 76 |
+
<span class="icon">🎤</span>
|
| 77 |
+
<h3>Natural voice chat</h3>
|
| 78 |
+
<p>Talk freely and get fast, high-quality replies powered by realtime models.</p>
|
| 79 |
+
</div>
|
| 80 |
+
<div class="feature-card">
|
| 81 |
+
<span class="icon">🎥</span>
|
| 82 |
+
<h3>Face-aware onboarding</h3>
|
| 83 |
+
<p>Recognize known visitors, register new guests, and keep the latest face context synced with the conversation.</p>
|
| 84 |
+
</div>
|
| 85 |
+
<div class="feature-card">
|
| 86 |
+
<span class="icon">💃</span>
|
| 87 |
+
<h3>Expressive motion</h3>
|
| 88 |
+
<p>Use subtle head and antenna gestures during welcome and registration interactions.</p>
|
| 89 |
+
</div>
|
| 90 |
+
<div class="feature-card">
|
| 91 |
+
<span class="icon">🧠</span>
|
| 92 |
+
<h3>Calendar-aware assistance</h3>
|
| 93 |
+
<p>Use appointment context to welcome guests on time and guide follow-up actions.</p>
|
| 94 |
+
</div>
|
| 95 |
+
<div class="feature-card">
|
| 96 |
+
<span class="icon">🌐</span>
|
| 97 |
+
<h3>Ready for your setup</h3>
|
| 98 |
+
<p>Works in console mode or web mode with dashboard APIs for quick operator visibility.</p>
|
| 99 |
+
</div>
|
| 100 |
+
</div>
|
| 101 |
+
</section>
|
| 102 |
+
|
| 103 |
+
<section id="story" class="section story">
|
| 104 |
+
<div class="story-grid">
|
| 105 |
+
<div class="story-card">
|
| 106 |
+
<p class="eyebrow">How it feels</p>
|
| 107 |
+
<h3>From greeting to handoff in seconds</h3>
|
| 108 |
+
<ul class="story-list">
|
| 109 |
+
<li><span>👋</span> Greet visitors naturally with low-latency voice conversation.</li>
|
| 110 |
+
<li><span>👀</span> Use camera context to identify known guests or register new ones.</li>
|
| 111 |
+
<li><span>📅</span> Check appointment context and respond with relevant timing cues.</li>
|
| 112 |
+
<li><span>📨</span> Log handoff actions in the outbox for host follow-up.</li>
|
| 113 |
+
</ul>
|
| 114 |
+
</div>
|
| 115 |
+
<div class="story-card secondary">
|
| 116 |
+
<p class="eyebrow">Where it shines</p>
|
| 117 |
+
<h3>Great for offices, demos, and guided reception flows</h3>
|
| 118 |
+
<p class="story-text">
|
| 119 |
+
Show how Reachy Mini can handle repeatable visitor workflows while staying expressive and conversational. It is ideal for front-desk demos, events, and product showcases.
|
| 120 |
+
</p>
|
| 121 |
+
<div class="chips">
|
| 122 |
+
<span class="chip">Guest recognition</span>
|
| 123 |
+
<span class="chip">Calendar check</span>
|
| 124 |
+
<span class="chip">Outbox logging</span>
|
| 125 |
+
<span class="chip">Dashboard APIs</span>
|
| 126 |
+
<span class="chip">Realtime conversation</span>
|
| 127 |
+
</div>
|
| 128 |
+
</div>
|
| 129 |
+
</div>
|
| 130 |
+
</section>
|
| 131 |
+
|
| 132 |
+
<footer class="footer">
|
| 133 |
+
<p>
|
| 134 |
+
Reachy Mini AI Receptionist by Toon Beerten (<a href="mailto:toon@neontreebot.be">toon@neontreebot.be</a>), based on the Reachy Mini conversation app by <a href="https://github.com/pollen-robotics" target="_blank" rel="noopener">Pollen Robotics</a>.
|
| 135 |
+
Explore more apps on <a href="https://huggingface.co/spaces/pollen-robotics/Reachy_Mini_Apps" target="_blank" rel="noopener">Hugging Face Spaces</a>.
|
| 136 |
+
</p>
|
| 137 |
+
</footer>
|
| 138 |
+
|
| 139 |
+
</body>
|
| 140 |
+
|
| 141 |
+
</html>
|
plan.md
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reachy Mini AI Receptionist — Plan
|
| 2 |
+
|
| 3 |
+
## What This App Does
|
| 4 |
+
|
| 5 |
+
An interactive AI receptionist that:
|
| 6 |
+
|
| 7 |
+
1. Talks to visitors via OpenAI Realtime API (low-latency speech in/out).
|
| 8 |
+
2. Monitors camera frames for face detection/recognition in a background worker.
|
| 9 |
+
3. Stores guest face crops as PNG files in a persistent `guests/` directory.
|
| 10 |
+
4. Uses a hardcoded POC calendar for appointment context.
|
| 11 |
+
5. Exposes a dashboard with live video, guests, calendar, outbox, and debug logs.
|
| 12 |
+
|
| 13 |
+
Start command:
|
| 14 |
+
|
| 15 |
+
```bash
|
| 16 |
+
python -m reachy_mini_receptionist.main
|
| 17 |
+
```
|
| 18 |
+
|
| 19 |
+
---
|
| 20 |
+
|
| 21 |
+
## Architecture
|
| 22 |
+
|
| 23 |
+
The receptionist extends the conversation base stack and adds receptionist-specific modules:
|
| 24 |
+
|
| 25 |
+
```text
|
| 26 |
+
src/reachy_mini_receptionist/
|
| 27 |
+
├── face_db.py # File-based face store (PNG per guest in guests/)
|
| 28 |
+
├── face_recognition_worker.py # Background detection/recognition + event emission
|
| 29 |
+
├── calendar_data.py # Hardcoded appointment data
|
| 30 |
+
├── main.py # App entrypoint + dashboard API mounting
|
| 31 |
+
└── tools/
|
| 32 |
+
├── get_today_calendar.py
|
| 33 |
+
├── register_guest.py
|
| 34 |
+
├── send_email.py
|
| 35 |
+
└── check_current_face.py # Legacy compatibility path
|
| 36 |
+
```
|
| 37 |
+
|
| 38 |
+
---
|
| 39 |
+
|
| 40 |
+
## Key Design Decisions
|
| 41 |
+
|
| 42 |
+
### 1) Face database is file-based
|
| 43 |
+
|
| 44 |
+
- Guests are stored as grayscale PNG crops in `guests/`.
|
| 45 |
+
- `FaceDatabase` enforces capacity with FIFO-style eviction when full.
|
| 46 |
+
- No SQL database is required.
|
| 47 |
+
|
| 48 |
+
### 2) Face recognition runs in a background worker
|
| 49 |
+
|
| 50 |
+
- `FaceRecognitionWorker` runs independently from the realtime audio loop.
|
| 51 |
+
- Worker state is consumed by tools and dashboard endpoints.
|
| 52 |
+
- Stable face transitions emit context events to the model.
|
| 53 |
+
|
| 54 |
+
### 3) Calendar is static for POC
|
| 55 |
+
|
| 56 |
+
- `calendar_data.py` returns hardcoded appointments.
|
| 57 |
+
- Easy to swap later for Google/Microsoft calendar integrations.
|
| 58 |
+
|
| 59 |
+
### 4) Dashboard API is mounted in-app
|
| 60 |
+
|
| 61 |
+
- `GET /dashboard` serves the receptionist dashboard.
|
| 62 |
+
- `GET /video_feed` streams annotated MJPEG.
|
| 63 |
+
- `GET /api/guests`, `/api/calendar`, `/api/outbox`, `/api/face_status`, `/api/logs` expose app state.
|
| 64 |
+
|
| 65 |
+
### 5) Profile is intentionally locked
|
| 66 |
+
|
| 67 |
+
- `LOCKED_PROFILE` is set to `_reachy_mini_receptionist_locked_profile` in `config.py`.
|
| 68 |
+
- Current locked tool allow-list is:
|
| 69 |
+
- `get_today_calendar`
|
| 70 |
+
- `register_guest`
|
| 71 |
+
- `send_email`
|
| 72 |
+
|
| 73 |
+
---
|
| 74 |
+
|
| 75 |
+
## Face Context Event Behavior
|
| 76 |
+
|
| 77 |
+
The receptionist flow is push-based:
|
| 78 |
+
|
| 79 |
+
- Stable face transitions are emitted by `FaceRecognitionWorker`.
|
| 80 |
+
- `OpenaiRealtimeHandler` injects these as context-only user items.
|
| 81 |
+
- No automatic `response.create` is triggered by these face context updates.
|
| 82 |
+
|
| 83 |
+
---
|
| 84 |
+
|
| 85 |
+
## Known Notes
|
| 86 |
+
|
| 87 |
+
- With `--no-camera`, recognition and registration tools cannot operate.
|
| 88 |
+
- Output language behavior is controlled by profile instructions.
|
| 89 |
+
- If profile/tool loading fails, the app can fall back to default model behavior; monitor startup logs.
|
pyproject.toml
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[build-system]
|
| 2 |
+
requires = [ "setuptools",]
|
| 3 |
+
build-backend = "setuptools.build_meta"
|
| 4 |
+
|
| 5 |
+
[project]
|
| 6 |
+
name = "reachy_mini_receptionist"
|
| 7 |
+
version = "0.3.0"
|
| 8 |
+
description = "Reachy Mini AI receptionist app with realtime voice, guest recognition, and dashboard tools."
|
| 9 |
+
readme = "README.md"
|
| 10 |
+
requires-python = ">=3.10"
|
| 11 |
+
dependencies = [ "aiortc>=1.13.0", "fastrtc>=0.0.34", "gradio==5.50.1.dev1", "huggingface-hub==1.3.0", "opencv-contrib-python>=4.8.0", "python-dotenv", "openai>=2.1", "google-genai>=1.40", "reachy_mini_dances_library", "reachy_mini_toolbox", "reachy-mini>=1.5.0", "eclipse-zenoh~=1.7.0", "gradio_client>=1.13.3", "numpy>=1.24", "httpx>=0.27", "icalendar>=5.0",]
|
| 12 |
+
license = "Apache-2.0"
|
| 13 |
+
classifiers = [
|
| 14 |
+
"Programming Language :: Python :: 3",
|
| 15 |
+
"Programming Language :: Python :: 3 :: Only",
|
| 16 |
+
"Operating System :: OS Independent",
|
| 17 |
+
]
|
| 18 |
+
|
| 19 |
+
[project.urls]
|
| 20 |
+
Homepage = "https://github.com/pollen-robotics/reachy_mini"
|
| 21 |
+
Repository = "https://github.com/pollen-robotics/reachy_mini"
|
| 22 |
+
[[project.authors]]
|
| 23 |
+
name = "Pollen Robotics"
|
| 24 |
+
email = "contact@pollen-robotics.com"
|
| 25 |
+
|
| 26 |
+
[dependency-groups]
|
| 27 |
+
dev = [ "pytest", "pytest-asyncio", "ruff==0.12.0", "mypy==1.18.2", "pre-commit", "types-requests", "python-semantic-release>=10.5.3",]
|
| 28 |
+
|
| 29 |
+
[project.optional-dependencies]
|
| 30 |
+
local_vision = [ "torch>=2.1", "transformers==5.0.0rc2", "num2words",]
|
| 31 |
+
yolo_vision = [ "ultralytics", "supervision",]
|
| 32 |
+
mediapipe_vision = [ "mediapipe==0.10.14",]
|
| 33 |
+
all_vision = [ "torch>=2.1", "transformers==5.0.0rc2", "num2words", "ultralytics", "supervision", "mediapipe==0.10.14",]
|
| 34 |
+
|
| 35 |
+
[project.scripts]
|
| 36 |
+
reachy-mini-receptionist = "reachy_mini_receptionist.main:main"
|
| 37 |
+
|
| 38 |
+
[tool.setuptools]
|
| 39 |
+
include-package-data = true
|
| 40 |
+
|
| 41 |
+
[tool.ruff]
|
| 42 |
+
line-length = 119
|
| 43 |
+
exclude = [ ".venv", "dist", "build", "**/__pycache__", "*.egg-info", ".mypy_cache", ".pytest_cache",]
|
| 44 |
+
|
| 45 |
+
[tool.mypy]
|
| 46 |
+
python_version = "3.12"
|
| 47 |
+
files = [ "src/",]
|
| 48 |
+
ignore_missing_imports = true
|
| 49 |
+
strict = true
|
| 50 |
+
show_error_codes = true
|
| 51 |
+
warn_unused_ignores = true
|
| 52 |
+
|
| 53 |
+
[project.entry-points.reachy_mini_apps]
|
| 54 |
+
reachy_mini_receptionist = "reachy_mini_receptionist.main:ReachyMiniReceptionist"
|
| 55 |
+
|
| 56 |
+
[tool.setuptools.package-dir]
|
| 57 |
+
"" = "src"
|
| 58 |
+
|
| 59 |
+
[tool.setuptools.package-data]
|
| 60 |
+
reachy_mini_receptionist = [ "images/*", "static/*", ".env.example", "profiles/**/*.txt", "prompts/**/*.txt",]
|
| 61 |
+
|
| 62 |
+
[tool.ruff.lint]
|
| 63 |
+
select = [ "E", "F", "W", "I", "C4", "D",]
|
| 64 |
+
ignore = [ "E501", "D100", "D203", "D213",]
|
| 65 |
+
|
| 66 |
+
[tool.ruff.format]
|
| 67 |
+
quote-style = "double"
|
| 68 |
+
indent-style = "space"
|
| 69 |
+
skip-magic-trailing-comma = false
|
| 70 |
+
line-ending = "auto"
|
| 71 |
+
|
| 72 |
+
[tool.setuptools.packages.find]
|
| 73 |
+
where = [ "src",]
|
| 74 |
+
|
| 75 |
+
[tool.ruff.lint.isort]
|
| 76 |
+
length-sort = true
|
| 77 |
+
lines-after-imports = 2
|
| 78 |
+
no-lines-before = [ "standard-library", "local-folder",]
|
| 79 |
+
known-local-folder = [ "reachy_mini_receptionist",]
|
| 80 |
+
known-first-party = [ "reachy_mini", "reachy_mini_dances_library", "reachy_mini_toolbox",]
|
| 81 |
+
split-on-trailing-comma = true
|
screenshot.png
ADDED
|
Git LFS Details
|
scripts/gemini_live_smoke.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Minimal Gemini Live smoke test — isolates SDK behavior from app.
|
| 2 |
+
|
| 3 |
+
Reads GEMINI_API_KEY from environment, connects to the Live API with
|
| 4 |
+
the configured model, sends a single text turn, prints every event
|
| 5 |
+
received until session closes or 30s timeout.
|
| 6 |
+
|
| 7 |
+
Use:
|
| 8 |
+
GEMINI_API_KEY=... /venvs/apps_venv/bin/python scripts/gemini_live_smoke.py
|
| 9 |
+
GEMINI_API_KEY=... GEMINI_LIVE_MODEL=gemini-2.0-flash-live-001 /venvs/apps_venv/bin/python scripts/gemini_live_smoke.py
|
| 10 |
+
"""
|
| 11 |
+
from __future__ import annotations
|
| 12 |
+
|
| 13 |
+
import asyncio
|
| 14 |
+
import os
|
| 15 |
+
import sys
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
async def main() -> int:
    """Run one text turn against the Gemini Live API and print every event.

    Reads ``GEMINI_API_KEY`` (required) and ``GEMINI_LIVE_MODEL`` (optional)
    from the environment, opens a Live session requesting AUDIO responses,
    sends a single user text turn and logs each server event until the model
    reports ``turn_complete`` or 30 seconds elapse.

    Returns:
        0 when the session ran to completion (a receive timeout is logged but
        still counts as a completed run); 1 when the API key is missing, the
        ``google-genai`` SDK cannot be imported, or the session raised.

    """
    key = os.environ.get("GEMINI_API_KEY", "").strip()
    if not key:
        print("ERROR: GEMINI_API_KEY not set", file=sys.stderr)
        return 1

    model = os.environ.get("GEMINI_LIVE_MODEL", "gemini-2.5-flash-native-audio-latest")
    print(f"[smoke] model={model}")

    # Imported lazily so a missing SDK is reported as a clean error.
    try:
        from google import genai
    except ImportError as e:
        print(f"ERROR: google-genai not installed: {e}", file=sys.stderr)
        return 1

    print(f"[smoke] google-genai version={getattr(genai, '__version__', '?')}")

    client = genai.Client(api_key=key, http_options={"api_version": "v1beta"})

    # Native-audio models REQUIRE AUDIO modality. The 1007 error
    # "Cannot extract voices from a non-audio request" confirms this.
    config = {
        "response_modalities": ["AUDIO"],
    }
    try:
        async with client.aio.live.connect(model=model, config=config) as session:
            print("[smoke] connected; sending one text turn (turn_complete=True)...")
            await session.send_client_content(
                turns=[{"role": "user", "parts": [{"text": "Say hello in one short friendly sentence."}]}],
                turn_complete=True,
            )

            event_count = 0
            audio_bytes_total = 0
            try:
                async with asyncio.timeout(30):
                    async for resp in session.receive():
                        event_count += 1
                        text = getattr(resp, "text", None)
                        data = getattr(resp, "data", None)
                        if data:
                            audio_bytes_total += len(data)
                        sc = getattr(resp, "server_content", None)
                        tc = getattr(sc, "turn_complete", None) if sc else None
                        model_turn = getattr(sc, "model_turn", None) if sc else None
                        mt_parts_summary = ""
                        if model_turn is not None:
                            parts = getattr(model_turn, "parts", None) or []
                            mt_parts_summary = f" model_turn.parts={len(parts)}"
                            # Only the first few parts are detailed to keep the log short.
                            for i, p in enumerate(parts[:3]):
                                ip = getattr(p, "inline_data", None)
                                tp = getattr(p, "text", None)
                                th = getattr(p, "thought", None)
                                # inline_data.data may exist but be None; treat that as
                                # zero bytes instead of crashing inside len().
                                payload = getattr(ip, "data", None) if ip else None
                                inline_summary = f"<{len(payload or b'')} bytes>" if ip else None
                                print(
                                    f"[smoke] part {i}: text={tp!r}, "
                                    f"inline_data={inline_summary}, "
                                    f"thought={th}"
                                )
                        data_summary = f"<{len(data)} bytes>" if data else None
                        print(
                            f"[smoke] event #{event_count}: text={text!r}, "
                            f"data={data_summary}, "
                            f"turn_complete={tc}{mt_parts_summary}"
                        )
                        if tc:
                            print("[smoke] turn_complete=True — exiting receive() loop")
                            break
            except asyncio.TimeoutError:
                print(f"[smoke] timed out after 30s, events={event_count}, audio_total={audio_bytes_total} bytes")
            print(f"[smoke] done. total events={event_count}, total_audio_bytes={audio_bytes_total}")
    except Exception as e:
        # Top-level boundary of a diagnostic script: report everything with a traceback.
        import traceback

        print(f"[smoke] CONNECTION ERROR: {e}")
        traceback.print_exc()
        return 1
    return 0
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
if __name__ == "__main__":
|
| 96 |
+
raise SystemExit(asyncio.run(main()))
|
scripts/list_gemini_live_models.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""List all Gemini models on this API key that support bidiGenerateContent.
|
| 2 |
+
|
| 3 |
+
These are the models you can put in GEMINI_LIVE_MODEL. Anything not listed
|
| 4 |
+
here will 1008 at connect time.
|
| 5 |
+
|
| 6 |
+
Use:
|
| 7 |
+
GEMINI_API_KEY=... /venvs/apps_venv/bin/python scripts/list_gemini_live_models.py
|
| 8 |
+
"""
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
|
| 11 |
+
import os
|
| 12 |
+
import sys
|
| 13 |
+
|
| 14 |
+
import httpx
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def main() -> int:
    """Print every model on this API key that supports ``bidiGenerateContent``.

    Reads ``GEMINI_API_KEY`` from the environment and queries the
    ``v1beta/models`` listing endpoint, following ``nextPageToken`` until the
    listing is exhausted.

    The key is sent in the ``x-goog-api-key`` header rather than the query
    string, so it cannot end up in the error text printed when a request
    fails (httpx includes the request URL in its error messages).

    Returns:
        0 on success (including when no Live-capable model is found); 1 when
        the key is missing or the request / response decoding fails.

    """
    key = os.environ.get("GEMINI_API_KEY", "").strip()
    if not key:
        print("ERROR: GEMINI_API_KEY not set", file=sys.stderr)
        return 1

    url = "https://generativelanguage.googleapis.com/v1beta/models"
    headers = {"x-goog-api-key": key}
    params = {"pageSize": 200}
    models = []
    try:
        while True:
            resp = httpx.get(url, params=params, headers=headers, timeout=15.0)
            resp.raise_for_status()
            data = resp.json()
            models.extend(data.get("models") or [])
            next_token = data.get("nextPageToken")
            if not next_token:
                break
            params["pageToken"] = next_token
    except Exception as e:
        print(f"ERROR: {e}", file=sys.stderr)
        return 1

    live_models = [
        m.get("name", "?").replace("models/", "")
        for m in models
        if "bidiGenerateContent" in (m.get("supportedGenerationMethods") or [])
    ]

    if not live_models:
        print("(no Live-capable models on this key)")
        return 0

    print(f"Live-capable models on this key ({len(live_models)}):\n")
    for name in sorted(live_models):
        print(f" {name}")
    return 0
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
if __name__ == "__main__":
|
| 50 |
+
raise SystemExit(main())
|
src/reachy_mini_receptionist/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""Nothing (for ruff)."""
|
src/reachy_mini_receptionist/audio/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""Nothing (for ruff)."""
|
src/reachy_mini_receptionist/audio/head_wobbler.py
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Moves head given audio samples."""
|
| 2 |
+
|
| 3 |
+
import time
|
| 4 |
+
import queue
|
| 5 |
+
import base64
|
| 6 |
+
import logging
|
| 7 |
+
import threading
|
| 8 |
+
from typing import Tuple
|
| 9 |
+
from collections.abc import Callable
|
| 10 |
+
|
| 11 |
+
import numpy as np
|
| 12 |
+
from numpy.typing import NDArray
|
| 13 |
+
|
| 14 |
+
from reachy_mini_receptionist.audio.speech_tapper import HOP_MS, SwayRollRT
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
SAMPLE_RATE = 24000
|
| 18 |
+
MOVEMENT_LATENCY_S = 0.2 # seconds between audio and robot movement
|
| 19 |
+
logger = logging.getLogger(__name__)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class HeadWobbler:
    """Converts audio deltas (base64) into head movement offsets.

    Producers call :meth:`feed` with base64-encoded int16 PCM. A consumer
    thread (:meth:`working_loop`) passes each chunk to ``SwayRollRT`` and
    hands the resulting per-hop offsets to the ``set_speech_offsets``
    callback, paced against ``time.monotonic()``.

    A generation counter invalidates work in flight: :meth:`reset` bumps it,
    and chunks or hops tagged with an older generation are discarded.
    """

    def __init__(self, set_speech_offsets: Callable[[Tuple[float, float, float, float, float, float]], None]) -> None:
        """Initialize the head wobbler.

        Args:
            set_speech_offsets: callback receiving one
                ``(x, y, z, roll, pitch, yaw)`` tuple per hop; translations
                are the sway millimetre values divided by 1000, rotations
                are the sway radian values.

        """
        self._apply_offsets = set_speech_offsets
        self._base_ts: float | None = None  # monotonic time of the first chunk of this generation
        self._hops_done: int = 0  # hops applied or dropped since _base_ts

        self.audio_queue: "queue.Queue[Tuple[int, int, NDArray[np.int16]]]" = queue.Queue()
        self.sway = SwayRollRT()

        # Synchronization primitives
        self._state_lock = threading.Lock()  # guards _generation, _base_ts, _hops_done
        self._sway_lock = threading.Lock()  # guards self.sway
        self._generation = 0

        self._stop_event = threading.Event()
        self._thread: threading.Thread | None = None

    def feed(self, delta_b64: str) -> None:
        """Thread-safe: push audio into the consumer queue.

        Args:
            delta_b64: base64-encoded int16 PCM. A trailing odd byte (an
                incomplete sample) is dropped instead of raising.

        """
        raw = base64.b64decode(delta_b64)
        sample_bytes = np.dtype(np.int16).itemsize
        usable = len(raw) - (len(raw) % sample_bytes)
        buf = np.frombuffer(raw[:usable], dtype=np.int16).reshape(1, -1)
        with self._state_lock:
            generation = self._generation
        self.audio_queue.put((generation, SAMPLE_RATE, buf))

    def start(self) -> None:
        """Start the head wobbler loop in a thread.

        Does nothing if the consumer thread is already running, so repeated
        calls never leave two threads consuming the same queue.
        """
        if self._thread is not None and self._thread.is_alive():
            logger.debug("Head wobbler already running")
            return
        self._stop_event.clear()
        self._thread = threading.Thread(target=self.working_loop, daemon=True)
        self._thread.start()
        logger.debug("Head wobbler started")

    def stop(self) -> None:
        """Stop the head wobbler loop and wait for the thread to exit."""
        self._stop_event.set()
        if self._thread is not None:
            self._thread.join()
            self._thread = None
        logger.debug("Head wobbler stopped")

    def working_loop(self) -> None:
        """Convert audio deltas into head movement offsets.

        Runs until the stop event is set. Hop ``k`` of a generation is due at
        ``base_ts + MOVEMENT_LATENCY_S + k * hop_dt``; hops that are already
        at least one hop late are skipped (the last hop of a chunk is always
        kept) so the motion catches up with the audio.
        """
        hop_dt = HOP_MS / 1000.0

        logger.debug("Head wobbler thread started")
        while not self._stop_event.is_set():
            queue_ref = self.audio_queue
            try:
                chunk_generation, sr, chunk = queue_ref.get_nowait()  # (gen, sr, data)
            except queue.Empty:
                # Idle wait; also bounds how long the loop takes to notice the stop event.
                time.sleep(MOVEMENT_LATENCY_S)
                continue

            try:
                with self._state_lock:
                    current_generation = self._generation
                if chunk_generation != current_generation:
                    # Chunk was queued before the last reset(): discard it.
                    continue

                if self._base_ts is None:
                    with self._state_lock:
                        if self._base_ts is None:
                            self._base_ts = time.monotonic()

                pcm = np.asarray(chunk).squeeze(0)
                with self._sway_lock:
                    results = self.sway.feed(pcm, sr)

                i = 0
                while i < len(results):
                    with self._state_lock:
                        if self._generation != current_generation:
                            break
                        base_ts = self._base_ts
                        hops_done = self._hops_done

                    if base_ts is None:
                        base_ts = time.monotonic()
                        with self._state_lock:
                            if self._base_ts is None:
                                self._base_ts = base_ts
                                hops_done = self._hops_done

                    target = base_ts + MOVEMENT_LATENCY_S + hops_done * hop_dt
                    now = time.monotonic()

                    if now - target >= hop_dt:
                        # Running late: skip whole hops, but never the last one of the chunk.
                        lag_hops = int((now - target) / hop_dt)
                        drop = min(lag_hops, len(results) - i - 1)
                        if drop > 0:
                            with self._state_lock:
                                self._hops_done += drop
                                hops_done = self._hops_done
                            i += drop
                            continue

                    if target > now:
                        time.sleep(target - now)
                        # A reset may have happened while sleeping.
                        with self._state_lock:
                            if self._generation != current_generation:
                                break

                    r = results[i]
                    offsets = (
                        r["x_mm"] / 1000.0,
                        r["y_mm"] / 1000.0,
                        r["z_mm"] / 1000.0,
                        r["roll_rad"],
                        r["pitch_rad"],
                        r["yaw_rad"],
                    )

                    with self._state_lock:
                        if self._generation != current_generation:
                            break

                    self._apply_offsets(offsets)

                    with self._state_lock:
                        self._hops_done += 1
                    i += 1
            finally:
                # Every successful get_nowait() is balanced by exactly one task_done().
                queue_ref.task_done()
        logger.debug("Head wobbler thread exited")

    def reset(self) -> None:
        """Reset the internal state.

        Bumps the generation (invalidating queued and in-flight chunks),
        clears the timing baseline, drains the queue and resets the sway
        generator.
        """
        with self._state_lock:
            self._generation += 1
            self._base_ts = None
            self._hops_done = 0

        # Drain any queued audio chunks from previous generations
        drained_any = False
        while True:
            try:
                _, _, _ = self.audio_queue.get_nowait()
            except queue.Empty:
                break
            else:
                drained_any = True
                self.audio_queue.task_done()

        with self._sway_lock:
            self.sway.reset()

        if drained_any:
            logger.debug("Head wobbler queue drained during reset")
|
src/reachy_mini_receptionist/audio/speech_tapper.py
ADDED
|
@@ -0,0 +1,268 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import math
|
| 3 |
+
from typing import Any, Dict, List
|
| 4 |
+
from itertools import islice
|
| 5 |
+
from collections import deque
|
| 6 |
+
|
| 7 |
+
import numpy as np
|
| 8 |
+
from numpy.typing import NDArray
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# Tunables
|
| 12 |
+
SR = 16_000
|
| 13 |
+
FRAME_MS = 20
|
| 14 |
+
HOP_MS = 50
|
| 15 |
+
|
| 16 |
+
SWAY_MASTER = 1.5
|
| 17 |
+
SENS_DB_OFFSET = +4.0
|
| 18 |
+
VAD_DB_ON = -35.0
|
| 19 |
+
VAD_DB_OFF = -45.0
|
| 20 |
+
VAD_ATTACK_MS = 40
|
| 21 |
+
VAD_RELEASE_MS = 250
|
| 22 |
+
ENV_FOLLOW_GAIN = 0.65
|
| 23 |
+
|
| 24 |
+
SWAY_F_PITCH = 2.2
|
| 25 |
+
SWAY_A_PITCH_DEG = 4.5
|
| 26 |
+
SWAY_F_YAW = 0.6
|
| 27 |
+
SWAY_A_YAW_DEG = 7.5
|
| 28 |
+
SWAY_F_ROLL = 1.3
|
| 29 |
+
SWAY_A_ROLL_DEG = 2.25
|
| 30 |
+
SWAY_F_X = 0.35
|
| 31 |
+
SWAY_A_X_MM = 4.5
|
| 32 |
+
SWAY_F_Y = 0.45
|
| 33 |
+
SWAY_A_Y_MM = 3.75
|
| 34 |
+
SWAY_F_Z = 0.25
|
| 35 |
+
SWAY_A_Z_MM = 2.25
|
| 36 |
+
|
| 37 |
+
SWAY_DB_LOW = -46.0
|
| 38 |
+
SWAY_DB_HIGH = -18.0
|
| 39 |
+
LOUDNESS_GAMMA = 0.9
|
| 40 |
+
SWAY_ATTACK_MS = 50
|
| 41 |
+
SWAY_RELEASE_MS = 250
|
| 42 |
+
|
| 43 |
+
# Derived
|
| 44 |
+
FRAME = int(SR * FRAME_MS / 1000)
|
| 45 |
+
HOP = int(SR * HOP_MS / 1000)
|
| 46 |
+
ATTACK_FR = max(1, int(VAD_ATTACK_MS / HOP_MS))
|
| 47 |
+
RELEASE_FR = max(1, int(VAD_RELEASE_MS / HOP_MS))
|
| 48 |
+
SWAY_ATTACK_FR = max(1, int(SWAY_ATTACK_MS / HOP_MS))
|
| 49 |
+
SWAY_RELEASE_FR = max(1, int(SWAY_RELEASE_MS / HOP_MS))
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def _rms_dbfs(x: NDArray[np.float32]) -> float:
|
| 53 |
+
"""Root-mean-square in dBFS for float32 mono array in [-1,1]."""
|
| 54 |
+
# numerically stable rms (avoid overflow)
|
| 55 |
+
x = x.astype(np.float32, copy=False)
|
| 56 |
+
rms = np.sqrt(np.mean(x * x, dtype=np.float32) + 1e-12, dtype=np.float32)
|
| 57 |
+
return float(20.0 * math.log10(float(rms) + 1e-12))
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def _loudness_gain(db: float, offset: float = SENS_DB_OFFSET) -> float:
    """Map a dB level onto a [0, 1] loudness gain.

    ``db + offset`` is placed linearly between ``SWAY_DB_LOW`` (-> 0) and
    ``SWAY_DB_HIGH`` (-> 1), clipped to that range, then raised to
    ``LOUDNESS_GAMMA`` (skipped when gamma is exactly 1).
    """
    span = SWAY_DB_HIGH - SWAY_DB_LOW
    ratio = (db + offset - SWAY_DB_LOW) / span
    # Written as comparisons (not min/max) so a NaN input propagates unchanged.
    clipped = 0.0 if ratio < 0.0 else (1.0 if ratio > 1.0 else ratio)
    if LOUDNESS_GAMMA == 1.0:
        return clipped
    return clipped**LOUDNESS_GAMMA
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def _to_float32_mono(x: NDArray[Any]) -> NDArray[np.float32]:
|
| 71 |
+
"""Convert arbitrary PCM array to float32 mono in [-1,1].
|
| 72 |
+
|
| 73 |
+
Accepts shapes: (N,), (1,N), (N,1), (C,N), (N,C).
|
| 74 |
+
"""
|
| 75 |
+
a = np.asarray(x)
|
| 76 |
+
if a.ndim == 0:
|
| 77 |
+
return np.zeros(0, dtype=np.float32)
|
| 78 |
+
|
| 79 |
+
# If 2D, decide which axis is channels (prefer small first dim)
|
| 80 |
+
if a.ndim == 2:
|
| 81 |
+
# e.g., (channels, samples) if channels is small (<=8)
|
| 82 |
+
if a.shape[0] <= 8 and a.shape[0] <= a.shape[1]:
|
| 83 |
+
a = np.mean(a, axis=0)
|
| 84 |
+
else:
|
| 85 |
+
a = np.mean(a, axis=1)
|
| 86 |
+
elif a.ndim > 2:
|
| 87 |
+
a = np.mean(a.reshape(a.shape[0], -1), axis=0)
|
| 88 |
+
|
| 89 |
+
# Now 1D, cast/scale
|
| 90 |
+
if np.issubdtype(a.dtype, np.floating):
|
| 91 |
+
return a.astype(np.float32, copy=False)
|
| 92 |
+
# integer PCM
|
| 93 |
+
info = np.iinfo(a.dtype)
|
| 94 |
+
scale = float(max(-info.min, info.max))
|
| 95 |
+
return a.astype(np.float32) / (scale if scale != 0.0 else 1.0)
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def _resample_linear(x: NDArray[np.float32], sr_in: int, sr_out: int) -> NDArray[np.float32]:
|
| 99 |
+
"""Lightweight linear resampler for short buffers."""
|
| 100 |
+
if sr_in == sr_out or x.size == 0:
|
| 101 |
+
return x
|
| 102 |
+
# guard tiny sizes
|
| 103 |
+
n_out = int(round(x.size * sr_out / sr_in))
|
| 104 |
+
if n_out <= 1:
|
| 105 |
+
return np.zeros(0, dtype=np.float32)
|
| 106 |
+
t_in = np.linspace(0.0, 1.0, num=x.size, dtype=np.float32, endpoint=True)
|
| 107 |
+
t_out = np.linspace(0.0, 1.0, num=n_out, dtype=np.float32, endpoint=True)
|
| 108 |
+
return np.interp(t_out, t_in, x).astype(np.float32, copy=False)
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
class SwayRollRT:
    """Feed audio chunks → per-hop sway outputs.

    Usage:
        rt = SwayRollRT()
        rt.feed(pcm_int16_or_float, sr) -> List[dict]

    Incoming audio is converted to float32 mono at ``SR`` and consumed in
    ``HOP``-sample steps. For each hop the level of the most recent ``FRAME``
    samples drives a voice-activity gate and an envelope, which scale six
    sinusoidal oscillators (pitch / yaw / roll / x / y / z).
    """

    def __init__(self, rng_seed: int = 7) -> None:
        """Initialize state.

        Args:
            rng_seed: seed for the generator that draws the six initial
                oscillator phases.

        """
        self._seed = int(rng_seed)
        self.samples: deque[float] = deque(maxlen=10 * SR)  # sliding window for VAD/env
        self.carry: NDArray[np.float32] = np.zeros(0, dtype=np.float32)  # samples not yet forming a full hop

        # Voice-activity gate state.
        self.vad_on = False
        self.vad_above = 0
        self.vad_below = 0

        # Envelope state.
        self.sway_env = 0.0
        self.sway_up = 0
        self.sway_down = 0

        rng = np.random.default_rng(self._seed)
        self.phase_pitch = float(rng.random() * 2 * math.pi)
        self.phase_yaw = float(rng.random() * 2 * math.pi)
        self.phase_roll = float(rng.random() * 2 * math.pi)
        self.phase_x = float(rng.random() * 2 * math.pi)
        self.phase_y = float(rng.random() * 2 * math.pi)
        self.phase_z = float(rng.random() * 2 * math.pi)
        self.t = 0.0  # oscillator time in seconds, advanced by HOP_MS per hop

    def reset(self) -> None:
        """Reset state (VAD/env/buffers/time) but keep initial phases/seed."""
        self.samples.clear()
        self.carry = np.zeros(0, dtype=np.float32)
        self.vad_on = False
        self.vad_above = 0
        self.vad_below = 0
        self.sway_env = 0.0
        self.sway_up = 0
        self.sway_down = 0
        self.t = 0.0

    def feed(self, pcm: NDArray[Any], sr: int | None) -> List[Dict[str, float]]:
        """Stream in PCM chunk. Returns a list of sway dicts, one per hop (HOP_MS).

        Args:
            pcm: np.ndarray, shape (N,) or (C,N)/(N,C); int or float.
            sr: sample rate of `pcm` (None -> assume SR).

        Returns:
            One dict per completed hop with keys ``pitch_rad``, ``yaw_rad``,
            ``roll_rad``, ``pitch_deg``, ``yaw_deg``, ``roll_deg``, ``x_mm``,
            ``y_mm`` and ``z_mm``. Samples that do not fill a hop are carried
            over to the next call.

        """
        sr_in = SR if sr is None else int(sr)
        x = _to_float32_mono(pcm)
        if x.size == 0:
            return []
        if sr_in != SR:
            x = _resample_linear(x, sr_in, SR)
            if x.size == 0:
                return []

        # append to carry and consume fixed HOP chunks
        if self.carry.size:
            self.carry = np.concatenate([self.carry, x])
        else:
            self.carry = x

        out: List[Dict[str, float]] = []

        while self.carry.size >= HOP:
            hop = self.carry[:HOP]
            remaining: NDArray[np.float32] = self.carry[HOP:]
            self.carry = remaining

            # keep sliding window for VAD/env computation
            self.samples.extend(hop.tolist())
            if len(self.samples) < FRAME:
                self.t += HOP_MS / 1000.0
                continue

            # Take the newest FRAME samples by walking the deque from its
            # right end: this costs O(FRAME) per hop, whereas a forward
            # islice has to step over the whole (up to 10 s) window first.
            newest_first = np.fromiter(
                islice(reversed(self.samples), FRAME),
                dtype=np.float32,
                count=FRAME,
            )
            # Restore chronological order in a contiguous buffer.
            frame = np.ascontiguousarray(newest_first[::-1])
            db = _rms_dbfs(frame)

            # VAD with hysteresis + attack/release
            if db >= VAD_DB_ON:
                self.vad_above += 1
                self.vad_below = 0
                if not self.vad_on and self.vad_above >= ATTACK_FR:
                    self.vad_on = True
            elif db <= VAD_DB_OFF:
                self.vad_below += 1
                self.vad_above = 0
                if self.vad_on and self.vad_below >= RELEASE_FR:
                    self.vad_on = False

            if self.vad_on:
                self.sway_up = min(SWAY_ATTACK_FR, self.sway_up + 1)
                self.sway_down = 0
            else:
                self.sway_down = min(SWAY_RELEASE_FR, self.sway_down + 1)
                self.sway_up = 0

            up = self.sway_up / SWAY_ATTACK_FR
            down = 1.0 - (self.sway_down / SWAY_RELEASE_FR)
            target = up if self.vad_on else down
            self.sway_env += ENV_FOLLOW_GAIN * (target - self.sway_env)
            # clamp
            if self.sway_env < 0.0:
                self.sway_env = 0.0
            elif self.sway_env > 1.0:
                self.sway_env = 1.0

            loud = _loudness_gain(db) * SWAY_MASTER
            env = self.sway_env
            self.t += HOP_MS / 1000.0

            # oscillators
            pitch = (
                math.radians(SWAY_A_PITCH_DEG)
                * loud
                * env
                * math.sin(2 * math.pi * SWAY_F_PITCH * self.t + self.phase_pitch)
            )
            yaw = (
                math.radians(SWAY_A_YAW_DEG)
                * loud
                * env
                * math.sin(2 * math.pi * SWAY_F_YAW * self.t + self.phase_yaw)
            )
            roll = (
                math.radians(SWAY_A_ROLL_DEG)
                * loud
                * env
                * math.sin(2 * math.pi * SWAY_F_ROLL * self.t + self.phase_roll)
            )
            x_mm = SWAY_A_X_MM * loud * env * math.sin(2 * math.pi * SWAY_F_X * self.t + self.phase_x)
            y_mm = SWAY_A_Y_MM * loud * env * math.sin(2 * math.pi * SWAY_F_Y * self.t + self.phase_y)
            z_mm = SWAY_A_Z_MM * loud * env * math.sin(2 * math.pi * SWAY_F_Z * self.t + self.phase_z)

            out.append(
                {
                    "pitch_rad": pitch,
                    "yaw_rad": yaw,
                    "roll_rad": roll,
                    "pitch_deg": math.degrees(pitch),
                    "yaw_deg": math.degrees(yaw),
                    "roll_deg": math.degrees(roll),
                    "x_mm": x_mm,
                    "y_mm": y_mm,
                    "z_mm": z_mm,
                },
            )

        return out
|
src/reachy_mini_receptionist/calendar_data.py
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Calendar data — appointments source for the receptionist.
|
| 2 |
+
|
| 3 |
+
Single source: a Google Calendar (or any iCal feed) configured via the
|
| 4 |
+
``RECEPTION_ICS_URL`` env var. Operators add events in Google Calendar;
|
| 5 |
+
the robot fetches every ~5 min via ``ical_calendar.py``. Title
|
| 6 |
+
convention: ``"<Visitor> with <Host>"`` (host resolved through
|
| 7 |
+
``employees.py``).
|
| 8 |
+
|
| 9 |
+
When ``RECEPTION_ICS_URL`` is unset OR the feed is unreachable, this
|
| 10 |
+
module returns an EMPTY calendar. That's intentional — the receptionist
|
| 11 |
+
supports exactly two visitor paths:
|
| 12 |
+
|
| 13 |
+
1. **Scheduled visitor** — appointment exists in the iCal feed; bot
|
| 14 |
+
matches by visitor name and emails the host.
|
| 15 |
+
2. **Walk-in to see an employee** — visitor names a host that lives in
|
| 16 |
+
the SQLite Employee directory (managed from the dashboard's
|
| 17 |
+
Employees panel); bot calls ``lookup_employee`` and emails the host.
|
| 18 |
+
|
| 19 |
+
There is intentionally no hardcoded demo schedule fallback. If you want
|
| 20 |
+
demo data, add it to Google Calendar; if you want a host-only flow, add
|
| 21 |
+
the host via the dashboard.
|
| 22 |
+
|
| 23 |
+
The ``visiting`` field on each returned appointment is always an email
|
| 24 |
+
address (resolved through the employee directory). If a host can't be
|
| 25 |
+
resolved, the original string is preserved so the LLM can flag it.
|
| 26 |
+
"""
|
| 27 |
+
from __future__ import annotations
|
| 28 |
+
|
| 29 |
+
import asyncio
|
| 30 |
+
import os
|
| 31 |
+
from datetime import datetime
|
| 32 |
+
from typing import Any, Dict, List, Optional
|
| 33 |
+
|
| 34 |
+
from reachy_mini_receptionist import employees
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def _resolve_visiting(visiting: str) -> str:
|
| 38 |
+
"""Return the email for a `visiting` reference, falling back to itself.
|
| 39 |
+
|
| 40 |
+
Looks up via the employee directory first; if `visiting` already contains
|
| 41 |
+
`@` (one-off external host), returns it unchanged.
|
| 42 |
+
"""
|
| 43 |
+
if not visiting:
|
| 44 |
+
return ""
|
| 45 |
+
if "@" in visiting:
|
| 46 |
+
return visiting
|
| 47 |
+
email = employees.find_email_for(visiting)
|
| 48 |
+
return email or visiting
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def _appointments_from_ical(ics_url: str) -> List[Dict[str, Any]]:
    """Fetch appointments from an iCal feed and reshape them for callers.

    Each event returned by ``ical_calendar.fetch_appointments`` is reduced to
    the keys ``time``, ``name``, ``note`` and ``visiting``. ``visiting`` is
    the event's ``_host_query`` passed through ``_resolve_visiting``, or
    ``""`` when the event has no host query; other event fields are dropped.
    """
    # Imported here rather than at module level, as in the original.
    from reachy_mini_receptionist import ical_calendar

    def _reshape(event: Dict[str, Any]) -> Dict[str, Any]:
        host = event.get("_host_query", "")
        return {
            "time": event.get("time", ""),
            "name": event.get("name", ""),
            "note": event.get("note", ""),
            "visiting": _resolve_visiting(host) if host else "",
        }

    return [_reshape(event) for event in ical_calendar.fetch_appointments(ics_url)]
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def get_appointments() -> List[dict]:
    """Return the appointment list with ``visiting`` resolved to an email.

    The feed URL is read from the ``RECEPTION_ICS_URL`` environment variable.
    When it is unset or blank an empty list is returned and no fetch is
    attempted; visitors who are not on the schedule are then handled by the
    walk-in flow (``lookup_employee``).

    Each item has:
        time (str) — e.g. "11:00 AM"
        name (str) — guest name
        note (str) — short description (may be empty)
        visiting (str) — host email, or the unresolved host string
    """
    ics_url = os.getenv("RECEPTION_ICS_URL", "").strip()
    return _appointments_from_ical(ics_url) if ics_url else []
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
async def get_appointments_async() -> List[dict]:
    """Return the result of ``get_appointments`` without blocking the event loop.

    The synchronous ``get_appointments`` call is run in a worker thread via
    ``asyncio.to_thread``, so async callers (realtime audio loop, tool
    completion handlers) are not stalled while the calendar is fetched.
    """
    appointments = await asyncio.to_thread(get_appointments)
    return appointments
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def format_for_llm() -> str:
    """Render today's date and the appointment list as text for the LLM.

    With no appointments, a single sentence is returned that states the date
    and points walk-ins to ``lookup_employee``. Otherwise the result is a
    header line followed by one ``time: name — note`` line per appointment.
    """
    today = datetime.now().strftime("%A, %B %d %Y")
    appts = get_appointments()
    if appts:
        header = f"Today is {today}. Appointments:"
        entries = [f" {appt['time']}: {appt['name']} — {appt['note']}" for appt in appts]
        return "\n".join([header, *entries])
    return (
        f"Today is {today}. No scheduled appointments in the calendar. "
        "Walk-in visitors should be routed via lookup_employee."
    )
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def get_appointment_for_name(name: str) -> Optional[dict]:
    """Find an appointment by guest name (case-insensitive).

    Both the query and the stored guest name are stripped and lower-cased
    before comparison, mirroring how ``find_appointment_for_employee``
    normalizes host emails.

    Args:
        name: Guest name to look for. ``None`` or a blank string never
            matches anything.

    Returns:
        The first matching appointment dict, or ``None`` when there is no
        match.
    """
    target = (name or "").strip().lower()
    if not target:
        # A blank query must not match an appointment whose name is blank.
        return None
    for appt in get_appointments():
        # Tolerate a missing/None name instead of raising on ``.lower()``.
        if (appt.get("name") or "").strip().lower() == target:
            return appt
    return None
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def find_appointment_for_employee(employee_query: str) -> Optional[dict]:
    """Return today's first appointment hosted by ``employee_query``.

    The query is first passed through ``employees.find_email_for`` so a
    name, alias or email can be supplied; when the directory has no answer
    the raw query itself is used as the email. Comparison against each
    appointment's ``visiting`` value ignores case and surrounding
    whitespace.

    Returns:
        The first matching appointment, or ``None`` when the query is blank
        or nothing on today's schedule is for that host.
    """
    resolved = employees.find_email_for(employee_query)
    host_email = ((resolved or employee_query) or "").strip().lower()
    if not host_email:
        return None

    matches = (
        appt
        for appt in get_appointments()
        if (appt.get("visiting") or "").strip().lower() == host_email
    )
    return next(matches, None)
|
src/reachy_mini_receptionist/camera_worker.py
ADDED
|
@@ -0,0 +1,241 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Camera worker thread with frame buffering and face tracking.
|
| 2 |
+
|
| 3 |
+
Ported from main_works.py camera_worker() function to provide:
|
| 4 |
+
- Up to ~25Hz camera polling (40 ms pause per loop) with thread-safe frame buffering
|
| 5 |
+
- Face tracking integration with smooth interpolation
|
| 6 |
+
- Latest frame always available for tools
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import time
|
| 10 |
+
import logging
|
| 11 |
+
import threading
|
| 12 |
+
from typing import Any, List, Tuple
|
| 13 |
+
|
| 14 |
+
import numpy as np
|
| 15 |
+
from numpy.typing import NDArray
|
| 16 |
+
from scipy.spatial.transform import Rotation as R
|
| 17 |
+
|
| 18 |
+
from reachy_mini import ReachyMini
|
| 19 |
+
from reachy_mini.utils.interpolation import linear_pose_interpolation
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
logger = logging.getLogger(__name__)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class CameraWorker:
|
| 26 |
+
"""Thread-safe camera worker with frame buffering and face tracking."""
|
| 27 |
+
|
| 28 |
+
def __init__(self, reachy_mini: ReachyMini, head_tracker: Any = None) -> None:
|
| 29 |
+
"""Initialize."""
|
| 30 |
+
self.reachy_mini = reachy_mini
|
| 31 |
+
self.head_tracker = head_tracker
|
| 32 |
+
|
| 33 |
+
# Thread-safe frame storage
|
| 34 |
+
self.latest_frame: NDArray[np.uint8] | None = None
|
| 35 |
+
self.frame_lock = threading.Lock()
|
| 36 |
+
self._stop_event = threading.Event()
|
| 37 |
+
self._thread: threading.Thread | None = None
|
| 38 |
+
|
| 39 |
+
# Face tracking state
|
| 40 |
+
self.is_head_tracking_enabled = True
|
| 41 |
+
self.face_tracking_offsets: List[float] = [
|
| 42 |
+
0.0,
|
| 43 |
+
0.0,
|
| 44 |
+
0.0,
|
| 45 |
+
0.0,
|
| 46 |
+
0.0,
|
| 47 |
+
0.0,
|
| 48 |
+
] # x, y, z, roll, pitch, yaw
|
| 49 |
+
self.face_tracking_lock = threading.Lock()
|
| 50 |
+
|
| 51 |
+
# Face tracking timing variables (same as main_works.py)
|
| 52 |
+
self.last_face_detected_time: float | None = None
|
| 53 |
+
self.interpolation_start_time: float | None = None
|
| 54 |
+
self.interpolation_start_pose: NDArray[np.float32] | None = None
|
| 55 |
+
self.face_lost_delay = 2.0 # seconds to wait before starting interpolation
|
| 56 |
+
self.interpolation_duration = 1.0 # seconds to interpolate back to neutral
|
| 57 |
+
|
| 58 |
+
# Track state changes
|
| 59 |
+
self.previous_head_tracking_state = self.is_head_tracking_enabled
|
| 60 |
+
|
| 61 |
+
def get_latest_frame(self) -> NDArray[np.uint8] | None:
|
| 62 |
+
"""Get the latest frame (thread-safe)."""
|
| 63 |
+
with self.frame_lock:
|
| 64 |
+
if self.latest_frame is None:
|
| 65 |
+
return None
|
| 66 |
+
# Return a copy in original BGR format (OpenCV native)
|
| 67 |
+
return self.latest_frame.copy()
|
| 68 |
+
|
| 69 |
+
def get_face_tracking_offsets(
|
| 70 |
+
self,
|
| 71 |
+
) -> Tuple[float, float, float, float, float, float]:
|
| 72 |
+
"""Get current face tracking offsets (thread-safe)."""
|
| 73 |
+
with self.face_tracking_lock:
|
| 74 |
+
offsets = self.face_tracking_offsets
|
| 75 |
+
return (offsets[0], offsets[1], offsets[2], offsets[3], offsets[4], offsets[5])
|
| 76 |
+
|
| 77 |
+
def set_head_tracking_enabled(self, enabled: bool) -> None:
|
| 78 |
+
"""Enable/disable head tracking."""
|
| 79 |
+
self.is_head_tracking_enabled = enabled
|
| 80 |
+
logger.info(f"Head tracking {'enabled' if enabled else 'disabled'}")
|
| 81 |
+
|
| 82 |
+
def start(self) -> None:
|
| 83 |
+
"""Start the camera worker loop in a thread."""
|
| 84 |
+
self._stop_event.clear()
|
| 85 |
+
self._thread = threading.Thread(target=self.working_loop, daemon=True)
|
| 86 |
+
self._thread.start()
|
| 87 |
+
logger.debug("Camera worker started")
|
| 88 |
+
|
| 89 |
+
def stop(self) -> None:
|
| 90 |
+
"""Stop the camera worker loop."""
|
| 91 |
+
self._stop_event.set()
|
| 92 |
+
if self._thread is not None:
|
| 93 |
+
self._thread.join()
|
| 94 |
+
|
| 95 |
+
logger.debug("Camera worker stopped")
|
| 96 |
+
|
| 97 |
+
def working_loop(self) -> None:
|
| 98 |
+
"""Enable the camera worker loop.
|
| 99 |
+
|
| 100 |
+
Ported from main_works.py camera_worker() with same logic.
|
| 101 |
+
"""
|
| 102 |
+
logger.debug("Starting camera working loop")
|
| 103 |
+
|
| 104 |
+
# Initialize head tracker if available
|
| 105 |
+
neutral_pose = np.eye(4) # Neutral pose (identity matrix)
|
| 106 |
+
self.previous_head_tracking_state = self.is_head_tracking_enabled
|
| 107 |
+
|
| 108 |
+
while not self._stop_event.is_set():
|
| 109 |
+
try:
|
| 110 |
+
current_time = time.time()
|
| 111 |
+
|
| 112 |
+
# Get frame from robot
|
| 113 |
+
frame = self.reachy_mini.media.get_frame()
|
| 114 |
+
|
| 115 |
+
if frame is not None:
|
| 116 |
+
# Thread-safe frame storage
|
| 117 |
+
with self.frame_lock:
|
| 118 |
+
self.latest_frame = frame # .copy()
|
| 119 |
+
|
| 120 |
+
# Check if face tracking was just disabled
|
| 121 |
+
if self.previous_head_tracking_state and not self.is_head_tracking_enabled:
|
| 122 |
+
# Face tracking was just disabled - start interpolation to neutral
|
| 123 |
+
self.last_face_detected_time = current_time # Trigger the face-lost logic
|
| 124 |
+
self.interpolation_start_time = None # Will be set by the face-lost interpolation
|
| 125 |
+
self.interpolation_start_pose = None
|
| 126 |
+
|
| 127 |
+
# Update tracking state
|
| 128 |
+
self.previous_head_tracking_state = self.is_head_tracking_enabled
|
| 129 |
+
|
| 130 |
+
# Handle face tracking if enabled and head tracker available
|
| 131 |
+
if self.is_head_tracking_enabled and self.head_tracker is not None:
|
| 132 |
+
eye_center, _ = self.head_tracker.get_head_position(frame)
|
| 133 |
+
|
| 134 |
+
if eye_center is not None:
|
| 135 |
+
# Face detected - immediately switch to tracking
|
| 136 |
+
self.last_face_detected_time = current_time
|
| 137 |
+
self.interpolation_start_time = None # Stop any interpolation
|
| 138 |
+
|
| 139 |
+
# Convert normalized coordinates to pixel coordinates
|
| 140 |
+
h, w, _ = frame.shape
|
| 141 |
+
eye_center_norm = (eye_center + 1) / 2
|
| 142 |
+
eye_center_pixels = [
|
| 143 |
+
eye_center_norm[0] * w,
|
| 144 |
+
eye_center_norm[1] * h,
|
| 145 |
+
]
|
| 146 |
+
|
| 147 |
+
# Get the head pose needed to look at the target, but don't perform movement
|
| 148 |
+
target_pose = self.reachy_mini.look_at_image(
|
| 149 |
+
eye_center_pixels[0],
|
| 150 |
+
eye_center_pixels[1],
|
| 151 |
+
duration=0.0,
|
| 152 |
+
perform_movement=False,
|
| 153 |
+
)
|
| 154 |
+
|
| 155 |
+
# Extract translation and rotation from the target pose directly
|
| 156 |
+
translation = target_pose[:3, 3]
|
| 157 |
+
rotation = R.from_matrix(target_pose[:3, :3]).as_euler("xyz", degrees=False)
|
| 158 |
+
|
| 159 |
+
# Scale down translation and rotation because smaller FOV
|
| 160 |
+
translation *= 0.6
|
| 161 |
+
rotation *= 0.6
|
| 162 |
+
|
| 163 |
+
# Thread-safe update of face tracking offsets (use pose as-is)
|
| 164 |
+
with self.face_tracking_lock:
|
| 165 |
+
self.face_tracking_offsets = [
|
| 166 |
+
translation[0],
|
| 167 |
+
translation[1],
|
| 168 |
+
translation[2], # x, y, z
|
| 169 |
+
rotation[0],
|
| 170 |
+
rotation[1],
|
| 171 |
+
rotation[2], # roll, pitch, yaw
|
| 172 |
+
]
|
| 173 |
+
|
| 174 |
+
# No face detected while tracking enabled - set face lost timestamp
|
| 175 |
+
elif self.last_face_detected_time is None or self.last_face_detected_time == current_time:
|
| 176 |
+
# Only update if we haven't already set a face lost time
|
| 177 |
+
# (current_time check prevents overriding the disable-triggered timestamp)
|
| 178 |
+
pass
|
| 179 |
+
|
| 180 |
+
# Handle smooth interpolation (works for both face-lost and tracking-disabled cases)
|
| 181 |
+
if self.last_face_detected_time is not None:
|
| 182 |
+
time_since_face_lost = current_time - self.last_face_detected_time
|
| 183 |
+
|
| 184 |
+
if time_since_face_lost >= self.face_lost_delay:
|
| 185 |
+
# Start interpolation if not already started
|
| 186 |
+
if self.interpolation_start_time is None:
|
| 187 |
+
self.interpolation_start_time = current_time
|
| 188 |
+
# Capture current pose as start of interpolation
|
| 189 |
+
with self.face_tracking_lock:
|
| 190 |
+
current_translation = self.face_tracking_offsets[:3]
|
| 191 |
+
current_rotation_euler = self.face_tracking_offsets[3:]
|
| 192 |
+
# Convert to 4x4 pose matrix
|
| 193 |
+
pose_matrix = np.eye(4, dtype=np.float32)
|
| 194 |
+
pose_matrix[:3, 3] = current_translation
|
| 195 |
+
pose_matrix[:3, :3] = R.from_euler(
|
| 196 |
+
"xyz",
|
| 197 |
+
current_rotation_euler,
|
| 198 |
+
).as_matrix()
|
| 199 |
+
self.interpolation_start_pose = pose_matrix
|
| 200 |
+
|
| 201 |
+
# Calculate interpolation progress (t from 0 to 1)
|
| 202 |
+
elapsed_interpolation = current_time - self.interpolation_start_time
|
| 203 |
+
t = min(1.0, elapsed_interpolation / self.interpolation_duration)
|
| 204 |
+
|
| 205 |
+
# Interpolate between current pose and neutral pose
|
| 206 |
+
interpolated_pose = linear_pose_interpolation(
|
| 207 |
+
self.interpolation_start_pose,
|
| 208 |
+
neutral_pose,
|
| 209 |
+
t,
|
| 210 |
+
)
|
| 211 |
+
|
| 212 |
+
# Extract translation and rotation from interpolated pose
|
| 213 |
+
translation = interpolated_pose[:3, 3]
|
| 214 |
+
rotation = R.from_matrix(interpolated_pose[:3, :3]).as_euler("xyz", degrees=False)
|
| 215 |
+
|
| 216 |
+
# Thread-safe update of face tracking offsets
|
| 217 |
+
with self.face_tracking_lock:
|
| 218 |
+
self.face_tracking_offsets = [
|
| 219 |
+
translation[0],
|
| 220 |
+
translation[1],
|
| 221 |
+
translation[2], # x, y, z
|
| 222 |
+
rotation[0],
|
| 223 |
+
rotation[1],
|
| 224 |
+
rotation[2], # roll, pitch, yaw
|
| 225 |
+
]
|
| 226 |
+
|
| 227 |
+
# If interpolation is complete, reset timing
|
| 228 |
+
if t >= 1.0:
|
| 229 |
+
self.last_face_detected_time = None
|
| 230 |
+
self.interpolation_start_time = None
|
| 231 |
+
self.interpolation_start_pose = None
|
| 232 |
+
# else: Keep current offsets (within 2s delay period)
|
| 233 |
+
|
| 234 |
+
# Small sleep to prevent excessive CPU usage (same as main_works.py)
|
| 235 |
+
time.sleep(0.04)
|
| 236 |
+
|
| 237 |
+
except Exception as e:
|
| 238 |
+
logger.error(f"Camera worker error: {e}")
|
| 239 |
+
time.sleep(0.1) # Longer sleep on error
|
| 240 |
+
|
| 241 |
+
logger.debug("Camera worker thread exited")
|
src/reachy_mini_receptionist/config.py
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
import logging
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
+
from dotenv import find_dotenv, load_dotenv
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
# Locked profile: set to a profile name (e.g., "astronomer") to lock the app
|
| 10 |
+
# to that profile and disable all profile switching. Leave as None for normal behavior.
|
| 11 |
+
LOCKED_PROFILE: str | None = "_reachy_mini_receptionist_locked_profile"
|
| 12 |
+
DEFAULT_PROFILES_DIRECTORY = Path(__file__).parent / "profiles"
|
| 13 |
+
|
| 14 |
+
logger = logging.getLogger(__name__)
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def _env_flag(name: str, default: bool = False) -> bool:
    """Read environment variable ``name`` as a boolean.

    Matching ignores case and surrounding whitespace.

    Truthy spellings: 1, true, yes, on
    Falsy spellings:  0, false, no, off

    Returns ``default`` when the variable is unset, and also (after logging
    a warning) when its value is not one of the spellings above.
    """
    raw = os.getenv(name)
    if raw is None:
        return default

    spellings = {
        "1": True,
        "true": True,
        "yes": True,
        "on": True,
        "0": False,
        "false": False,
        "no": False,
        "off": False,
    }
    parsed = spellings.get(raw.strip().lower())
    if parsed is None:
        logger.warning("Invalid boolean value for %s=%r, using default=%s", name, raw, default)
        return default
    return parsed
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def _collect_profile_names(profiles_root: Path) -> set[str]:
    """Return the names of the sub-directories directly under ``profiles_root``.

    An empty set is returned when ``profiles_root`` is missing or is not a
    directory.
    """
    if not (profiles_root.exists() and profiles_root.is_dir()):
        return set()

    names: set[str] = set()
    for entry in profiles_root.iterdir():
        if entry.is_dir():
            names.add(entry.name)
    return names
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def _collect_tool_module_names(tools_root: Path) -> set[str]:
    """Return the stems of the ``*.py`` files directly under ``tools_root``.

    ``__init__`` and ``core_tools`` are never reported. An empty set is
    returned when ``tools_root`` is missing or is not a directory.
    """
    if not (tools_root.exists() and tools_root.is_dir()):
        return set()

    module_names = {path.stem for path in tools_root.glob("*.py") if path.is_file()}
    return module_names - {"__init__", "core_tools"}
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def _raise_on_name_collisions(
    *,
    label: str,
    external_root: Path,
    internal_root: Path,
    external_names: set[str],
    internal_names: set[str],
) -> None:
    """Fail loudly when a name exists both externally and built-in.

    Args:
        label: Kind of item being checked (used only in the message).
        external_root: Directory the external names came from.
        internal_root: Directory the built-in names came from.
        external_names: Names found under ``external_root``.
        internal_names: Names found under ``internal_root``.

    Raises:
        RuntimeError: If the two name sets overlap; the message lists the
            colliding names in sorted order together with both roots.
    """
    collisions = sorted(external_names.intersection(internal_names))
    if collisions:
        raise RuntimeError(
            f"Config.__init__(): Ambiguous {label} names found in both external and built-in libraries: {collisions}. "
            f"External {label} root: {external_root}. Built-in {label} root: {internal_root}. "
            f"Please rename the conflicting external {label}(s) to continue."
        )
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
# Fail fast at import time when the locked profile is unusable: its folder
# must exist under the built-in profiles directory and contain
# instructions.txt. Either problem is reported on stderr and exits with code 1.
if LOCKED_PROFILE is not None:
    _profiles_dir = DEFAULT_PROFILES_DIRECTORY
    _profile_path = _profiles_dir / LOCKED_PROFILE
    _instructions_file = _profile_path / "instructions.txt"
    if not _profile_path.is_dir():
        print(f"Error: LOCKED_PROFILE '{LOCKED_PROFILE}' does not exist in {_profiles_dir}", file=sys.stderr)
        sys.exit(1)
    elif not _instructions_file.is_file():
        print(f"Error: LOCKED_PROFILE '{LOCKED_PROFILE}' has no instructions.txt", file=sys.stderr)
        sys.exit(1)

# REACHY_MINI_SKIP_DOTENV lets a deployment opt out of .env loading entirely.
_skip_dotenv = _env_flag("REACHY_MINI_SKIP_DOTENV", default=False)

if not _skip_dotenv:
    # Search upward from the current working directory for a .env file.
    dotenv_path = find_dotenv(usecwd=True)

    if not dotenv_path:
        logger.warning("No .env file found, using environment variables")
    else:
        # Values from .env take precedence over variables already set.
        load_dotenv(dotenv_path=dotenv_path, override=True)
        logger.info(f"Configuration loaded from {dotenv_path}")
else:
    logger.info("Skipping .env loading because REACHY_MINI_SKIP_DOTENV is set")
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
class Config:
    """Configuration class for the receptionist app.

    All settings are class attributes read from the environment once, when
    the class body executes. Instantiating the class validates the profile
    and tool directories and logs which ones are in use.
    """

    # Required
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")  # The key is downloaded in console.py if needed

    # Optional
    MODEL_NAME = os.getenv("MODEL_NAME", "gpt-realtime")
    HF_HOME = os.getenv("HF_HOME", "./cache")
    LOCAL_VISION_MODEL = os.getenv("LOCAL_VISION_MODEL", "HuggingFaceTB/SmolVLM2-2.2B-Instruct")
    HF_TOKEN = os.getenv("HF_TOKEN")  # Optional, falls back to hf auth login if not set

    logger.debug(f"Model: {MODEL_NAME}, HF_HOME: {HF_HOME}, Vision Model: {LOCAL_VISION_MODEL}")

    # External profiles root, falling back to the packaged "profiles" folder.
    _profiles_directory_env = os.getenv("REACHY_MINI_EXTERNAL_PROFILES_DIRECTORY")
    PROFILES_DIRECTORY = (
        Path(_profiles_directory_env) if _profiles_directory_env else Path(__file__).parent / "profiles"
    )
    # External tools root; None means only built-in tools are used.
    _tools_directory_env = os.getenv("REACHY_MINI_EXTERNAL_TOOLS_DIRECTORY")
    TOOLS_DIRECTORY = Path(_tools_directory_env) if _tools_directory_env else None
    AUTOLOAD_EXTERNAL_TOOLS = _env_flag("AUTOLOAD_EXTERNAL_TOOLS", default=False)
    # A locked profile always wins over the environment variable.
    REACHY_MINI_CUSTOM_PROFILE = LOCKED_PROFILE or os.getenv("REACHY_MINI_CUSTOM_PROFILE")

    logger.debug(f"Custom Profile: {REACHY_MINI_CUSTOM_PROFILE}")

    def __init__(self) -> None:
        """Validate the configured directories and log the active sources.

        Raises:
            RuntimeError: If an external profiles root is configured and the
                selected profile is not a folder inside it, or if an external
                profile/tool name collides with a built-in one.
        """
        profiles_root = self.PROFILES_DIRECTORY
        tools_root = self.TOOLS_DIRECTORY
        selected_profile = self.REACHY_MINI_CUSTOM_PROFILE
        uses_external_profiles = profiles_root != DEFAULT_PROFILES_DIRECTORY

        if uses_external_profiles:
            # The selected profile must exist inside the external root.
            if selected_profile and not (profiles_root / selected_profile).is_dir():
                available_profiles = sorted(_collect_profile_names(profiles_root))
                raise RuntimeError(
                    "Config.__init__(): Selected profile "
                    f"'{selected_profile}' was not found in external profiles root "
                    f"{profiles_root}. "
                    f"Available external profiles: {available_profiles}. "
                    "Either set 'REACHY_MINI_CUSTOM_PROFILE' to one of the available external profiles "
                    "or unset 'REACHY_MINI_EXTERNAL_PROFILES_DIRECTORY' to use built-in profiles."
                )

            # External profile names may not shadow built-in ones.
            _raise_on_name_collisions(
                label="profile",
                external_root=profiles_root,
                internal_root=DEFAULT_PROFILES_DIRECTORY,
                external_names=_collect_profile_names(profiles_root),
                internal_names=_collect_profile_names(DEFAULT_PROFILES_DIRECTORY),
            )

        if tools_root is not None:
            # External tool modules may not shadow built-in ones either.
            builtin_tools_root = Path(__file__).parent / "tools"
            _raise_on_name_collisions(
                label="tool",
                external_root=tools_root,
                internal_root=builtin_tools_root,
                external_names=_collect_tool_module_names(tools_root),
                internal_names=_collect_tool_module_names(builtin_tools_root),
            )

        if uses_external_profiles:
            logger.warning(
                "Environment variable 'REACHY_MINI_EXTERNAL_PROFILES_DIRECTORY' is set. "
                "Profiles (instructions.txt, ...) will be loaded from %s.",
                profiles_root,
            )
        else:
            logger.info(
                "'REACHY_MINI_EXTERNAL_PROFILES_DIRECTORY' is not set. "
                "Using built-in profiles from %s.",
                DEFAULT_PROFILES_DIRECTORY,
            )

        if tools_root is None:
            logger.info(
                "'REACHY_MINI_EXTERNAL_TOOLS_DIRECTORY' is not set. "
                "Using built-in shared tools only."
            )
        else:
            logger.warning(
                "Environment variable 'REACHY_MINI_EXTERNAL_TOOLS_DIRECTORY' is set. "
                "External tools will be loaded from %s.",
                tools_root,
            )


# Module-level instance created at import time; console.py imports it as
# ``config``.
config = Config()
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
def set_custom_profile(profile: str | None) -> None:
|
| 197 |
+
"""Update the selected custom profile at runtime and expose it via env.
|
| 198 |
+
|
| 199 |
+
This ensures modules that read `config` and code that inspects the
|
| 200 |
+
environment see a consistent value.
|
| 201 |
+
"""
|
| 202 |
+
if LOCKED_PROFILE is not None:
|
| 203 |
+
return
|
| 204 |
+
try:
|
| 205 |
+
config.REACHY_MINI_CUSTOM_PROFILE = profile
|
| 206 |
+
except Exception:
|
| 207 |
+
pass
|
| 208 |
+
try:
|
| 209 |
+
import os as _os
|
| 210 |
+
|
| 211 |
+
if profile:
|
| 212 |
+
_os.environ["REACHY_MINI_CUSTOM_PROFILE"] = profile
|
| 213 |
+
else:
|
| 214 |
+
# Remove to reflect default
|
| 215 |
+
_os.environ.pop("REACHY_MINI_CUSTOM_PROFILE", None)
|
| 216 |
+
except Exception:
|
| 217 |
+
pass
|
src/reachy_mini_receptionist/console.py
ADDED
|
@@ -0,0 +1,527 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Bidirectional local audio stream with optional settings UI.
|
| 2 |
+
|
| 3 |
+
In headless mode, there is no Gradio UI. If the OpenAI API key is not
|
| 4 |
+
available via environment/.env, we expose a minimal settings page via the
|
| 5 |
+
Reachy Mini Apps settings server to let non-technical users enter it.
|
| 6 |
+
|
| 7 |
+
The settings UI is served from this package's ``static/`` folder and offers a
|
| 8 |
+
single password field to set ``OPENAI_API_KEY``. Once set, we persist it to the
|
| 9 |
+
app instance's ``.env`` file (if available) and proceed to start streaming.
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import os
|
| 13 |
+
import sys
|
| 14 |
+
import time
|
| 15 |
+
import asyncio
|
| 16 |
+
import logging
|
| 17 |
+
from typing import List, Optional
|
| 18 |
+
from pathlib import Path
|
| 19 |
+
|
| 20 |
+
from fastrtc import AdditionalOutputs, audio_to_float32
|
| 21 |
+
from scipy.signal import resample
|
| 22 |
+
|
| 23 |
+
from reachy_mini import ReachyMini
|
| 24 |
+
from reachy_mini.media.media_manager import MediaBackend
|
| 25 |
+
from reachy_mini_receptionist.config import LOCKED_PROFILE, config
|
| 26 |
+
from reachy_mini_receptionist.openai_realtime import OpenaiRealtimeHandler
|
| 27 |
+
from reachy_mini_receptionist.headless_personality_ui import mount_personality_routes
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
try:
    # FastAPI is provided by the Reachy Mini Apps runtime
    from fastapi import FastAPI, Response
    from pydantic import BaseModel
    from fastapi.responses import FileResponse, JSONResponse, RedirectResponse
    from starlette.staticfiles import StaticFiles
except Exception:  # pragma: no cover - only loaded when settings_app is used
    # Placeholders keep every name imported above defined, so the module can
    # still be imported and its annotations evaluated without the web stack.
    # ``Response`` and ``RedirectResponse`` are included so that a later
    # reference fails no differently from the other placeholders.
    FastAPI = object  # type: ignore
    Response = object  # type: ignore
    FileResponse = object  # type: ignore
    JSONResponse = object  # type: ignore
    RedirectResponse = object  # type: ignore
    StaticFiles = object  # type: ignore
    BaseModel = object  # type: ignore
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
logger = logging.getLogger(__name__)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
class LocalStream:
|
| 48 |
+
"""LocalStream using Reachy Mini's recorder/player."""
|
| 49 |
+
|
| 50 |
+
def __init__(
    self,
    handler: OpenaiRealtimeHandler,
    robot: ReachyMini,
    *,
    settings_app: Optional[FastAPI] = None,
    instance_path: Optional[str] = None,
):
    """Wire the realtime handler to the robot's local audio stream.

    - ``handler``: realtime handler; it is given a ``_clear_queue`` hook
      pointing at this stream's ``clear_audio_queue``.
    - ``robot``: the Reachy Mini instance used by the stream.
    - ``settings_app``: the Reachy Mini Apps FastAPI to attach settings endpoints.
    - ``instance_path``: directory where per-instance ``.env`` should be stored.
    """
    # Collaborators
    self._robot = robot
    self.handler = handler
    # Allow the handler to flush the player queue when appropriate.
    self.handler._clear_queue = self.clear_audio_queue

    # Settings UI state
    self._settings_app: Optional[FastAPI] = settings_app
    self._instance_path: Optional[str] = instance_path
    self._settings_initialized = False

    # Async runtime state
    self._asyncio_loop = None
    self._stop_event = asyncio.Event()
    self._tasks: List[asyncio.Task[None]] = []
|
| 73 |
+
|
| 74 |
+
# ---- Settings UI (only when API key is missing) ----
|
| 75 |
+
def _read_env_lines(self, env_path: Path) -> list[str]:
    """Return the lines of ``env_path``, or of a template when it is absent.

    When ``env_path`` exists its own lines are returned (an empty list if
    it cannot be read). Otherwise the first readable ``.env.example`` is
    used, searched in this order: next to ``env_path``, in the current
    working directory, then next to this module. An empty list is returned
    when no template yields any text or when anything unexpected fails.
    """
    instance_dir = env_path.parent

    def _slurp(candidate: Path) -> Optional[str]:
        """Read ``candidate`` as UTF-8 text; None when the read fails."""
        try:
            return candidate.read_text(encoding="utf-8")
        except Exception:
            return None

    try:
        if env_path.exists():
            existing = _slurp(env_path)
            return [] if existing is None else existing.splitlines()

        template = None

        # 1) Template shipped alongside the instance's .env
        instance_example = instance_dir / ".env.example"
        if instance_example.exists():
            template = _slurp(instance_example)

        # 2) Template in the current working directory; even resolving the
        #    cwd is allowed to fail without aborting the search.
        if template is None:
            try:
                cwd_example = Path.cwd() / ".env.example"
                if cwd_example.exists():
                    template = _slurp(cwd_example)
            except Exception:
                template = None

        # 3) Template packaged next to this module
        if template is None:
            packaged = Path(__file__).parent / ".env.example"
            if packaged.exists():
                template = _slurp(packaged)

        if not template:
            return []
        return template.splitlines()
    except Exception:
        return []
|
| 108 |
+
|
| 109 |
+
def _persist_api_key(self, key: str) -> None:
    """Make an API key live in this process and store it in the instance ``.env``.

    A blank key (``None``, empty or whitespace-only) is ignored.

    Otherwise the stripped key is:

    - exported as ``OPENAI_API_KEY`` in ``os.environ`` and assigned to
      ``config.OPENAI_API_KEY`` (both best-effort);
    - written to ``<instance_path>/.env`` when an instance path is set. The
      starting content comes from ``_read_env_lines`` (existing ``.env`` or a
      template); the first ``OPENAI_API_KEY=`` line is replaced, or a new
      line is appended when none exists;
    - re-loaded into the process environment from the written file via
      ``dotenv.load_dotenv(..., override=True)`` (best-effort).

    Failures while writing are logged as warnings and never propagate.
    """
    api_key = (key or "").strip()
    if not api_key:
        return

    # Publish to the running process first so consumers see it immediately,
    # independent of whether the file write below succeeds.
    try:
        os.environ["OPENAI_API_KEY"] = api_key
    except Exception:  # best-effort
        pass
    try:
        config.OPENAI_API_KEY = api_key
    except Exception:
        pass

    if not self._instance_path:
        return

    try:
        dotenv_file = Path(self._instance_path) / ".env"
        entries = self._read_env_lines(dotenv_file)
        assignment = f"OPENAI_API_KEY={api_key}"

        # Index of the first existing assignment, if any.
        hit = next(
            (
                idx
                for idx, entry in enumerate(entries)
                if entry.strip().startswith("OPENAI_API_KEY=")
            ),
            None,
        )
        if hit is None:
            entries.append(assignment)
        else:
            entries[hit] = assignment

        dotenv_file.write_text("\n".join(entries) + "\n", encoding="utf-8")
        logger.info("Persisted OPENAI_API_KEY to %s", dotenv_file)

        # Reload the file so every variable it defines is visible to
        # code imported later in this process.
        try:
            from dotenv import load_dotenv

            load_dotenv(dotenv_path=str(dotenv_file), override=True)
        except Exception:
            pass
    except Exception as e:
        logger.warning("Failed to persist OPENAI_API_KEY: %s", e)
|
| 162 |
+
|
| 163 |
+
def _persist_personality(self, profile: Optional[str]) -> None:
    """Store the startup personality in the config and the instance ``.env``.

    Does nothing when ``LOCKED_PROFILE`` is set. A blank ``profile`` is
    treated as "no selection" (``None``).

    The selection is passed to ``set_custom_profile`` (best-effort). When an
    instance path is configured, the ``REACHY_MINI_CUSTOM_PROFILE=`` line of
    ``<instance_path>/.env`` is replaced, appended, or (for no selection)
    removed. Clearing the selection never creates a ``.env`` that did not
    already exist. The written file is then re-loaded with
    ``dotenv.load_dotenv(..., override=True)``.

    Failures while writing are logged as warnings and never propagate.
    """
    if LOCKED_PROFILE is not None:
        return

    selection = (profile or "").strip() or None
    try:
        from reachy_mini_receptionist.config import set_custom_profile

        set_custom_profile(selection)
    except Exception:
        pass

    if not self._instance_path:
        return

    try:
        dotenv_file = Path(self._instance_path) / ".env"
        entries = self._read_env_lines(dotenv_file)

        # Locate the first existing assignment, if any.
        position = next(
            (
                idx
                for idx, entry in enumerate(entries)
                if entry.strip().startswith("REACHY_MINI_CUSTOM_PROFILE=")
            ),
            None,
        )
        if position is not None:
            if selection:
                entries[position] = f"REACHY_MINI_CUSTOM_PROFILE={selection}"
            else:
                del entries[position]
        elif selection:
            entries.append(f"REACHY_MINI_CUSTOM_PROFILE={selection}")

        # Nothing to clear and no file yet: do not create one from a template.
        if selection is None and not dotenv_file.exists():
            return

        dotenv_file.write_text("\n".join(entries) + "\n", encoding="utf-8")
        logger.info("Persisted startup personality to %s", dotenv_file)
        try:
            from dotenv import load_dotenv

            load_dotenv(dotenv_path=str(dotenv_file), override=True)
        except Exception:
            pass
    except Exception as e:
        logger.warning("Failed to persist REACHY_MINI_CUSTOM_PROFILE: %s", e)
|
| 204 |
+
|
| 205 |
+
def _read_persisted_personality(self) -> Optional[str]:
    """Return the personality stored in the instance ``.env``, if there is one.

    Scans ``<instance_path>/.env`` for the first line that, ignoring
    surrounding whitespace, starts with ``REACHY_MINI_CUSTOM_PROFILE=`` and
    returns the stripped text after the first ``=``.

    Returns:
        The stored value, or ``None`` when no instance path is set, the file
        or the line is missing, the value is blank, or reading fails.
    """
    if not self._instance_path:
        return None

    dotenv_file = Path(self._instance_path) / ".env"
    try:
        if not dotenv_file.exists():
            return None
        for raw in dotenv_file.read_text(encoding="utf-8").splitlines():
            if not raw.strip().startswith("REACHY_MINI_CUSTOM_PROFILE="):
                continue
            # Only the first matching line counts, even when its value is blank.
            value = raw.partition("=")[2].strip()
            return value if value else None
    except Exception:
        pass
    return None
|
| 220 |
+
|
| 221 |
+
def _init_settings_ui_if_needed(self) -> None:
    """Attach the minimal settings UI and its API routes to the settings app.

    The UI is mounted whenever a settings app was provided, so users see a
    confirmation even if the API key is already configured. The method is
    idempotent: it returns immediately once the routes have been registered,
    and it is a no-op when no settings app is available.

    Routes registered on the settings app:

    - ``GET /``: redirect to ``/dashboard``.
    - ``GET /favicon.ico``: empty 204 response to avoid noisy 404s.
    - ``GET /status``: ``{"has_key": bool}``.
    - ``GET /ready``: ``{"ready": bool}``, read from the ``_TOOLS_INITIALIZED``
      flag of the already-imported ``core_tools`` module.
    - ``POST /openai_api_key``: persist a key.
    - ``POST /validate_api_key``: check a key against the OpenAI models
      endpoint without persisting it.
    """
    if self._settings_initialized:
        return
    if self._settings_app is None:
        return

    static_dir = Path(__file__).parent / "static"

    if hasattr(self._settings_app, "mount"):
        try:
            # Serve /static/* assets
            self._settings_app.mount("/static", StaticFiles(directory=str(static_dir)), name="static")
        except Exception:
            # Best-effort: the API routes below remain usable without assets.
            pass

    class ApiKeyPayload(BaseModel):
        openai_api_key: str

    # GET / -> redirect to /dashboard (the receptionist control room)
    @self._settings_app.get("/")
    def _root() -> RedirectResponse:
        return RedirectResponse(url="/dashboard")

    # GET /favicon.ico -> optional, avoid noisy 404s on some browsers
    @self._settings_app.get("/favicon.ico")
    def _favicon() -> Response:
        return Response(status_code=204)

    # GET /status -> whether key is set
    @self._settings_app.get("/status")
    def _status() -> JSONResponse:
        has_key = bool(config.OPENAI_API_KEY and str(config.OPENAI_API_KEY).strip())
        return JSONResponse({"has_key": has_key})

    # GET /ready -> whether backend finished loading tools
    @self._settings_app.get("/ready")
    def _ready() -> JSONResponse:
        try:
            # Look the module up instead of importing it: the endpoint must
            # not trigger tool loading itself.
            mod = sys.modules.get("reachy_mini_receptionist.tools.core_tools")
            ready = bool(getattr(mod, "_TOOLS_INITIALIZED", False)) if mod else False
        except Exception:
            ready = False
        return JSONResponse({"ready": ready})

    # POST /openai_api_key -> set/persist key
    @self._settings_app.post("/openai_api_key")
    def _set_key(payload: ApiKeyPayload) -> JSONResponse:
        key = (payload.openai_api_key or "").strip()
        if not key:
            return JSONResponse({"ok": False, "error": "empty_key"}, status_code=400)
        self._persist_api_key(key)
        return JSONResponse({"ok": True})

    # POST /validate_api_key -> validate key without persisting it
    @self._settings_app.post("/validate_api_key")
    async def _validate_key(payload: ApiKeyPayload) -> JSONResponse:
        key = (payload.openai_api_key or "").strip()
        if not key:
            return JSONResponse({"valid": False, "error": "empty_key"}, status_code=400)

        # Try to validate by checking if we can fetch the models
        try:
            import httpx

            headers = {"Authorization": f"Bearer {key}", "Content-Type": "application/json"}
            async with httpx.AsyncClient(timeout=10.0) as client:
                response = await client.get("https://api.openai.com/v1/models", headers=headers)
                if response.status_code == 200:
                    return JSONResponse({"valid": True})
                elif response.status_code == 401:
                    return JSONResponse({"valid": False, "error": "invalid_api_key"}, status_code=401)
                else:
                    # Any other upstream status is passed through unchanged.
                    return JSONResponse(
                        {"valid": False, "error": "validation_failed"}, status_code=response.status_code
                    )
        except Exception as e:
            logger.warning("API key validation failed: %s", e)
            return JSONResponse({"valid": False, "error": "validation_error"}, status_code=500)

    self._settings_initialized = True
|
| 307 |
+
|
| 308 |
+
def launch(self) -> None:
    """Resolve the API key, start the media pipelines and run the async loops.

    Steps, in order:

    1. Load ``<instance_path>/.env`` when it exists and copy
       ``OPENAI_API_KEY`` (and, unless the profile is locked,
       ``REACHY_MINI_CUSTOM_PROFILE``) into the in-memory config.
    2. If no key is configured, try to claim one through the
       ``HuggingFaceM4/gradium_setup`` Gradio client and persist it.
    3. Mount the settings UI, then poll every 0.2 s until a key is
       configured. ``KeyboardInterrupt`` during the wait aborts the launch.
    4. Start recording and playback, then run the handler, record and play
       tasks with ``asyncio.run`` until they finish or are cancelled. The
       handler is always shut down afterwards.
    """
    self._stop_event.clear()

    def key_configured() -> bool:
        # A key counts only when it is non-empty after stripping.
        return bool(config.OPENAI_API_KEY and str(config.OPENAI_API_KEY).strip())

    # Try to load an existing instance .env first (covers subsequent runs)
    if self._instance_path:
        try:
            from dotenv import load_dotenv

            from reachy_mini_receptionist.config import set_custom_profile

            dotenv_file = Path(self._instance_path) / ".env"
            if dotenv_file.exists():
                load_dotenv(dotenv_path=str(dotenv_file), override=True)
                # Mirror the freshly loaded values into the config object.
                loaded_key = os.getenv("OPENAI_API_KEY", "").strip()
                if loaded_key:
                    try:
                        config.OPENAI_API_KEY = loaded_key
                    except Exception:
                        pass
                if LOCKED_PROFILE is None:
                    loaded_profile = os.getenv("REACHY_MINI_CUSTOM_PROFILE")
                    if loaded_profile is not None:
                        try:
                            set_custom_profile(loaded_profile.strip() or None)
                        except Exception:
                            pass  # Best-effort profile update
        except Exception:
            pass  # Instance .env loading is optional; continue with defaults

    # If key is still missing, try to download one from HuggingFace
    if not key_configured():
        logger.info("OPENAI_API_KEY not set, attempting to download from HuggingFace...")
        try:
            from gradio_client import Client

            client = Client("HuggingFaceM4/gradium_setup", verbose=False)
            key, status = client.predict(api_name="/claim_b_key")
            if key and key.strip():
                logger.info("Successfully downloaded API key from HuggingFace")
                # Persist it immediately
                self._persist_api_key(key)
        except Exception as e:
            logger.warning(f"Failed to download API key from HuggingFace: {e}")

    # Always expose settings UI if a settings app is available
    # (do this AFTER loading/downloading the key so status endpoint sees the right value)
    self._init_settings_ui_if_needed()

    # If key is still missing -> wait until provided via the settings UI
    if not key_configured():
        logger.warning("OPENAI_API_KEY not found. Open the app settings page to enter it.")
        try:
            # The settings UI updates config.OPENAI_API_KEY; poll for it.
            while not key_configured():
                time.sleep(0.2)
        except KeyboardInterrupt:
            logger.info("Interrupted while waiting for API key.")
            return

    # Start media after key is set/available
    self._robot.media.start_recording()
    self._robot.media.start_playing()
    time.sleep(1)  # give some time to the pipelines to start

    async def runner() -> None:
        # Remember the running loop for cross-thread personality actions.
        self._asyncio_loop = asyncio.get_running_loop()  # type: ignore[assignment]

        # Mount personality routes now that loop and handler are available
        try:
            if self._settings_app is not None:
                mount_personality_routes(
                    self._settings_app,
                    self.handler,
                    lambda: self._asyncio_loop,
                    persist_personality=self._persist_personality,
                    get_persisted_personality=self._read_persisted_personality,
                )
        except Exception:
            pass

        self._tasks = [
            asyncio.create_task(self.handler.start_up(), name="openai-handler"),
            asyncio.create_task(self.record_loop(), name="stream-record-loop"),
            asyncio.create_task(self.play_loop(), name="stream-play-loop"),
        ]
        try:
            await asyncio.gather(*self._tasks)
        except asyncio.CancelledError:
            logger.info("Tasks cancelled during shutdown")
        finally:
            # Ensure handler connection is closed
            await self.handler.shutdown()

    asyncio.run(runner())
|
| 407 |
+
|
| 408 |
+
def close(self) -> None:
    """Shut the stream down: media first, then the async side.

    Order of operations:

    1. Stop audio recording, then playback. Errors from either call are
       logged at debug level and ignored.
    2. Set the stop event so the record/play loops leave their ``while``.
    3. Cancel every task in ``self._tasks`` that has not finished yet.
    """
    logger.info("Stopping LocalStream...")

    # Media pipelines go down before the tasks are cancelled so shutdown is
    # clean before PortAudio cleanup.
    try:
        self._robot.media.stop_recording()
    except Exception as e:
        logger.debug(f"Error stopping recording (may already be stopped): {e}")

    try:
        self._robot.media.stop_playing()
    except Exception as e:
        logger.debug(f"Error stopping playback (may already be stopped): {e}")

    # Tell the async loops to exit.
    self._stop_event.set()

    # Cancel whatever is still running.
    for pending in (task for task in self._tasks if not task.done()):
        pending.cancel()
|
| 437 |
+
|
| 438 |
+
def clear_audio_queue(self) -> None:
    """Drop all audio that is queued for playback.

    Clears the backend-side buffer (``clear_player`` for the GStreamer
    backend, ``clear_output_buffer`` for the default backends) and replaces
    the handler's ``output_queue`` with a fresh, empty ``asyncio.Queue``.
    """
    logger.info("User intervention: flushing player queue")
    media = self._robot.media
    if media.backend == MediaBackend.GSTREAMER:
        # Directly flush gstreamer audio pipe
        media.audio.clear_player()
    elif media.backend in (MediaBackend.DEFAULT, MediaBackend.DEFAULT_NO_VIDEO):
        media.audio.clear_output_buffer()
    self.handler.output_queue = asyncio.Queue()
|
| 447 |
+
|
| 448 |
+
async def record_loop(self) -> None:
    """Forward microphone frames to the handler until the stop event is set.

    Each available frame is passed to ``handler.receive`` as an
    ``(input_sample_rate, frame)`` tuple. Control is yielded to the event
    loop once per iteration, whether or not a frame was available.
    """
    input_sample_rate = self._robot.media.get_input_audio_samplerate()
    logger.debug(f"Audio recording started at {input_sample_rate} Hz")

    while not self._stop_event.is_set():
        if (frame := self._robot.media.get_audio_sample()) is not None:
            await self.handler.receive((input_sample_rate, frame))
        await asyncio.sleep(0)  # avoid busy loop
|
| 458 |
+
|
| 459 |
+
async def play_loop(self) -> None:
    """Consume handler outputs: log text messages and play audio frames.

    ``AdditionalOutputs`` messages with string content are logged (content
    truncated to 500 characters). Tuple outputs are treated as
    ``(sample_rate, audio)``: the audio is reduced to a single channel,
    converted with ``audio_to_float32``, resampled to the output rate when
    the rates differ, and pushed to the robot's player. Frames that are
    near-empty, or that would resample to zero samples, are skipped. Any
    other output type is ignored with a debug log.
    """
    while not self._stop_event.is_set():
        emitted = await self.handler.emit()

        if isinstance(emitted, AdditionalOutputs):
            for message in emitted.args:
                text = message.get("content", "")
                if not isinstance(text, str):
                    continue
                shown = text if len(text) < 500 else text[:500] + "…"
                logger.info("role=%s content=%s", message.get("role"), shown)

        elif isinstance(emitted, tuple):
            input_sample_rate, samples = emitted
            output_sample_rate = self._robot.media.get_output_audio_samplerate()

            if samples.ndim == 2:
                # Bring the array to the scipy channels-last layout.
                if samples.shape[1] > samples.shape[0]:
                    samples = samples.T
                # Keep only the first channel of multi-channel audio.
                if samples.shape[1] > 1:
                    samples = samples[:, 0]

            frame = audio_to_float32(samples)

            # Some Gemini Live preview models (e.g.
            # gemini-3.1-flash-live-preview as of 2026-05-21) emit 2-byte
            # placeholder chunks. Resampling such a chunk to a target length
            # of 0 raises ZeroDivisionError inside scipy.signal.resample,
            # which would kill this loop and the app with it. A truly empty
            # chunk has nothing to play anyway, so skipping is safe.
            playable = not (frame.size == 0 or len(frame) < 2)
            if not playable:
                logger.debug(
                    "play_loop: skipping near-empty audio frame "
                    "(len=%d, input_sr=%s, output_sr=%s)",
                    len(frame), input_sample_rate, output_sample_rate,
                )
            elif input_sample_rate != output_sample_rate:
                target_len = int(len(frame) * output_sample_rate / input_sample_rate)
                if target_len < 1:
                    # Resample would divide by zero — skip rather than crash.
                    logger.debug(
                        "play_loop: skipping frame that would resample to 0 "
                        "samples (len=%d, %s->%s)",
                        len(frame), input_sample_rate, output_sample_rate,
                    )
                    playable = False
                else:
                    frame = resample(frame, target_len)

            if playable:
                self._robot.media.push_audio_sample(frame)

        else:
            logger.debug("Ignoring output type=%s", type(emitted).__name__)

        await asyncio.sleep(0)  # yield to event loop
|
src/reachy_mini_receptionist/conversation_controller.py
ADDED
|
@@ -0,0 +1,586 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Conversation controller — translates events into session state transitions.
|
| 2 |
+
|
| 3 |
+
This is the workflow engine. It listens for:
|
| 4 |
+
- Face state events from FaceRecognitionWorker.
|
| 5 |
+
- Tool call completions from the realtime handler.
|
| 6 |
+
|
| 7 |
+
And decides which ReceptionState transition should fire on the SessionManager.
|
| 8 |
+
|
| 9 |
+
Also exposes ``next_action_hint(state)`` — short directives the realtime
|
| 10 |
+
handler appends to its session context push so the LLM gets per-state
|
| 11 |
+
workflow guidance dynamically, instead of having the whole flow baked into
|
| 12 |
+
the system prompt.
|
| 13 |
+
"""
|
| 14 |
+
from __future__ import annotations
|
| 15 |
+
|
| 16 |
+
import asyncio
|
| 17 |
+
import logging
|
| 18 |
+
from typing import Any, Optional
|
| 19 |
+
|
| 20 |
+
from reachy_mini_receptionist.receptionist_state import ReceptionState
|
| 21 |
+
from reachy_mini_receptionist.session_manager import SessionManager
|
| 22 |
+
|
| 23 |
+
logger = logging.getLogger(__name__)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
# Session states in which a face resolving to a known person still drives
# identification: the visitor's name has not yet been confirmed through
# another channel, so a face match may move the session to RECOGNIZED.
# MULTIPLE_PEOPLE is part of the set so the controller can leave it as soon
# as the crowd thins to a single face.
_EARLY_IDENTIFICATION_STATES: frozenset[ReceptionState] = frozenset(
    (
        ReceptionState.IDLE,
        ReceptionState.VISITOR_DETECTED,
        ReceptionState.GREETING,
        ReceptionState.ASK_NAME,
        ReceptionState.MULTIPLE_PEOPLE,
    )
)
|
| 38 |
+
|
| 39 |
+
# Session states in which a lost face is taken to mean the visitor walked
# away, so the session is reset for the next person. In the in-flow states
# (e.g. ASK_NAME) a briefly lost face just means the visitor turned their
# head, so those states are deliberately absent here.
_RESET_ON_FACE_LOST_STATES: frozenset[ReceptionState] = frozenset(
    (
        ReceptionState.NOTIFIED,
        ReceptionState.NO_APPOINTMENT,
        ReceptionState.EMAIL_FAILED,
        ReceptionState.COMPLETE,
        ReceptionState.ERROR,
    )
)
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class ConversationController:
|
| 52 |
+
"""Wire face events + tool completions into session state transitions.
|
| 53 |
+
|
| 54 |
+
Stateless aside from the SessionManager it operates on. Safe to call
|
| 55 |
+
handlers from any thread because SessionManager handles locking.
|
| 56 |
+
|
| 57 |
+
Optionally records every completed visit to a ``VisitorLog`` when the
|
| 58 |
+
session resets out of a meaningful state (visitor name, recognized
|
| 59 |
+
face, or employee was set).
|
| 60 |
+
"""
|
| 61 |
+
|
| 62 |
+
def __init__(self, session_manager: SessionManager) -> None:
    """Bind the controller to the session manager it drives."""
    # The only state the controller keeps; all transitions go through it.
    self._session: SessionManager = session_manager
|
| 64 |
+
|
| 65 |
+
# ------------------------------------------------------------------
|
| 66 |
+
# Face events
|
| 67 |
+
# ------------------------------------------------------------------
|
| 68 |
+
|
| 69 |
+
def on_face_event(self, event: dict[str, Any]) -> None:
    """Map a ``face_state_changed`` event onto a session transition.

    Event keys read here:
        state: ``"no_face"``, ``"unknown"``, ``"known"`` or ``"multiple"``.
        name: recognised person's name; required for the ``"known"`` branch.

    Behaviour per state:
        - ``"known"`` (with a name): restore or keep NOTIFIED when the host
          was already emailed for this visitor; otherwise, in an
          early-identification state, move to RECOGNIZED and auto-resolve
          the appointment, unless the visitor already gave a different name,
          in which case speech wins. In any other state only the face match
          is recorded.
        - ``"unknown"``: IDLE / MULTIPLE_PEOPLE -> VISITOR_DETECTED.
        - ``"multiple"``: enter MULTIPLE_PEOPLE if not already there.
        - ``"no_face"``: reset the session when in a terminal state.

    Any other state value is ignored.
    """
    state = event.get("state")
    name = event.get("name")
    current = self._session.current_state
    snapshot = self._session.session

    if state == "known" and name:
        # Guard against re-running a finished flow: once the host has been
        # emailed for this visitor, re-entering RECOGNIZED would repeat the
        # check-in (get_today_calendar + send_email) and send duplicate
        # emails when the face flickers to MULTIPLE_PEOPLE and back.
        host_already_notified = bool(snapshot.email_sent_to) and (
            (snapshot.visitor_name or "").strip().lower() == name.strip().lower()
        )
        if host_already_notified:
            if current == ReceptionState.NOTIFIED:
                self._session.update(recognized_face_name=name)
            else:
                logger.info(
                    "Face %r returned after notified — restoring NOTIFIED instead of re-running flow",
                    name,
                )
                self._session.transition(
                    ReceptionState.NOTIFIED,
                    recognized_face_name=name,
                )
            return

        if current not in _EARLY_IDENTIFICATION_STATES:
            # Past identification — just record the face match.
            self._session.update(recognized_face_name=name)
            return

        # Speech beats the camera: the recogniser can mis-match (LBPH on a
        # single crop is noisy under different lighting), so a name the
        # visitor already gave via register_guest is kept when it differs.
        spoken_name = (snapshot.visitor_name or "").strip()
        if spoken_name and spoken_name.lower() != name.strip().lower():
            logger.info(
                "Face matched %r but visitor already confirmed %r — keeping speech",
                name, spoken_name,
            )
            self._session.update(recognized_face_name=name)
            return

        self._session.transition(
            ReceptionState.RECOGNIZED,
            visitor_name=name,
            recognized_face_name=name,
        )
        # Resolve the appointment here, as the register_guest path does, so
        # reaching APPOINTMENT_MATCHED does not depend on the LLM calling
        # get_today_calendar.
        try:
            self._auto_resolve_appointment(name)
        except Exception as e:
            logger.warning(
                "Auto-resolve appointment after face match failed: %s", e,
            )
        return

    if state == "unknown":
        if current == ReceptionState.IDLE or current == ReceptionState.MULTIPLE_PEOPLE:
            self._session.transition(ReceptionState.VISITOR_DETECTED)
        return

    if state == "multiple":
        if current != ReceptionState.MULTIPLE_PEOPLE:
            self._session.transition(ReceptionState.MULTIPLE_PEOPLE)
        return

    if state == "no_face":
        if current in _RESET_ON_FACE_LOST_STATES:
            logger.info("Face lost in terminal state %s — resetting session", current.value)
            # SessionManager.reset() handles persisting the pre-reset
            # snapshot to the visitor log on its own.
            self._session.reset()
        return
|
| 158 |
+
|
| 159 |
+
# ------------------------------------------------------------------
|
| 160 |
+
# Tool completions
|
| 161 |
+
# ------------------------------------------------------------------
|
| 162 |
+
|
| 163 |
+
async def on_tool_completed_async(
    self,
    tool_name: str,
    args: dict[str, Any],
    result: dict[str, Any],
) -> None:
    """Event-loop entry point for tool completions.

    Non-dict results are ignored. Explicit failures (an ``"error"`` key
    together with ``success is False``) are delegated to
    ``on_tool_completed``. For ``register_guest`` and ``lookup_employee``
    the calendar is awaited up front through
    ``calendar_data.get_appointments_async`` and handed to the dispatcher;
    if that pre-fetch fails, ``None`` is passed instead. Every other tool
    is dispatched with ``None`` as well.
    """
    if not isinstance(result, dict):
        return

    if "error" in result and result.get("success") is False:
        self.on_tool_completed(tool_name, args, result)
        return

    prefetched: Optional[list[dict[str, Any]]] = None
    if tool_name == "register_guest" or tool_name == "lookup_employee":
        try:
            from reachy_mini_receptionist import calendar_data

            prefetched = await calendar_data.get_appointments_async()
        except Exception as e:
            logger.debug("Pre-fetch appointments failed: %s", e)
            prefetched = None

    self._dispatch_tool_completion(tool_name, args, result, prefetched)
|
| 194 |
+
|
| 195 |
+
def on_tool_completed(
    self,
    tool_name: str,
    args: dict[str, Any],
    result: dict[str, Any],
) -> None:
    """Synchronous entry point for tool completions.

    Non-dict results are ignored. Explicit failures (an ``"error"`` key
    together with ``success is False``) are only logged at debug level; the
    session is never moved to ERROR from here, which is left to the caller.
    Everything else goes to ``_dispatch_tool_completion`` with no
    pre-fetched calendar.

    Per the dispatcher's contract, passing no calendar makes
    ``_auto_resolve_appointment`` fall back to its own synchronous fetch,
    so callers on the realtime event loop should prefer
    ``on_tool_completed_async``.
    """
    if not isinstance(result, dict):
        return

    if result.get("success") is False and "error" in result:
        logger.debug("Tool %s reported failure: %s", tool_name, result.get("error"))
        return

    self._dispatch_tool_completion(tool_name, args, result, None)
|
| 219 |
+
|
| 220 |
+
def _dispatch_tool_completion(
    self,
    tool_name: str,
    args: dict[str, Any],
    result: dict[str, Any],
    appointments: Optional[list[dict[str, Any]]],
) -> None:
    """Route a tool result to the handler that applies its session transition.

    Core transition logic shared by the sync and async entry points. Tool
    names other than ``register_guest``, ``get_today_calendar``,
    ``send_email`` and ``lookup_employee`` are ignored.

    ``appointments`` is the optional pre-fetched calendar (set by the async
    entry point so the iCal HTTP call doesn't run on the realtime audio
    loop). When it is ``None``:

    * ``register_guest`` — ``_auto_resolve_appointment`` falls back to its
      own sync fetch.
    * ``lookup_employee`` — no calendar is fetched here, so the "prefer the
      real appointment" check is skipped and a synthetic walk-in is used.
    """
    if tool_name == "register_guest":
        self._complete_register_guest(args, result, appointments)
    elif tool_name == "get_today_calendar":
        self._complete_get_today_calendar(result)
    elif tool_name == "send_email":
        self._complete_send_email(args, result)
    elif tool_name == "lookup_employee":
        self._complete_lookup_employee(args, result, appointments)


def _complete_register_guest(
    self,
    args: dict[str, Any],
    result: dict[str, Any],
    appointments: Optional[list[dict[str, Any]]],
) -> None:
    """Move to RECOGNIZED after a successful register_guest, then resolve the appointment."""
    # Only transition on actual SUCCESS. If register_guest was blocked
    # (no_confirmation, name_is_filler, hallucinated chatter, no_face, etc.)
    # it returns success=False — we MUST NOT advance the session in that
    # case, or the visitor ends up locked into a bogus name like "Community"
    # with no path to fix it.
    if not result.get("success"):
        logger.debug(
            "register_guest returned success=False (reason=%r) — not transitioning",
            result.get("blocked_reason") or result.get("error"),
        )
        return

    name = (args.get("name") or "").strip()
    if not name:
        # Previously a silent no-op; leave a trace so a stalled session is explainable.
        logger.debug("register_guest succeeded without a usable name — not transitioning")
        return

    self._session.transition(
        ReceptionState.RECOGNIZED,
        visitor_name=name,
        recognized_face_name=name,
    )
    # The LLM was supposed to follow the RECOGNIZED hint with a
    # get_today_calendar tool call, but it kept asking the visitor
    # "who are you here to see?" instead — emails never went out.
    # Resolve the calendar from the backend and dispatch
    # APPOINTMENT_MATCHED / NO_APPOINTMENT ourselves so the bot is never
    # blocked on the LLM remembering to look something up.
    try:
        self._auto_resolve_appointment(name, appointments)
    except Exception as e:
        logger.warning(
            "Auto-resolve appointment after register_guest failed: %s", e,
        )


def _complete_get_today_calendar(self, result: dict[str, Any]) -> None:
    """Match the current visitor against the calendar the tool returned."""
    calendar = result.get("calendar") or []
    snap = self._session.session
    # Prefer explicit visitor_name (operator typed/spoke it), fall back to a
    # recognized face match (returning guest whose name we already trust
    # because LBPH matched their saved crop).
    visitor_name = snap.visitor_name or snap.recognized_face_name
    if not visitor_name:
        # LLM fetched the calendar as a generic lookup (often during idle
        # exploration) before identifying the visitor. There is no name to
        # match against yet, so don't change state.
        logger.debug(
            "get_today_calendar fired without a visitor_name — skipping transition",
        )
        return

    matched = self._match_appointment(calendar, visitor_name)
    updates: dict[str, Any] = {}
    # If we matched purely off the face name, promote it into visitor_name so
    # downstream (send_email guard, dashboard, visitor log) treats it as a
    # confirmed identity.
    if not snap.visitor_name:
        updates["visitor_name"] = visitor_name
    if matched:
        updates["matched_appointment"] = matched
        updates["employee_name"] = matched.get("visiting")
        self._session.transition(ReceptionState.APPOINTMENT_MATCHED, **updates)
    else:
        updates["error_message"] = f"No appointment found for {visitor_name!r}"
        self._session.transition(ReceptionState.NO_APPOINTMENT, **updates)


def _complete_send_email(self, args: dict[str, Any], result: dict[str, Any]) -> None:
    """Flip to NOTIFIED only when send_email reports success for a non-empty recipient."""
    # The send_email tool can refuse (placeholder address, no visitor identity
    # yet, duplicate-blocked, Resend HTTP error) — those all return
    # success=False, and the dashboard must not lie "NOTIFIED" when no email
    # left the system.
    to = (args.get("to") or "").strip()
    if not to:
        return
    if bool(result.get("success")):
        self._session.transition(
            ReceptionState.NOTIFIED,
            email_sent_to=to,
        )
    else:
        logger.info(
            "send_email returned success=False (blocked_reason=%s) — "
            "NOT transitioning to NOTIFIED",
            result.get("blocked_reason") or result.get("error"),
        )


def _complete_lookup_employee(
    self,
    args: dict[str, Any],
    result: dict[str, Any],
    appointments: Optional[list[dict[str, Any]]],
) -> None:
    """Handle the walk-in path: the visitor named the host instead of themselves.

    On a hit, an appointment (real if one matches, synthetic otherwise) is
    placed in the session so the existing APPOINTMENT_MATCHED -> send_email
    -> NOTIFIED path works unchanged. On a miss, UNKNOWN_EMPLOYEE is surfaced
    so the bot tells the visitor that name isn't on the list.
    """
    if not bool(result.get("found")):
        query = (args.get("name") or "").strip()
        self._session.transition(
            ReceptionState.UNKNOWN_EMPLOYEE,
            error_message=f"No directory match for {query!r}",
        )
        return

    emp = result.get("employee") or {}
    emp_email = (emp.get("email") or "").strip()
    emp_name = (emp.get("name") or args.get("name") or "").strip()
    if not emp_email:
        # Without an address there is nobody to notify. The session is left
        # untouched (as before), but the skip is no longer silent.
        logger.warning(
            "lookup_employee found %r but the record has no email — not transitioning",
            emp_name,
        )
        return

    snap = self._session.session
    # Trust the face DB: if LBPH already recognised this visitor
    # (recognized_face_name), promote that into visitor_name so send_email's
    # identity guard passes without forcing the bot to ask the name again.
    # This is the "returning known guest came back to see X" path — they
    # shouldn't be re-prompted for their name.
    visitor = snap.visitor_name or snap.recognized_face_name

    # If the visitor is known AND today's calendar has a real appointment for
    # them with this host, prefer that real appointment over a synthetic
    # walk-in. Otherwise the host's notification email loses the scheduled
    # time/note and reads "Walk-in visitor has arrived" for a meeting that
    # was actually on the calendar.
    real_appt: Optional[dict[str, Any]] = None
    if visitor and appointments:
        candidate = self._match_appointment(appointments, visitor)
        if (
            candidate
            and (candidate.get("visiting") or "").strip().lower()
            == emp_email.lower()
        ):
            real_appt = candidate

    if real_appt is not None:
        matched_appt = real_appt
    else:
        matched_appt = {
            "time": "now",
            "name": visitor or "Walk-in visitor",
            "note": f"Walk-in to see {emp_name}",
            "visiting": emp_email,
        }
    updates: dict[str, Any] = {
        "matched_appointment": matched_appt,
        "employee_name": emp_email,
    }
    if visitor and not snap.visitor_name:
        updates["visitor_name"] = visitor
    self._session.transition(ReceptionState.APPOINTMENT_MATCHED, **updates)
|
| 376 |
+
|
| 377 |
+
@staticmethod
|
| 378 |
+
def _match_appointment(
|
| 379 |
+
calendar: list[dict[str, Any]],
|
| 380 |
+
visitor_name: Optional[str],
|
| 381 |
+
) -> Optional[dict[str, Any]]:
|
| 382 |
+
"""Case-insensitive name match against today's calendar entries.
|
| 383 |
+
|
| 384 |
+
Matching is layered so a visitor who says just their first name
|
| 385 |
+
("Rohan") still resolves to a calendar entry like "Rohan Verma":
|
| 386 |
+
|
| 387 |
+
1. Exact match on the full string.
|
| 388 |
+
2. Calendar entry's first whitespace-delimited token equals the
|
| 389 |
+
visitor string ("Rohan" == first(\"Rohan Verma\")).
|
| 390 |
+
3. Substring of the calendar entry (\"rohan\" in \"rohan verma\").
|
| 391 |
+
|
| 392 |
+
Each layer returns the FIRST hit so we never silently switch which
|
| 393 |
+
calendar entry a visitor is mapped to. The minimum length guard
|
| 394 |
+
(>= 2 chars) keeps single-letter transcripts from matching half
|
| 395 |
+
the calendar.
|
| 396 |
+
"""
|
| 397 |
+
if not visitor_name:
|
| 398 |
+
return None
|
| 399 |
+
target = visitor_name.strip().lower()
|
| 400 |
+
if len(target) < 2:
|
| 401 |
+
return None
|
| 402 |
+
for appt in calendar:
|
| 403 |
+
if (appt.get("name") or "").strip().lower() == target:
|
| 404 |
+
return appt
|
| 405 |
+
for appt in calendar:
|
| 406 |
+
name = (appt.get("name") or "").strip().lower()
|
| 407 |
+
tokens = name.split()
|
| 408 |
+
if tokens and tokens[0] == target:
|
| 409 |
+
return appt
|
| 410 |
+
for appt in calendar:
|
| 411 |
+
name = (appt.get("name") or "").strip().lower()
|
| 412 |
+
if target in name:
|
| 413 |
+
return appt
|
| 414 |
+
return None
|
| 415 |
+
|
| 416 |
+
def _auto_resolve_appointment(
|
| 417 |
+
self,
|
| 418 |
+
visitor_name: str,
|
| 419 |
+
appointments: Optional[list[dict[str, Any]]] = None,
|
| 420 |
+
) -> None:
|
| 421 |
+
"""Look up today's calendar for ``visitor_name`` and dispatch.
|
| 422 |
+
|
| 423 |
+
Called from the RECOGNIZED transition in both the register_guest
|
| 424 |
+
path and the face-recognition path so the bot doesn't have to
|
| 425 |
+
depend on the LLM calling get_today_calendar — production showed
|
| 426 |
+
the LLM acknowledging the visitor and then improvising next-step
|
| 427 |
+
questions instead of running the tool, so the email never went out.
|
| 428 |
+
|
| 429 |
+
``appointments`` is an optional pre-fetched list. Async callers on
|
| 430 |
+
the realtime event loop preload it via ``calendar_data.get_appointments_async``
|
| 431 |
+
so the synchronous iCal HTTP call doesn't block the audio loop here.
|
| 432 |
+
When omitted, falls back to a sync fetch (safe from background threads
|
| 433 |
+
like the face worker).
|
| 434 |
+
|
| 435 |
+
Dispatches:
|
| 436 |
+
- APPOINTMENT_MATCHED with the matched appointment + host email,
|
| 437 |
+
if today's iCal has an entry whose ``name`` matches.
|
| 438 |
+
- NO_APPOINTMENT otherwise. The bot then offers to take a message
|
| 439 |
+
or route via lookup_employee.
|
| 440 |
+
"""
|
| 441 |
+
if not visitor_name:
|
| 442 |
+
return
|
| 443 |
+
if appointments is None:
|
| 444 |
+
from reachy_mini_receptionist import calendar_data
|
| 445 |
+
appointments = calendar_data.get_appointments()
|
| 446 |
+
matched = self._match_appointment(appointments, visitor_name)
|
| 447 |
+
if matched:
|
| 448 |
+
self._session.transition(
|
| 449 |
+
ReceptionState.APPOINTMENT_MATCHED,
|
| 450 |
+
matched_appointment=matched,
|
| 451 |
+
employee_name=matched.get("visiting"),
|
| 452 |
+
)
|
| 453 |
+
logger.info(
|
| 454 |
+
"Auto-resolved appointment for %r -> %s",
|
| 455 |
+
visitor_name, matched.get("visiting"),
|
| 456 |
+
)
|
| 457 |
+
else:
|
| 458 |
+
self._session.transition(
|
| 459 |
+
ReceptionState.NO_APPOINTMENT,
|
| 460 |
+
error_message=f"No appointment found for {visitor_name!r}",
|
| 461 |
+
)
|
| 462 |
+
logger.info("Auto-resolve: no appointment for %r", visitor_name)
|
| 463 |
+
|
| 464 |
+
|
| 465 |
+
# ----------------------------------------------------------------------
|
| 466 |
+
# Per-state workflow hints
|
| 467 |
+
# ----------------------------------------------------------------------
|
| 468 |
+
# These get appended to the session context push so the LLM knows what to
|
| 469 |
+
# do next — without the workflow being hardcoded in the system prompt.
|
| 470 |
+
# Keep each hint short (one or two sentences) and concrete. The LLM has
|
| 471 |
+
# already been told to wait for the user to speak before responding; these
|
| 472 |
+
# hints describe what to do *when* the user speaks.
|
| 473 |
+
|
| 474 |
+
# Maps each ReceptionState to the directive text handed to the LLM for that
# state. WAITING deliberately maps to the empty string (no directive).
_NEXT_ACTION_HINTS: dict[ReceptionState, str] = {
    # IDLE is the "no visitor yet" state — normally we stay silent. BUT if a
    # visitor speaks before the face worker has stabilised (camera obscured,
    # off-angle, dim light), the bot would otherwise just greet generically
    # and never advance. So when the user speaks during IDLE, treat the
    # utterance as the start of the flow and dispatch immediately to the
    # right tool based on what they said.
    ReceptionState.IDLE: (
        "If the visitor speaks first, just be conversational — greet them "
        "and figure out who they are or who they want to see. If they named "
        "themselves, confirm once ('I heard <name>, right?') then call "
        "register_guest(name, confirmed=true). If they named a host, call "
        "lookup_employee. ALWAYS respond — never go silent. If you mishear, "
        "say so and ask again naturally; don't lecture."
    ),
    ReceptionState.VISITOR_DETECTED: (
        "Greet the visitor warmly. Ask their name or who they're here to see "
        "if they haven't said yet. When they tell you a name, repeat it back "
        "briefly to confirm; if they say yes, call register_guest with the "
        "EXACT name you heard from the visitor and confirmed=true. Never "
        "invent a name. If they're here to see someone, call lookup_employee. "
        "Be conversational — short, friendly replies. ALWAYS respond to "
        "whatever they say; never go silent."
    ),
    ReceptionState.GREETING: (
        "Greet the visitor. If they haven't said why they're here yet, "
        "ask whether they have an appointment or are here to see someone."
    ),
    ReceptionState.ASK_NAME: (
        "Ask the visitor their name or who they're here to see. When they "
        "answer, confirm the name back briefly and if they say yes, call "
        "register_guest(confirmed=true). If you genuinely couldn't hear, "
        "ask once more naturally. Keep replies short and friendly."
    ),
    ReceptionState.MULTIPLE_PEOPLE: (
        "More than one face is in view. Say 'I see more than one person — could "
        "whoever's checking in step forward please?' Do NOT call register_guest, "
        "lookup_employee, or send_email until the state changes back."
    ),
    ReceptionState.RECOGNIZED: (
        "Acknowledge the visitor by name. "
        "Then call get_today_calendar to look up their appointment."
    ),
    ReceptionState.CHECKING_APPOINTMENT: (
        "Briefly let the visitor know you're checking the schedule."
    ),
    ReceptionState.APPOINTMENT_MATCHED: (
        "Use the appointment= and employee= values from the context above. "
        "Say something like: 'Great, I have you down for <appointment> with "
        "<employee> — I'll let them know you're here.' Then immediately call "
        "send_email to that host. If visitor= is empty, ask their name first."
    ),
    ReceptionState.NO_APPOINTMENT: (
        "Politely tell the visitor you don't have them on today's schedule. "
        "Offer to take a message or notify someone."
    ),
    ReceptionState.NOTIFYING_EMPLOYEE: (
        "Briefly tell the visitor you're notifying their host."
    ),
    ReceptionState.NOTIFIED: (
        "Use the employee= from context. Say: 'Done — I've notified <employee>. "
        "Please have a seat, they'll be with you shortly.' Be warm, not robotic."
    ),
    ReceptionState.EMAIL_FAILED: (
        "Apologize that you couldn't reach the host right now. "
        "Suggest the visitor wait briefly while you try again."
    ),
    ReceptionState.WAITING: "",
    ReceptionState.COMPLETE: (
        "Thank the visitor warmly and wish them a good day."
    ),
    ReceptionState.UNKNOWN_EMPLOYEE: (
        "Tell the visitor that name isn't in your directory. "
        "Offer to find someone else who can help."
    ),
    ReceptionState.ERROR: (
        "Apologize for the issue and ask the visitor to wait a moment."
    ),
}
|
| 553 |
+
|
| 554 |
+
|
| 555 |
+
def next_action_hint(state: ReceptionState) -> str:
    """Look up the workflow directive for ``state``; unknown states yield ``""``."""
    hint = _NEXT_ACTION_HINTS.get(state)
    if hint is None:
        return ""
    return hint
|
| 558 |
+
|
| 559 |
+
|
| 560 |
+
# States that require the bot to speak IMMEDIATELY when entered. Most are
# reached from an in-flight LLM response cycle (tool returned, state advanced —
# the visitor is waiting for the bot to finish what it started). VISITOR_DETECTED
# is the exception: it is a face event, but is included so the bot greets as
# soon as a face appears. Every other state (idle/reset, etc.) waits for the
# visitor to speak first.
|
| 564 |
+
_SPEAK_NOW_STATES: frozenset[ReceptionState] = frozenset({
    # Greet the visitor as soon as the face is detected. Previously the bot
    # would silently wait for the visitor to speak first, which gave the
    # impression of an unresponsive robot — visitors often hesitate when
    # they don't know if the bot is "on" yet.
    ReceptionState.VISITOR_DETECTED,
    # States entered as the result of a tool call or appointment resolution;
    # the visitor is waiting to hear the outcome.
    ReceptionState.RECOGNIZED,
    ReceptionState.APPOINTMENT_MATCHED,
    ReceptionState.NO_APPOINTMENT,
    ReceptionState.NOTIFIED,
    ReceptionState.EMAIL_FAILED,
    ReceptionState.UNKNOWN_EMPLOYEE,
})
|
| 577 |
+
|
| 578 |
+
|
| 579 |
+
def should_speak_immediately(state: ReceptionState) -> bool:
    """True if entering ``state`` should trigger an immediate spoken response.

    This is a plain membership test against ``_SPEAK_NOW_STATES``. Those are
    the states where the visitor is waiting on the bot: the outcome of a tool
    call or appointment resolution, plus ``VISITOR_DETECTED`` so the bot
    greets as soon as a face is seen. For every other state (idle, timeouts,
    manual resets, ...) the bot waits for the visitor to speak.
    """
    return state in _SPEAK_NOW_STATES
|
src/reachy_mini_receptionist/dance_emotion_moves.py
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Dance and emotion moves for the movement queue system.
|
| 2 |
+
|
| 3 |
+
This module implements dance moves and emotions as Move objects that can be queued
|
| 4 |
+
and executed sequentially by the MovementManager.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
import logging
|
| 9 |
+
from typing import Tuple
|
| 10 |
+
|
| 11 |
+
import numpy as np
|
| 12 |
+
from numpy.typing import NDArray
|
| 13 |
+
|
| 14 |
+
from reachy_mini.motion.move import Move
|
| 15 |
+
from reachy_mini.motion.recorded_move import RecordedMoves
|
| 16 |
+
from reachy_mini_dances_library.dance_move import DanceMove
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
logger = logging.getLogger(__name__)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class DanceQueueMove(Move):  # type: ignore
    """Wrapper for dance moves to work with the movement queue system."""

    def __init__(self, move_name: str):
        """Initialize a DanceQueueMove.

        Args:
            move_name: Name passed to ``DanceMove`` to select the dance.
        """
        self.dance_move = DanceMove(move_name)
        self.move_name = move_name

    @property
    def duration(self) -> float:
        """Duration property required by official Move interface."""
        return float(self.dance_move.duration)

    def evaluate(self, t: float) -> tuple[NDArray[np.float64] | None, NDArray[np.float64] | None, float | None]:
        """Evaluate dance move at time t.

        Returns:
            ``(head_pose, antennas, body_yaw)`` from the wrapped dance move,
            with a tuple of antennas converted to a float64 array. If the
            wrapped move raises, the error is logged and a neutral pose
            (zero head pose, zero antennas, zero yaw) is returned instead.
        """
        try:
            # Get the pose from the dance move
            head_pose, antennas, body_yaw = self.dance_move.evaluate(t)

            # Convert a tuple to the array form of the official Move format.
            # dtype is explicit so integer tuples still yield float64, which
            # matches the declared return type and the fallback branch below.
            if isinstance(antennas, tuple):
                antennas = np.array([antennas[0], antennas[1]], dtype=np.float64)

            return (head_pose, antennas, body_yaw)

        except Exception as e:
            # Lazy %-args: the message is only built if the record is emitted.
            logger.error("Error evaluating dance move '%s' at t=%s: %s", self.move_name, t, e)
            # Return neutral pose on error
            from reachy_mini.utils import create_head_pose

            neutral_head_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
            return (neutral_head_pose, np.array([0.0, 0.0], dtype=np.float64), 0.0)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
class EmotionQueueMove(Move):  # type: ignore
    """Wrapper for emotion moves to work with the movement queue system."""

    def __init__(self, emotion_name: str, recorded_moves: RecordedMoves):
        """Initialize an EmotionQueueMove.

        Args:
            emotion_name: Key looked up in ``recorded_moves``.
            recorded_moves: Collection the emotion move is fetched from via ``get``.
        """
        self.emotion_move = recorded_moves.get(emotion_name)
        self.emotion_name = emotion_name

    @property
    def duration(self) -> float:
        """Duration property required by official Move interface."""
        return float(self.emotion_move.duration)

    def evaluate(self, t: float) -> tuple[NDArray[np.float64] | None, NDArray[np.float64] | None, float | None]:
        """Evaluate emotion move at time t.

        Returns:
            ``(head_pose, antennas, body_yaw)`` from the wrapped emotion move,
            with a tuple of antennas converted to a float64 array. If the
            wrapped move raises, the error is logged and a neutral pose
            (zero head pose, zero antennas, zero yaw) is returned instead.
        """
        try:
            # Get the pose from the emotion move
            head_pose, antennas, body_yaw = self.emotion_move.evaluate(t)

            # Convert a tuple to the array form of the official Move format.
            # dtype is explicit so integer tuples still yield float64, which
            # matches the declared return type and the fallback branch below.
            if isinstance(antennas, tuple):
                antennas = np.array([antennas[0], antennas[1]], dtype=np.float64)

            return (head_pose, antennas, body_yaw)

        except Exception as e:
            # Lazy %-args: the message is only built if the record is emitted.
            logger.error("Error evaluating emotion '%s' at t=%s: %s", self.emotion_name, t, e)
            # Return neutral pose on error
            from reachy_mini.utils import create_head_pose

            neutral_head_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
            return (neutral_head_pose, np.array([0.0, 0.0], dtype=np.float64), 0.0)
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
class GotoQueueMove(Move):  # type: ignore
    """Wrapper for goto moves to work with the movement queue system."""

    def __init__(
        self,
        target_head_pose: NDArray[np.float32],
        start_head_pose: NDArray[np.float32] | None = None,
        target_antennas: Tuple[float, float] = (0, 0),
        start_antennas: Tuple[float, float] | None = None,
        target_body_yaw: float = 0,
        start_body_yaw: float | None = None,
        duration: float = 1.0,
    ):
        """Initialize a GotoQueueMove.

        Args:
            target_head_pose: Head pose to reach at the end of the move.
            start_head_pose: Head pose to start from; ``None`` means a
                neutral pose is used at evaluation time.
            target_antennas: Antenna values to reach.
            start_antennas: Antenna values to start from; ``None`` means ``(0, 0)``.
            target_body_yaw: Body yaw to reach.
            start_body_yaw: Body yaw to start from; ``None`` means ``0``.
            duration: Length of the move, in the same unit as ``t`` in ``evaluate``.
        """
        self._duration = duration
        self.target_head_pose = target_head_pose
        self.start_head_pose = start_head_pose
        self.target_antennas = target_antennas
        # Explicit None checks: truthiness of an array-like start value is
        # ambiguous, and only "not provided" should trigger the default.
        self.start_antennas = (0, 0) if start_antennas is None else start_antennas
        self.target_body_yaw = target_body_yaw
        self.start_body_yaw = 0 if start_body_yaw is None else start_body_yaw

    @property
    def duration(self) -> float:
        """Duration property required by official Move interface."""
        return self._duration

    def evaluate(self, t: float) -> tuple[NDArray[np.float64] | None, NDArray[np.float64] | None, float | None]:
        """Evaluate goto move at time t using linear interpolation.

        Progress is ``t / duration`` clamped to ``[0, 1]``. A non-positive
        duration is treated as an already-finished move (progress 1), so the
        target is returned without dividing by zero. If anything raises, the
        error is logged and the target pose is returned.
        """
        try:
            from reachy_mini.utils import create_head_pose
            from reachy_mini.utils.interpolation import linear_pose_interpolation

            # Clamp progress to [0, 1]; guard the division for duration <= 0.
            span = self.duration
            t_clamped = max(0, min(1, t / span)) if span > 0 else 1

            # Use start pose if available, otherwise neutral
            if self.start_head_pose is not None:
                start_pose = self.start_head_pose
            else:
                start_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)

            # Interpolate head pose
            head_pose = linear_pose_interpolation(start_pose, self.target_head_pose, t_clamped)

            # Interpolate antennas - return as numpy array
            antennas = np.array(
                [
                    self.start_antennas[0] + (self.target_antennas[0] - self.start_antennas[0]) * t_clamped,
                    self.start_antennas[1] + (self.target_antennas[1] - self.start_antennas[1]) * t_clamped,
                ],
                dtype=np.float64,
            )

            # Interpolate body yaw
            body_yaw = self.start_body_yaw + (self.target_body_yaw - self.start_body_yaw) * t_clamped

            return (head_pose, antennas, body_yaw)

        except Exception as e:
            # Lazy %-args: the message is only built if the record is emitted.
            logger.error("Error evaluating goto move at t=%s: %s", t, e)
            # Return target pose on error - convert to float64
            target_head_pose_f64 = self.target_head_pose.astype(np.float64)
            target_antennas_array = np.array([self.target_antennas[0], self.target_antennas[1]], dtype=np.float64)
            return (target_head_pose_f64, target_antennas_array, self.target_body_yaw)
|
src/reachy_mini_receptionist/employees.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Employee directory — read API used by tools, calendar, and the LLM.
|
| 2 |
+
|
| 3 |
+
Two-tier source:
|
| 4 |
+
|
| 5 |
+
1. **Primary**: ``EmployeeStore`` (SQLite), populated and edited via the
|
| 6 |
+
dashboard's Employees panel.
|
| 7 |
+
2. **Fallback**: the hardcoded ``_SEED_EMPLOYEES`` constant below. This
|
| 8 |
+
only takes effect when the store is unset (e.g. tests that import
|
| 9 |
+
``employees`` directly) OR when the store is empty. On a real
|
| 10 |
+
deployment, ``main.py`` constructs the store and seeds it with
|
| 11 |
+
``_SEED_EMPLOYEES`` on first run; after that, edits via the
|
| 12 |
+
dashboard are the source of truth.
|
| 13 |
+
|
| 14 |
+
Consumers (``lookup_employee``, ``find_email_for``, ``get_all_employees``,
|
| 15 |
+
``format_for_llm``) keep the same signatures so ``calendar_data`` and the
|
| 16 |
+
``lookup_employee`` tool need zero changes.
|
| 17 |
+
"""
|
| 18 |
+
from __future__ import annotations
|
| 19 |
+
|
| 20 |
+
import logging
|
| 21 |
+
from typing import Any, List, Optional, TypedDict
|
| 22 |
+
|
| 23 |
+
logger = logging.getLogger(__name__)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class Employee(TypedDict, total=False):
    """Directory record handed to tools and the LLM.

    Declared with ``total=False``, so every key is optional as far as the
    type checker is concerned.
    """

    # Name compared (case-insensitively) by ``lookup_employee``.
    name: str
    # Address returned by ``find_email_for``.
    email: str
    # Alternate names that ``lookup_employee`` also accepts.
    aliases: List[str]
    # Optional title; presumably a job title — confirm against the store schema.
    title: Optional[str]
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
# Seed list intentionally empty. A brand-new install starts with zero
|
| 34 |
+
# employees and the dashboard's Employees panel shows an empty state
|
| 35 |
+
# with a "+ Add" button — that's a clearer first-run UX than pre-loading
|
| 36 |
+
# dummy entries the operator has to delete one by one before adding
|
| 37 |
+
# their real team. The seed-load mechanism in employees_store.py is
|
| 38 |
+
# preserved for callers that want to bundle a seed list (e.g. a future
|
| 39 |
+
# import-from-CSV path).
|
| 40 |
+
_SEED_EMPLOYEES: List[Employee] = []
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
# Process-wide reference to the active store. ``main.py`` calls
|
| 44 |
+
# ``set_store(...)`` after construction. Kept ``None`` in tests / imports
|
| 45 |
+
# that don't go through main, in which case we fall back to the seed list.
|
| 46 |
+
_store: Any = None
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def set_store(store: Any) -> None:
    """Register the EmployeeStore the module should read from.

    Passing ``None`` unbinds the store, after which the read functions fall
    back to the seed list.

    Args:
        store: Object providing ``count()`` (used here for the log line), or ``None``.
    """
    global _store
    _store = store
    # Identity check, not truthiness: a store object that defines
    # ``__len__``/``__bool__`` and is empty would otherwise be reported as
    # "n/a". This also matches the ``_store is not None`` checks used by the
    # read functions in this module.
    logger.info(
        "employees: bound to store (count=%s)",
        store.count() if store is not None else "n/a",
    )
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def _strip_internal(emp: dict[str, Any]) -> Employee:
|
| 57 |
+
"""Drop SQLite-side fields the LLM doesn't need (id, created_at, etc)."""
|
| 58 |
+
return { # type: ignore[return-value]
|
| 59 |
+
"name": emp.get("name", ""),
|
| 60 |
+
"email": emp.get("email", ""),
|
| 61 |
+
"aliases": list(emp.get("aliases") or []),
|
| 62 |
+
"title": emp.get("title"),
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def _normalize(s: str) -> str:
|
| 67 |
+
return (s or "").strip().lower()
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def get_all_employees() -> List[Employee]:
    """Return a snapshot of the full directory.

    Rows come from the bound store when it is set and returns at least one
    row. If the store is unset, empty, or raises while being read (logged as
    a warning), shallow copies of the seed entries are returned instead.
    """
    if _store is not None:
        try:
            rows = _store.list_all()
            if rows:
                return list(map(_strip_internal, rows))
        except Exception as exc:
            logger.warning("employees.get_all_employees: store read failed (%s); using seed", exc)

    fallback = []
    for seed in _SEED_EMPLOYEES:
        fallback.append(dict(seed))
    return fallback  # type: ignore[return-value]
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def lookup_employee(query: str) -> Optional[Employee]:
    """Resolve a name or alias to a directory entry, or ``None``.

    Blank queries return ``None`` immediately. The bound store is asked
    first; a store miss or a store error (logged as a warning) falls through
    to the seed list, where the query must equal an entry's name or one of
    its aliases after trimming and lower-casing (no partial matches).
    """
    if not (query or "").strip():
        return None

    if _store is not None:
        try:
            hit = _store.lookup(query)
            if hit:
                return _strip_internal(hit)
        except Exception as exc:
            logger.warning("employees.lookup_employee: store read failed (%s); using seed", exc)

    wanted = _normalize(query)
    if not wanted:
        return None

    for emp in _SEED_EMPLOYEES:
        # Name first, then aliases, mirroring the match priority per entry.
        candidates = [emp.get("name", ""), *(emp.get("aliases", []) or [])]
        if any(_normalize(candidate) == wanted for candidate in candidates):
            return dict(emp)  # type: ignore[return-value]
    return None
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def find_email_for(query: str) -> Optional[str]:
    """Return the ``email`` of the entry matching ``query``, or ``None`` when nothing matches."""
    match = lookup_employee(query)
    if not match:
        return None
    return match.get("email")
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def format_for_llm() -> str:
    """Render the directory as a short plain-text listing for the LLM.

    Produces a header line followed by one bullet per employee; aliases, when
    present, are appended in parentheses. Emails and titles are not included.
    """
    directory = get_all_employees()
    if not directory:
        return "Employee directory is empty."
    rendered = ["Employee directory:"]
    for person in directory:
        nicknames = person.get("aliases") or []
        suffix = ""
        if nicknames:
            suffix = f" (also: {', '.join(nicknames)})"
        rendered.append(f" - {person.get('name', '?')}{suffix}")
    return "\n".join(rendered)
|
src/reachy_mini_receptionist/employees_store.py
ADDED
|
@@ -0,0 +1,342 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""SQLite-backed employee directory CRUD.
|
| 2 |
+
|
| 3 |
+
The dashboard's Employees panel uses this; ``employees.py`` reads from
|
| 4 |
+
this store too (with a fall-through to the hardcoded ``_SEED_EMPLOYEES`` list
|
| 5 |
+
when the store is empty, which only happens on a brand-new install
|
| 6 |
+
before the seed runs).
|
| 7 |
+
|
| 8 |
+
Schema matches the operator's mental model: name, email, optional title,
|
| 9 |
+
optional list of aliases the bot might hear (e.g. "AJ" for "Arjun
|
| 10 |
+
Mehta"). Name uniqueness is enforced case-insensitively so the bot can
|
| 11 |
+
never end up with two "Mukul" entries that route differently.
|
| 12 |
+
|
| 13 |
+
Lives next to ``visitor_log.db`` in the app's instance directory. Uses
|
| 14 |
+
the same WAL + per-call connection pattern.
|
| 15 |
+
"""
|
| 16 |
+
from __future__ import annotations
|
| 17 |
+
|
| 18 |
+
import json
|
| 19 |
+
import logging
|
| 20 |
+
import sqlite3
|
| 21 |
+
import threading
|
| 22 |
+
from datetime import datetime
|
| 23 |
+
from pathlib import Path
|
| 24 |
+
from typing import Any, Iterable, List, Optional
|
| 25 |
+
|
| 26 |
+
logger = logging.getLogger(__name__)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
_SCHEMA = """
|
| 30 |
+
CREATE TABLE IF NOT EXISTS employees (
|
| 31 |
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
| 32 |
+
name TEXT NOT NULL,
|
| 33 |
+
email TEXT NOT NULL,
|
| 34 |
+
title TEXT,
|
| 35 |
+
aliases TEXT,
|
| 36 |
+
created_at TEXT NOT NULL,
|
| 37 |
+
updated_at TEXT NOT NULL
|
| 38 |
+
);
|
| 39 |
+
|
| 40 |
+
CREATE UNIQUE INDEX IF NOT EXISTS idx_employees_name_lower
|
| 41 |
+
ON employees (LOWER(name));
|
| 42 |
+
"""
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
class EmployeeExistsError(Exception):
    """Signal that a create/rename would duplicate an existing employee name.

    Names are compared case-insensitively, so "mukul" collides with "Mukul".
    """
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
class EmployeeNotFoundError(Exception):
    """Signal that a CRUD operation targeted an employee id that does not exist.

    NOTE(review): nothing in the store code visible here raises this; it is
    presumably raised by callers (e.g. the dashboard API) — confirm.
    """
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
class EmployeeStore:
    """Thread-safe employee directory backed by a single SQLite file.

    Every public method takes ``self._lock`` and opens its own short-lived
    connection, so a single instance can be shared between threads.
    Returned records are plain dicts with the keys ``id``, ``name``,
    ``email``, ``title``, ``aliases`` (a list), ``created_at`` and
    ``updated_at``.
    """

    def __init__(self, db_path: str | Path) -> None:
        """Open (creating if needed) the database at ``db_path``.

        The parent directory is created when missing and the schema is
        applied immediately.
        """
        self._db_path = Path(db_path)
        self._db_path.parent.mkdir(parents=True, exist_ok=True)
        self._lock = threading.Lock()
        self._init_schema()
        logger.info("EmployeeStore initialised at %s", self._db_path)

    # ------------------------------------------------------------------
    # Connection / schema
    # ------------------------------------------------------------------

    def _connect(self) -> sqlite3.Connection:
        """Return a fresh connection configured with ``sqlite3.Row`` rows."""
        conn = sqlite3.connect(self._db_path, check_same_thread=False, timeout=5.0)
        try:
            conn.row_factory = sqlite3.Row
            conn.execute("PRAGMA synchronous=NORMAL")
        except Exception:
            # Do not leak the handle if configuration fails.
            conn.close()
            raise
        return conn

    def _init_schema(self) -> None:
        """Switch the database to WAL mode and apply ``_SCHEMA``."""
        with self._lock:
            conn = self._connect()
            try:
                conn.execute("PRAGMA journal_mode=WAL")
                conn.executescript(_SCHEMA)
                conn.commit()
            finally:
                conn.close()

    @staticmethod
    def _utc_timestamp() -> str:
        """Return the current UTC time as a naive ISO-8601 string (seconds).

        Produces the same text format as the deprecated
        ``datetime.utcnow().isoformat(timespec="seconds")``.
        """
        from datetime import timezone

        now = datetime.now(timezone.utc).replace(tzinfo=None)
        return now.isoformat(timespec="seconds")

    @staticmethod
    def _select_by_id(conn: sqlite3.Connection, employee_id: Any) -> Optional[sqlite3.Row]:
        """Fetch the raw row for ``employee_id`` on an open connection."""
        return conn.execute(
            "SELECT * FROM employees WHERE id = ?",
            (employee_id,),
        ).fetchone()

    # ------------------------------------------------------------------
    # Row <-> dict helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _row_to_dict(row: sqlite3.Row) -> dict[str, Any]:
        """Convert a database row into the public record dict.

        The ``aliases`` column holds a JSON array; anything missing,
        undecodable, or not a list is returned as an empty list.
        """
        aliases_raw = row["aliases"] or "[]"
        try:
            aliases = json.loads(aliases_raw)
            if not isinstance(aliases, list):
                aliases = []
        except Exception:
            # Deliberately lenient: a corrupt aliases cell must not make the
            # whole record unreadable.
            aliases = []
        return {
            "id": row["id"],
            "name": row["name"],
            "email": row["email"],
            "title": row["title"],
            "aliases": aliases,
            "created_at": row["created_at"],
            "updated_at": row["updated_at"],
        }

    @staticmethod
    def _aliases_to_text(aliases: Optional[Iterable[str]]) -> str:
        """Serialise aliases to a JSON array string.

        Entries are stripped, blanks are dropped, and case-insensitive
        duplicates are removed (the first spelling wins).
        """
        cleaned = []
        seen: set[str] = set()
        for a in aliases or []:
            s = (a or "").strip()
            if not s:
                continue
            key = s.lower()
            if key in seen:
                continue
            seen.add(key)
            cleaned.append(s)
        return json.dumps(cleaned)

    # ------------------------------------------------------------------
    # Reads
    # ------------------------------------------------------------------

    def list_all(self) -> List[dict[str, Any]]:
        """Return every employee, ordered by ``LOWER(name)``."""
        with self._lock:
            conn = self._connect()
            try:
                rows = conn.execute(
                    "SELECT * FROM employees ORDER BY LOWER(name)",
                ).fetchall()
            finally:
                conn.close()
        return [self._row_to_dict(r) for r in rows]

    def count(self) -> int:
        """Return the number of stored employees."""
        with self._lock:
            conn = self._connect()
            try:
                row = conn.execute("SELECT COUNT(*) AS n FROM employees").fetchone()
            finally:
                conn.close()
        return int(row["n"]) if row else 0

    def get_by_id(self, employee_id: int) -> Optional[dict[str, Any]]:
        """Return the employee with ``employee_id``, or ``None`` if absent."""
        with self._lock:
            conn = self._connect()
            try:
                row = self._select_by_id(conn, int(employee_id))
            finally:
                conn.close()
        return self._row_to_dict(row) if row else None

    def lookup(self, query: str) -> Optional[dict[str, Any]]:
        """Find an employee by name OR alias (case-insensitive, exact match).

        Mirrors the original ``employees.lookup_employee`` semantics — exact
        match only so that "Sam" can never silently route to "Samira". A name
        match always takes priority over an alias match.

        Both comparisons are done in Python. SQLite's built-in ``LOWER()``
        only folds ASCII, so comparing ``LOWER(name)`` against a Python
        lower-cased query would never match names such as "Émile".
        """
        q = (query or "").strip().lower()
        if not q:
            return None
        with self._lock:
            conn = self._connect()
            try:
                rows = conn.execute("SELECT * FROM employees ORDER BY id").fetchall()
            finally:
                conn.close()
        records = [self._row_to_dict(r) for r in rows]
        for rec in records:
            if (rec["name"] or "").lower() == q:
                return rec
        for rec in records:
            for alias in rec.get("aliases") or []:
                if (alias or "").strip().lower() == q:
                    return rec
        return None

    # ------------------------------------------------------------------
    # Writes
    # ------------------------------------------------------------------

    def create(
        self,
        name: str,
        email: str,
        aliases: Optional[Iterable[str]] = None,
        title: Optional[str] = None,
    ) -> dict[str, Any]:
        """Insert a new employee and return the stored record.

        ``name``, ``email`` and ``title`` are stripped; a blank title is
        stored as NULL (the same normalisation ``update`` applies).

        Raises:
            ValueError: if ``name`` or ``email`` is blank.
            EmployeeExistsError: if the insert violates the unique name index.
        """
        name = (name or "").strip()
        email = (email or "").strip()
        if not name:
            raise ValueError("name is required")
        if not email:
            raise ValueError("email is required")
        title_text = (title or "").strip() or None
        aliases_text = self._aliases_to_text(aliases)
        now = self._utc_timestamp()
        with self._lock:
            conn = self._connect()
            try:
                try:
                    cur = conn.execute(
                        """
                        INSERT INTO employees (name, email, title, aliases, created_at, updated_at)
                        VALUES (?, ?, ?, ?, ?, ?)
                        """,
                        (name, email, title_text, aliases_text, now, now),
                    )
                    conn.commit()
                    new_id = cur.lastrowid
                except sqlite3.IntegrityError as e:
                    raise EmployeeExistsError(
                        f"An employee named {name!r} already exists (case-insensitive)"
                    ) from e
                row = self._select_by_id(conn, new_id)
            finally:
                conn.close()
        logger.info("EmployeeStore.create: id=%s name=%r email=%r", new_id, name, email)
        return self._row_to_dict(row)

    def update(
        self,
        employee_id: int,
        *,
        name: Optional[str] = None,
        email: Optional[str] = None,
        aliases: Optional[Iterable[str]] = None,
        title: Optional[str] = None,
    ) -> Optional[dict[str, Any]]:
        """Update the given fields of an employee.

        Arguments left as ``None`` are not touched. Passing an empty or
        whitespace-only ``title`` clears it to NULL. With no fields given,
        the current record is returned unchanged.

        Returns:
            The updated record, or ``None`` if ``employee_id`` does not exist.

        Raises:
            ValueError: if ``name`` or ``email`` is given but blank.
            EmployeeExistsError: if the update violates the unique name index.
        """
        sets: List[str] = []
        params: List[Any] = []
        if name is not None:
            cleaned = name.strip()
            if not cleaned:
                raise ValueError("name cannot be empty")
            sets.append("name = ?")
            params.append(cleaned)
        if email is not None:
            cleaned = email.strip()
            if not cleaned:
                raise ValueError("email cannot be empty")
            sets.append("email = ?")
            params.append(cleaned)
        if aliases is not None:
            sets.append("aliases = ?")
            params.append(self._aliases_to_text(aliases))
        if title is not None:
            sets.append("title = ?")
            params.append(title.strip() or None)
        if not sets:
            # Nothing to update; return current row.
            return self.get_by_id(employee_id)
        sets.append("updated_at = ?")
        params.append(self._utc_timestamp())
        params.append(int(employee_id))
        with self._lock:
            conn = self._connect()
            try:
                try:
                    # Only fixed column fragments are interpolated; every
                    # value is bound as a parameter.
                    cur = conn.execute(
                        f"UPDATE employees SET {', '.join(sets)} WHERE id = ?",
                        params,
                    )
                    conn.commit()
                    if cur.rowcount == 0:
                        return None
                except sqlite3.IntegrityError as e:
                    raise EmployeeExistsError(
                        "Another employee already uses that name (case-insensitive)"
                    ) from e
                row = self._select_by_id(conn, int(employee_id))
            finally:
                conn.close()
        if row:
            logger.info("EmployeeStore.update: id=%s -> %s", employee_id, dict(row))
            return self._row_to_dict(row)
        return None

    def delete(self, employee_id: int) -> bool:
        """Delete an employee; return ``True`` if a row was removed."""
        with self._lock:
            conn = self._connect()
            try:
                cur = conn.execute(
                    "DELETE FROM employees WHERE id = ?",
                    (int(employee_id),),
                )
                conn.commit()
                removed = cur.rowcount > 0
            finally:
                conn.close()
        if removed:
            logger.info("EmployeeStore.delete: id=%s", employee_id)
        return removed

    # ------------------------------------------------------------------
    # Seeding
    # ------------------------------------------------------------------

    def seed_if_empty(self, employees: Iterable[dict[str, Any]]) -> int:
        """Bulk-insert ``employees`` only if the table is currently empty.

        Returns the number inserted. Idempotent across restarts — the
        seed runs once on a brand-new install and is then a no-op.
        Duplicate-name conflicts inside the seed list are skipped (the
        first occurrence wins) so partial seeds don't abort the whole
        batch. Any other per-entry failure is logged and skipped too.
        """
        if self.count() > 0:
            return 0
        inserted = 0
        for emp in employees:
            try:
                self.create(
                    name=emp.get("name", ""),
                    email=emp.get("email", ""),
                    aliases=emp.get("aliases") or [],
                    title=emp.get("title"),
                )
                inserted += 1
            except EmployeeExistsError:
                logger.warning(
                    "Seed: skipping duplicate %r", emp.get("name"),
                )
            except Exception as e:
                logger.warning(
                    "Seed: failed to insert %r: %s", emp.get("name"), e,
                )
        if inserted:
            logger.info("EmployeeStore: seeded %d employee(s)", inserted)
        return inserted
|
src/reachy_mini_receptionist/face_db.py
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""File-based face store for OpenCV LBPH recognition.
|
| 2 |
+
|
| 3 |
+
Design decisions:
|
| 4 |
+
- Stores 100×100 grayscale face crops as PNG files named after the guest.
|
| 5 |
+
e.g. guests/Beyonce.png, guests/Elon Musk.png
|
| 6 |
+
- No database required — files are the database. Easy to inspect, edit,
|
| 7 |
+
or delete with any file manager.
|
| 8 |
+
- Max 100 guests. When full, the oldest file (by mtime) is replaced (FIFO).
|
| 9 |
+
- Thread-safe: all writes use a threading.Lock.
|
| 10 |
+
- The guests/ directory lives in the app instance directory so it persists
|
| 11 |
+
across restarts.
|
| 12 |
+
"""
|
| 13 |
+
from __future__ import annotations
|
| 14 |
+
|
| 15 |
+
import logging
|
| 16 |
+
import threading
|
| 17 |
+
from datetime import datetime
|
| 18 |
+
from pathlib import Path
|
| 19 |
+
from typing import List, Optional, Tuple
|
| 20 |
+
|
| 21 |
+
import cv2
|
| 22 |
+
import numpy as np
|
| 23 |
+
|
| 24 |
+
logger = logging.getLogger(__name__)
|
| 25 |
+
|
| 26 |
+
MAX_GUESTS = 100
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def _safe_filename(name: str) -> str:
|
| 30 |
+
"""Sanitise a guest name so it is safe to use as a filename."""
|
| 31 |
+
# Replace characters that are problematic on Windows/Linux/macOS
|
| 32 |
+
for ch in r'\/:*?"<>|':
|
| 33 |
+
name = name.replace(ch, "_")
|
| 34 |
+
return name.strip() or "unknown"
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
class FaceDatabase:
    """Directory of guest face crops, one PNG file per guest.

    The public API is kept identical to the earlier SQLite-backed version so
    callers (FaceRecognitionWorker, the register_guest tool, API endpoints)
    do not have to change. The guest's name is the file stem; the file's
    mtime doubles as its timestamp.
    """

    def __init__(self, db_path: str | Path) -> None:
        """Create ``guests/`` next to ``db_path`` and use it as the store.

        ``db_path`` itself is never opened: the legacy ``guests.db`` path is
        still accepted so the call-site in main.py can stay as it was.
        """
        legacy_path = Path(db_path)
        self._guests_dir = legacy_path.parent / "guests"
        self._guests_dir.mkdir(parents=True, exist_ok=True)
        self._lock = threading.Lock()
        logger.info("FaceDatabase (file-based) initialised at %s", self._guests_dir)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _path_for(self, name: str) -> Path:
        """Return the PNG path used for guest ``name`` (sanitised)."""
        return self._guests_dir / (_safe_filename(name) + ".png")

    def _all_png_files(self) -> List[Path]:
        """Return every ``*.png`` in the guests directory, newest mtime first."""
        return sorted(
            self._guests_dir.glob("*.png"),
            key=lambda p: p.stat().st_mtime,
            reverse=True,
        )

    # ------------------------------------------------------------------
    # Write
    # ------------------------------------------------------------------

    def add_or_update_guest(self, name: str, face_crop: np.ndarray) -> None:
        """Write ``face_crop`` to ``<guests_dir>/<name>.png``.

        When ``name`` has no file yet and the directory already holds
        ``MAX_GUESTS`` PNGs, the file with the oldest mtime is deleted first.

        Raises:
            RuntimeError: if ``cv2.imwrite`` reports failure.
        """
        destination = self._path_for(name)

        with self._lock:
            if not destination.exists():
                existing = list(self._guests_dir.glob("*.png"))
                if len(existing) >= MAX_GUESTS:
                    # Make room by dropping the least recently written file.
                    stalest = min(existing, key=lambda p: p.stat().st_mtime)
                    stalest.unlink()
                    logger.info("Evicted oldest guest file: %s (capacity=%d)", stalest.name, MAX_GUESTS)

            written = cv2.imwrite(str(destination), face_crop)
            if not written:
                raise RuntimeError(f"cv2.imwrite failed for path: {destination}")
            logger.info("Saved guest '%s' → %s", name, destination)

    # ------------------------------------------------------------------
    # Read
    # ------------------------------------------------------------------

    def get_all_guests(self) -> List[dict]:
        """List guests newest-first as dicts.

        Each dict has ``name`` (file stem), ``timestamp`` (file mtime in
        local time, ``YYYY-MM-DD HH:MM:SS``) and ``thumbnail_url``
        (``/guest_images/<file name>``).
        """
        listing = []
        with self._lock:
            for png in self._all_png_files():
                stamp = datetime.fromtimestamp(png.stat().st_mtime)
                listing.append({
                    "name": png.stem,
                    "timestamp": stamp.strftime("%Y-%m-%d %H:%M:%S"),
                    "thumbnail_url": f"/guest_images/{png.name}",
                })
        return listing

    def get_all_guests_with_crops(self) -> List[Tuple[str, np.ndarray]]:
        """Return ``(name, grayscale_crop)`` pairs for recognizer training.

        Files that OpenCV cannot read are logged and left out.
        """
        pairs = []
        with self._lock:
            for png in self._all_png_files():
                image = cv2.imread(str(png), cv2.IMREAD_GRAYSCALE)
                if image is None:
                    logger.warning("Could not read face crop from %s — skipping", png)
                    continue
                pairs.append((png.stem, image))
        return pairs

    def count(self) -> int:
        """Return how many guest PNGs are currently stored."""
        with self._lock:
            return sum(1 for _ in self._guests_dir.glob("*.png"))

    def clear(self) -> None:
        """Delete every guest PNG (useful for a demo reset)."""
        with self._lock:
            for png in self._guests_dir.glob("*.png"):
                png.unlink()
        logger.info("FaceDatabase cleared")

    def cleanup_older_than(self, max_age_days: float) -> int:
        """Remove guest PNGs whose mtime is more than ``max_age_days`` old.

        A value of zero or below disables cleanup and returns 0 right away.
        Files that cannot be removed are logged and skipped rather than
        raising — the TTL is best-effort.

        Returns:
            The number of files deleted.
        """
        if max_age_days <= 0:
            return 0

        import time as _clock

        threshold = _clock.time() - (max_age_days * 86400.0)
        purged = 0
        with self._lock:
            # Snapshot the listing so deletions cannot disturb the iteration.
            for png in list(self._guests_dir.glob("*.png")):
                try:
                    if png.stat().st_mtime < threshold:
                        png.unlink()
                        purged += 1
                        logger.info(
                            "Face TTL: removed %s (older than %.1f days)",
                            png.name, max_age_days,
                        )
                except Exception as exc:
                    logger.warning("Face TTL: could not remove %s: %s", png, exc)
        return purged

    def delete_guest(self, name: str) -> bool:
        """Delete the PNG for guest ``name``.

        Returns ``True`` when the file existed and was removed, ``False``
        when there was nothing to delete.
        """
        victim = self._path_for(name)
        with self._lock:
            found = victim.exists()
            if found:
                victim.unlink()
                logger.info("Deleted guest '%s' -> %s", name, victim)
        return found

    # ------------------------------------------------------------------
    # Expose the guests directory path (needed by main.py to mount statics)
    # ------------------------------------------------------------------

    @property
    def guests_dir(self) -> Path:
        """Directory that holds the guest PNG files."""
        return self._guests_dir
|
src/reachy_mini_receptionist/face_recognition_worker.py
ADDED
|
@@ -0,0 +1,698 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Background face recognition worker — OpenCV YuNet detection + LBPH recognition.
|
| 2 |
+
|
| 3 |
+
Design decisions:
|
| 4 |
+
- Uses cv2.FaceDetectorYN (YuNet) for face detection: returns a real confidence
|
| 5 |
+
score (0–1) per bounding box. Model (~400 KB ONNX) is downloaded to
|
| 6 |
+
~/.cache/reachy_mini/ on first use.
|
| 7 |
+
- Uses OpenCV LBPH (Local Binary Pattern Histogram) recognizer for identification.
|
| 8 |
+
Same algorithm used in embedded/microcontroller face recognition.
|
| 9 |
+
- Runs in a daemon thread so it never blocks the audio/LLM loop.
|
| 10 |
+
- Processes every Nth frame to keep CPU usage low.
|
| 11 |
+
- Annotates frames with bounding boxes + labels for the MJPEG dashboard stream.
|
| 12 |
+
- All shared state is protected by threading.Lock so tools can read safely.
|
| 13 |
+
- Frames are pulled from CameraWorker (robot camera via SDK) rather than opening
|
| 14 |
+
a raw cv2.VideoCapture, so the correct camera is always used on Lite setups.
|
| 15 |
+
|
| 16 |
+
Detection quality metrics:
|
| 17 |
+
- Detection confidence (YuNet score 0–1): higher = more certain this is a face.
|
| 18 |
+
Used as a multiplier in the quality score so uncertain detections rank lower.
|
| 19 |
+
- Blur score (Laplacian variance): cv2.Laplacian(crop, cv2.CV_64F).var()
|
| 20 |
+
High value = sharp / lots of edge detail → good crop.
|
| 21 |
+
Low value = blurry / uniform → bad crop (head mid-motion, etc.).
|
| 22 |
+
Crops below _MIN_BLUR_SCORE are considered low-quality, but they are still
|
| 23 |
+
stored so dashboard previews and fallback checks can return "best available"
|
| 24 |
+
evidence instead of empty results.
|
| 25 |
+
- Face area (w×h pixels) is kept as a secondary tiebreaker: among comparably sharp
|
| 26 |
+
crops the closer/larger face is still preferred.
|
| 27 |
+
- Combined quality score: blur_score × log(face_area) × max(det_confidence, 0.01)
|
| 28 |
+
|
| 29 |
+
Requirements: opencv-contrib-python (pip install opencv-contrib-python)
|
| 30 |
+
The contrib package is a superset of opencv-python — don't install both.
|
| 31 |
+
"""
|
| 32 |
+
from __future__ import annotations
|
| 33 |
+
|
| 34 |
+
import logging
|
| 35 |
+
import math
|
| 36 |
+
import pathlib
|
| 37 |
+
import threading
|
| 38 |
+
import time
|
| 39 |
+
import urllib.request
|
| 40 |
+
from collections import deque
|
| 41 |
+
from typing import Any, Callable, Optional
|
| 42 |
+
|
| 43 |
+
import cv2
|
| 44 |
+
import numpy as np
|
| 45 |
+
|
| 46 |
+
logger = logging.getLogger(__name__)
|
| 47 |
+
|
| 48 |
+
# How many frames to skip between recognition passes
_PROCESS_EVERY_N_FRAMES = 10

# LBPH confidence threshold: LOWER = stricter match (distance, not similarity).
#
# Calibration notes (single 100×100 grayscale crop per guest):
#   ≤ 50      almost certainly the same person (well-lit, same angle)
#   50 - 75   plausible match (different angle/lighting OK)
#   75 - 100  weak — often false positives on similar-looking people
#   100 - 160 printed photos, strangers who happen to resemble a guest
#   > 160     unrelated face / no match
#
# Default was 110 which silently mis-recognised strangers as registered
# guests (we saw a stranger scored 103 and got greeted as "Jon"). 75 is
# the right starting point: prefers "I don't recognise you, please tell
# me your name" over a wrong-greeting failure. Operators can tune via
# the FACE_LBPH_THRESHOLD env var if their lighting/angles demand it.
_DEFAULT_CONFIDENCE_THRESHOLD = 75.0


def _resolve_threshold() -> float:
    """Return the LBPH match threshold, honouring the FACE_LBPH_THRESHOLD env var.

    Falls back to ``_DEFAULT_CONFIDENCE_THRESHOLD`` when the variable is
    unset, blank, not parseable as a float, non-finite (``nan`` / ``inf``)
    or not strictly positive.

    Returns:
        A finite, strictly positive float.
    """
    import os

    raw = os.getenv("FACE_LBPH_THRESHOLD")
    if raw is None or not raw.strip():
        return _DEFAULT_CONFIDENCE_THRESHOLD

    try:
        val = float(raw)
    except ValueError:
        # Best-effort: a malformed override must never prevent start-up.
        return _DEFAULT_CONFIDENCE_THRESHOLD

    # float() happily parses "nan" and "inf". NaN makes every
    # `confidence <= threshold` comparison False (nobody is ever recognised)
    # and inf makes it always True (everybody is), so reject both.
    if not math.isfinite(val) or val <= 0:
        return _DEFAULT_CONFIDENCE_THRESHOLD
    return val


_CONFIDENCE_THRESHOLD = _resolve_threshold()
|
| 83 |
+
# Sharpness reference level, expressed as Laplacian variance of the face crop
# (high = sharp, low = blurry).
# Typical values: sharp well-lit face 150–400, soft/distant face 60–120,
# head mid-motion 5–40, total blur < 5.
# Crops scoring below this level are still stored in the detection window —
# the worker only emits a debug log for them; they lose out naturally because
# the ranking score is proportional to the blur score.
_MIN_BLUR_SCORE = 80.0  # lower to ~50 if valid faces get flagged; raise to ~120 if blur sneaks through

# Central detection zone, as fractions of the frame dimensions.
# A face is considered only when its centre lies inside the zone.
_ZONE_X_MARGIN = 0.25  # 25% margin on each side → 50% wide centre zone
_ZONE_Y_MARGIN = 0.10  # 10% margin on top/bottom → 80% tall centre zone

# Length (seconds) of the rolling window from which the best face crop is picked.
_BEST_FACE_WINDOW_SECONDS = 5.0
# Minimum spread (seconds) between the oldest and newest detection in the
# window before best_recent_face() will return a result when dwell is
# required. Prevents a briefly passing face from overriding the person who
# has been standing in front of the robot for several seconds.
_MIN_DWELL_SECONDS = 1.5

# Stable identity transition settings for external face context events.
# An observed identity must stay unchanged for _FACE_STATE_CONFIRM_SECONDS
# before it becomes the stable state; "no face" uses a longer grace period.
_FACE_STATE_CONFIRM_SECONDS = 1.2
_NO_FACE_CONFIRM_SECONDS = 2.5
# Slightly longer dwell for multi-person before promoting — people walking
# past should not trip MULTIPLE_PEOPLE.
_MULTIPLE_PEOPLE_CONFIRM_SECONDS = 1.5
# Minimum interval between emitted external face events.
_FACE_EVENT_COOLDOWN_SECONDS = 5.0
# Minimum number of in-zone faces to be considered "multiple".
# Set absurdly high to effectively DISABLE the MULTIPLE_PEOPLE state — in the
# pilot lobby, background people / posters were tripping the state too
# easily and the bot would go silent mid-conversation. The state has no
# SPEAK_NOW cue, so once triggered the visitor's speech is ignored until
# the camera clears. For a single-receptionist deployment the largest-
# face heuristic the worker already uses is enough to pick the visitor.
_MULTIPLE_PEOPLE_THRESHOLD = 999

# YuNet (cv2.FaceDetectorYN) detection settings
_YUNET_SCORE_THRESHOLD = 0.6  # minimum per-face detection confidence (0–1)
_YUNET_NMS_THRESHOLD = 0.3  # non-max suppression overlap threshold
_YUNET_TOP_K = 5000  # max candidate detections before NMS
_YUNET_MODEL_URL = "https://github.com/opencv/opencv_zoo/raw/main/models/face_detection_yunet/face_detection_yunet_2023mar.onnx"
_YUNET_CACHE_PATH = pathlib.Path.home().joinpath(
    ".cache",
    "reachy_mini",
    "face_detection_yunet_2023mar.onnx",
)
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
def _ensure_yunet_model() -> pathlib.Path:
    """Return the cached YuNet ONNX model path, downloading it (~400 KB) if needed.

    The model is fetched into a sibling ``.part`` file and renamed onto the
    cache path only after the download finished, so an interrupted download
    can never leave a truncated file that later runs would treat as valid.
    A zero-byte cache file is treated as missing and downloaded again.

    Returns:
        ``_YUNET_CACHE_PATH``, which exists when the function returns.

    Raises:
        OSError: the download or the filesystem operations failed
            (``urllib.error.URLError`` is a subclass of ``OSError``).
    """
    if _YUNET_CACHE_PATH.exists() and _YUNET_CACHE_PATH.stat().st_size > 0:
        return _YUNET_CACHE_PATH

    _YUNET_CACHE_PATH.parent.mkdir(parents=True, exist_ok=True)
    partial_path = _YUNET_CACHE_PATH.with_name(_YUNET_CACHE_PATH.name + ".part")
    logger.info("Downloading YuNet model → %s", _YUNET_CACHE_PATH)
    try:
        urllib.request.urlretrieve(_YUNET_MODEL_URL, partial_path)
        # Path.replace is atomic when source and target share a filesystem.
        partial_path.replace(_YUNET_CACHE_PATH)
    except Exception:
        # Do not leave a half-written file behind; then let the caller see the error.
        try:
            partial_path.unlink()
        except FileNotFoundError:
            pass
        raise
    logger.info("YuNet model downloaded.")
    return _YUNET_CACHE_PATH
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
# LBPH lives in the contrib build only: probe for cv2.face and its factory
# function once at import time (needs opencv-contrib-python).
_LBPH_AVAILABLE = hasattr(getattr(cv2, "face", None), "LBPHFaceRecognizer_create")
if not _LBPH_AVAILABLE:
    _lbph_missing_message = (
        "cv2.face.LBPHFaceRecognizer_create not found. "
        "Install opencv-contrib-python: pip install opencv-contrib-python\n"
        "Face recognition (identification) will be disabled; detection still works."
    )
    logger.warning(_lbph_missing_message)
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
def _build_lbph_recognizer(label_crops: list[tuple[int, np.ndarray]]) -> "cv2.face.LBPHFaceRecognizer | None":
    """Create an LBPH recognizer trained on ``(label_int, gray_crop)`` pairs.

    Returns ``None`` when LBPH is unavailable in this OpenCV build or when
    there is nothing to train on.
    """
    if not (_LBPH_AVAILABLE and label_crops):
        return None

    label_ids = []
    face_images = []
    for label_id, face_image in label_crops:
        label_ids.append(label_id)
        face_images.append(face_image)

    model = cv2.face.LBPHFaceRecognizer_create()
    model.train(face_images, np.array(label_ids, dtype=np.int32))
    return model
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
class FaceRecognitionWorker:
    """Background thread that continuously detects and recognises faces.

    Public API (thread-safe):
        worker.current_name          → str   "Unknown" or registered name (stable state)
        worker.current_encoding      → Optional[np.ndarray]  best recent face crop (grayscale)
        worker.latest_annotated_jpeg → Optional[bytes]  MJPEG frame
        worker.confidence            → float  LBPH distance of the stable identity
                                              (lower = better; not bounded to 0–100)
        worker.best_recent_face()    → (name, lbph_conf, crop) from the rolling window
    """

    def __init__(
        self,
        face_db,  # FaceDatabase instance
        camera_worker=None,  # CameraWorker instance (pulls robot frames via SDK)
        process_every_n: int = _PROCESS_EVERY_N_FRAMES,
        confidence_threshold: float = _CONFIDENCE_THRESHOLD,
    ) -> None:
        """Store collaborators and initialise all shared state (no thread is started).

        Args:
            face_db: object providing ``get_all_guests_with_crops()``.
            camera_worker: object providing ``get_latest_frame()``; when None
                the worker thread exits immediately after logging a warning.
            process_every_n: run detection/recognition on every n-th loop
                iteration; values below 1 are clamped to 1.
            confidence_threshold: maximum LBPH distance accepted as a match.
        """
        self._face_db = face_db
        self._camera_worker = camera_worker
        # Clamp: 0 would raise ZeroDivisionError in the `% n` test of _run().
        self._process_every_n = max(1, process_every_n)
        self._confidence_threshold = confidence_threshold

        # Shared state (read by tools and dashboard)
        self._lock = threading.Lock()
        self._current_name: str = "Unknown"
        self._current_encoding: Optional[np.ndarray] = None  # grayscale face crop
        self._latest_annotated_jpeg: Optional[bytes] = None
        self._confidence: float = 0.0

        # Stable face state for context event emission.
        # state in {"no_face", "unknown", "known", "multiple"}
        now = time.monotonic()
        self._stable_state: str = "no_face"
        self._stable_name: str = "Unknown"
        self._stable_since: float = now
        self._candidate_state: str = "no_face"
        self._candidate_name: str = "Unknown"
        self._candidate_since: float = now
        self._last_event_sent_at: float = 0.0
        self._face_event_callback: Optional[Callable[[dict[str, Any]], None]] = None

        # Rolling detection window: each entry is
        #   (timestamp: float, face_area: int, blur_score: float, crop: np.ndarray, det_confidence: float)
        # face_area      = w * h in pixels — larger = more prominent / closer face
        # blur_score     = Laplacian variance — higher = sharper crop
        # det_confidence = YuNet score 0–1 — higher = more confident detection
        # Entries with blur_score < _MIN_BLUR_SCORE are still added, but rank lower.
        self._detection_window: deque = deque()

        # Log buffer for dashboard debug panel (ring buffer, max 200 lines)
        self._log_buffer: list[str] = []
        self._log_lock = threading.Lock()

        # LBPH recognizer — rebuilt whenever guests are added/changed
        self._recognizer_lock = threading.Lock()
        self._recognizer: Optional[object] = None  # cv2.face.LBPHFaceRecognizer
        self._label_map: dict[int, str] = {}  # int label → guest name

        self._stop_event = threading.Event()
        self._thread: Optional[threading.Thread] = None

        # YuNet face detector — initialized in start()
        self._detector: Optional[object] = None  # cv2.FaceDetectorYN
        self._detector_input_size: tuple = (0, 0)

    # ------------------------------------------------------------------
    # Public read properties (thread-safe)
    # ------------------------------------------------------------------

    @property
    def current_name(self) -> str:
        """Name attached to the stable face state ("Unknown" unless a guest is confirmed)."""
        with self._lock:
            return self._current_name

    @property
    def current_encoding(self) -> Optional[np.ndarray]:
        """Returns a copy of the best-ranked face crop in the window (grayscale) or None."""
        with self._lock:
            return self._current_encoding.copy() if self._current_encoding is not None else None

    @property
    def latest_annotated_jpeg(self) -> Optional[bytes]:
        """Most recent annotated frame encoded as JPEG, or None before the first frame."""
        with self._lock:
            return self._latest_annotated_jpeg

    @property
    def confidence(self) -> float:
        """LBPH distance recorded at the last stable-state change (0.0 for no face)."""
        with self._lock:
            return self._confidence

    def get_recent_logs(self, n: int = 50) -> list[str]:
        """Return up to the last ``n`` log lines (oldest first); empty list for n <= 0."""
        if n <= 0:
            # Guard: `buffer[-0:]` is the WHOLE buffer, not an empty slice.
            return []
        with self._log_lock:
            return list(self._log_buffer[-n:])

    def set_face_event_callback(self, callback: Optional[Callable[[dict[str, Any]], None]]) -> None:
        """Register a callback for stable face-state transition events."""
        with self._lock:
            self._face_event_callback = callback

    def best_recent_face(
        self,
        window_seconds: float = _BEST_FACE_WINDOW_SECONDS,
        require_dwell: bool = True,
    ) -> tuple[str, float, Optional[np.ndarray]]:
        """Return the best face seen in the last `window_seconds` seconds.

        Selection strategy:
          - Consider every entry of the rolling detection window that is not
            older than `window_seconds` (blurry crops ARE stored; they simply
            score lower). The shared window is not modified by this call.
          - Pick the entry with the highest combined quality score:
                blur_score * log(face_area) * max(det_confidence, 0.01)
          - Run LBPH recognition on that crop and return the result.
          - If there is no recent entry, or `require_dwell` is set and the
            entries span less than _MIN_DWELL_SECONDS → ("Unknown", 0.0, None).

        This is intentionally called at tool-invocation time (not in the
        background loop) so the LLM always queries the best available evidence
        rather than a momentary snapshot.

        Returns:
            (name, lbph_confidence, crop)
              name            — registered guest name or "Unknown"
              lbph_confidence — LBPH distance (lower = more certain match; 0 if no guests)
              crop            — 100×100 grayscale numpy array, or None if nothing usable
        """
        now = time.monotonic()
        with self._lock:
            # Filter instead of popping: the background loop owns pruning, and a
            # caller-supplied short window must not destroy entries it still needs.
            recent = [e for e in self._detection_window if (now - e[0]) <= window_seconds]
            if not recent:
                return "Unknown", 0.0, None

            # Optional dwell guard: for tool-calls we prefer stable evidence;
            # for dashboard preview we may choose best available immediately.
            if require_dwell:
                timestamps = [e[0] for e in recent]
                if max(timestamps) - min(timestamps) < _MIN_DWELL_SECONDS:
                    return "Unknown", 0.0, None

            best = max(recent, key=self._quality_score)
            _ts, face_area, blur_score, crop, det_conf = best
            crop_copy = crop.copy()

        # Run LBPH recognition outside the lock (can be slow)
        name, conf = self._recognize(crop_copy)
        logger.debug(
            "best_recent_face: face_area=%d blur=%.1f det_conf=%.2f name=%s lbph_conf=%.1f",
            face_area, blur_score, det_conf, name, conf,
        )
        self._add_log(f"check_current_face → {name} (area={face_area}px², blur={blur_score:.1f}, det={det_conf:.2f}, lbph={conf:.1f})")
        return name, conf, crop_copy

    # ------------------------------------------------------------------
    # Recognizer rebuild (called by register_guest tool after adding a guest)
    # ------------------------------------------------------------------

    def rebuild_recognizer(self) -> None:
        """Rebuild the LBPH recognizer from the current guest database.

        Call this after registering or updating a guest so the worker
        immediately starts recognising the new face. No-op when LBPH is
        unavailable; clears the recognizer when the database has no guests.
        """
        if not _LBPH_AVAILABLE:
            return
        guests = self._face_db.get_all_guests_with_crops()
        if not guests:
            with self._recognizer_lock:
                self._recognizer = None
                self._label_map = {}
            self._add_log("Recognizer cleared (no guests)")
            return

        # Integer labels are simply the enumeration index of each (name, crop) pair.
        label_map: dict[int, str] = {}
        label_crops: list[tuple[int, np.ndarray]] = []
        for idx, (name, crop) in enumerate(guests):
            label_map[idx] = name
            label_crops.append((idx, crop))

        # Train outside the lock, then swap recognizer and label map together.
        recognizer = _build_lbph_recognizer(label_crops)
        with self._recognizer_lock:
            self._recognizer = recognizer
            self._label_map = label_map
        self._add_log(f"Recognizer rebuilt ({len(label_map)} guest(s))")
        logger.info("LBPH recognizer rebuilt with %d guest(s)", len(label_map))

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    def start(self) -> None:
        """Initialise the detector and recognizer, then launch the worker thread.

        A detector initialisation failure is logged but not raised: the thread
        still runs and serves un-annotated frames, with detection disabled.
        """
        # Initialize YuNet detector (downloads model on first run)
        try:
            model_path = _ensure_yunet_model()
            self._detector = cv2.FaceDetectorYN.create(
                model=str(model_path),
                config="",
                input_size=(640, 480),
                score_threshold=_YUNET_SCORE_THRESHOLD,
                nms_threshold=_YUNET_NMS_THRESHOLD,
                top_k=_YUNET_TOP_K,
            )
            self._detector_input_size = (640, 480)
            logger.info("YuNet face detector initialized")
            self._add_log("YuNet face detector initialized ✓")
        except Exception as e:
            logger.error("Failed to initialize YuNet detector: %s", e)
            self._add_log(f"WARNING: YuNet detector failed to initialize: {e}")

        # Build recognizer from existing DB on startup
        self.rebuild_recognizer()
        self._stop_event.clear()
        self._thread = threading.Thread(target=self._run, daemon=True, name="face-recognition-worker")
        self._thread.start()
        logger.info("FaceRecognitionWorker started (camera_worker=%s)", self._camera_worker)

    def stop(self) -> None:
        """Signal the worker thread to exit and wait up to 3 seconds for it."""
        self._stop_event.set()
        if self._thread:
            self._thread.join(timeout=3.0)
        logger.info("FaceRecognitionWorker stopped")

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _quality_score(entry: tuple) -> float:
        """Rank a detection-window entry: blur × log(area) × max(det_confidence, 0.01)."""
        return entry[2] * math.log(max(entry[1], 1)) * max(entry[4], 0.01)

    def _prune_detection_window(self, now: float) -> None:
        """Drop entries older than _BEST_FACE_WINDOW_SECONDS. Caller must hold self._lock."""
        while self._detection_window and (now - self._detection_window[0][0]) > _BEST_FACE_WINDOW_SECONDS:
            self._detection_window.popleft()

    def _add_log(self, msg: str) -> None:
        """Append a timestamped line to the dashboard log buffer (capped at 200 lines)."""
        ts = time.strftime("%H:%M:%S")
        entry = f"[{ts}] {msg}"
        with self._log_lock:
            self._log_buffer.append(entry)
            if len(self._log_buffer) > 200:
                self._log_buffer = self._log_buffer[-200:]

    def _emit_face_state_event(self, event: dict[str, Any]) -> None:
        """Best-effort callback dispatch for external face context events."""
        with self._lock:
            callback = self._face_event_callback
        if callback is None:
            return
        # Invoke outside the lock so a slow or re-entrant callback cannot block readers.
        try:
            callback(event)
        except Exception as e:
            logger.warning("Failed to dispatch face state event: %s", e)

    def _update_stable_state(
        self,
        observed_state: str,
        observed_name: str,
        lbph_confidence: float,
        det_confidence: float,
    ) -> None:
        """Promote observed state into stable state with dwell and cooldown guards.

        An observation first becomes the *candidate*; only after it has stayed
        unchanged for the state-specific confirmation time does it replace the
        stable state. An event is emitted on a stable change unless the last
        event was sent less than _FACE_EVENT_COOLDOWN_SECONDS ago (in which
        case the transition is applied but the event is dropped).
        """
        now = time.monotonic()

        with self._lock:
            # Stage candidate transitions first.
            candidate_changed = (
                observed_state != self._candidate_state
                or (observed_state == "known" and observed_name != self._candidate_name)
            )
            if candidate_changed:
                self._candidate_state = observed_state
                self._candidate_name = observed_name
                self._candidate_since = now
                return

            if observed_state == "no_face":
                required = _NO_FACE_CONFIRM_SECONDS
            elif observed_state == "multiple":
                required = _MULTIPLE_PEOPLE_CONFIRM_SECONDS
            else:
                required = _FACE_STATE_CONFIRM_SECONDS
            if (now - self._candidate_since) < required:
                return

            previous_state = self._stable_state
            previous_name = self._stable_name
            stable_changed = (
                observed_state != previous_state
                or (observed_state == "known" and observed_name != previous_name)
            )
            if not stable_changed:
                return

            self._stable_state = observed_state
            self._stable_name = observed_name
            self._stable_since = now

            # Public current_* values follow the stable state, not instantaneous observations.
            self._current_name = observed_name if observed_state == "known" else "Unknown"
            self._confidence = 0.0 if observed_state == "no_face" else float(lbph_confidence)

            can_emit_event = (now - self._last_event_sent_at) >= _FACE_EVENT_COOLDOWN_SECONDS
            if can_emit_event:
                self._last_event_sent_at = now

        # From here on self._lock is released: _emit_face_state_event() takes
        # it again and threading.Lock is not re-entrant.
        self._add_log(f"Stable face: {previous_state}({previous_name}) -> {observed_state}({observed_name})")

        if not can_emit_event:
            self._add_log("Face context event skipped (cooldown)")
            return

        event_payload = {
            "event": "face_state_changed",
            "state": observed_state,
            "name": observed_name if observed_state == "known" else None,
            "previous_state": previous_state,
            "previous_name": previous_name if previous_state == "known" else None,
            "lbph_confidence": round(float(lbph_confidence), 2),
            "detection_confidence": round(float(det_confidence), 3),
            "timestamp": time.time(),
        }
        self._emit_face_state_event(event_payload)

    def _detect_faces(self, frame: np.ndarray) -> list[tuple[int, int, int, int, int, float]]:
        """Return list of (x, y, w, h, area, det_confidence) sorted by area descending.

        Uses YuNet (cv2.FaceDetectorYN) which returns a real confidence score (0–1)
        per bounding box. The detector input size is updated whenever the frame
        size changes. Returns [] when the detector failed to initialise.
        """
        if self._detector is None:
            return []

        h, w = frame.shape[:2]
        if (w, h) != self._detector_input_size:
            self._detector.setInputSize((w, h))
            self._detector_input_size = (w, h)

        _, faces = self._detector.detect(frame)
        if faces is None or len(faces) == 0:
            return []

        # Per detection row: columns 0-3 are read as x, y, w, h and column 14 as the score.
        results = [
            (int(f[0]), int(f[1]), int(f[2]), int(f[3]), int(f[2] * f[3]), float(f[14]))
            for f in faces
        ]
        results.sort(key=lambda r: r[4], reverse=True)
        return results

    def _recognize(self, gray_crop: np.ndarray) -> tuple[str, float]:
        """Return (name, confidence) for a face crop. confidence is LBPH distance (lower = better match).

        Returns ("Unknown", 0.0) when no recognizer is trained or prediction
        fails, and ("Unknown", distance) when the distance exceeds the threshold.
        """
        # Snapshot both references under the lock; predict() runs without it.
        with self._recognizer_lock:
            recognizer = self._recognizer
            label_map = self._label_map

        if recognizer is None or not label_map:
            return "Unknown", 0.0

        try:
            resized = cv2.resize(gray_crop, (100, 100))
            label_int, confidence = recognizer.predict(resized)
            name = label_map.get(label_int, "Unknown")
            if confidence <= self._confidence_threshold:
                return name, float(confidence)
            return "Unknown", float(confidence)
        except Exception as e:
            logger.debug("LBPH predict error: %s", e)
            return "Unknown", 0.0

    # ------------------------------------------------------------------
    # Main loop
    # ------------------------------------------------------------------

    def _run(self) -> None:
        """Worker loop: detect, recognise, update stable state, publish annotated JPEG.

        Every iteration annotates and encodes the latest frame; detection and
        recognition run only on every ``process_every_n``-th iteration, and the
        previous rectangles/labels are reused in between.
        """
        if self._camera_worker is None:
            logger.warning("FaceRecognitionWorker: no camera_worker provided — face recognition disabled")
            self._add_log("WARNING: No camera_worker — face recognition disabled")
            return

        self._add_log("Camera worker attached ✓ (YuNet + LBPH, robot camera)")

        frame_count = 0
        last_face_rects: list[tuple[int, int, int, int]] = []  # (x, y, w, h)
        last_face_labels: list[str] = []

        try:
            # NOTE(review): the loop only sleeps when no frame is available. If
            # get_latest_frame() is non-blocking this spins at full speed and
            # re-encodes the same frame — confirm against CameraWorker.
            while not self._stop_event.is_set():
                frame = self._camera_worker.get_latest_frame()
                if frame is None:
                    time.sleep(0.05)
                    continue

                frame_count += 1
                do_recognition = frame_count % self._process_every_n == 0

                if do_recognition:
                    rects = self._detect_faces(frame)  # sorted by area descending; (x,y,w,h,area,det_conf)

                    # Filter to faces whose centre falls inside the central detection zone
                    fh_full, fw_full = frame.shape[:2]
                    _zx1 = fw_full * _ZONE_X_MARGIN
                    _zx2 = fw_full * (1 - _ZONE_X_MARGIN)
                    _zy1 = fh_full * _ZONE_Y_MARGIN
                    _zy2 = fh_full * (1 - _ZONE_Y_MARGIN)
                    rects = [
                        (x, y, w, h, area, det_conf)
                        for x, y, w, h, area, det_conf in rects
                        if _zx1 <= (x + w / 2) <= _zx2 and _zy1 <= (y + h / 2) <= _zy2
                    ]

                    # Strip area/conf for annotation (keep (x,y,w,h) tuples)
                    last_face_rects = [(x, y, w, h) for x, y, w, h, _area, _conf in rects]
                    last_face_labels = []

                    multiple_in_zone = len(rects) >= _MULTIPLE_PEOPLE_THRESHOLD
                    if multiple_in_zone:
                        # Multiple people in the central zone — defer
                        # single-person identification until one comes forward.
                        self._update_stable_state("multiple", "Unknown", 0.0, 0.0)

                    if rects:
                        # Best face = largest area (index 0 after sort)
                        px, py, pw, ph, face_area, det_conf = rects[0]
                        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                        fh, fw = gray.shape[:2]
                        # Clamp bbox to frame bounds (YuNet can return coords outside the frame)
                        x1 = max(0, px)
                        y1 = max(0, py)
                        x2 = min(fw, px + pw)
                        y2 = min(fh, py + ph)
                        crop = gray[y1:y2, x1:x2]
                        if crop.size == 0:
                            last_face_labels.append("Face (bad crop)")
                            continue
                        resized_crop = cv2.resize(crop, (100, 100))

                        # Compute sharpness via Laplacian variance (higher = sharper)
                        blur_score = cv2.Laplacian(resized_crop, cv2.CV_64F).var()

                        # Add every valid crop to the detection window; low blur crops
                        # are naturally ranked lower by the quality score.
                        now = time.monotonic()
                        with self._lock:
                            self._detection_window.append((now, face_area, blur_score, resized_crop.copy(), det_conf))
                            self._prune_detection_window(now)
                            # Keep current_encoding pointing to the best-quality crop in the window
                            best_entry = max(self._detection_window, key=self._quality_score)
                            self._current_encoding = best_entry[3].copy()

                        if blur_score < _MIN_BLUR_SCORE:
                            # Informational only: the crop was stored above regardless.
                            logger.debug("Face crop rejected: blur_score=%.1f < threshold=%.1f", blur_score, _MIN_BLUR_SCORE)

                        # Run LBPH recognition on the current crop for live display
                        name, conf = self._recognize(resized_crop)

                        # When 2+ faces are in the zone we already emitted
                        # "multiple"; skip the per-person state emission so
                        # we don't oscillate.
                        if not multiple_in_zone:
                            observed_state = "known" if name != "Unknown" else "unknown"
                            observed_name = name if observed_state == "known" else "Unknown"
                            self._update_stable_state(observed_state, observed_name, conf, det_conf)

                        # Build labels for all detected faces
                        for i, (x, y, w, h, area, conf_i) in enumerate(rects):
                            if i == 0:
                                det_tag = f"det={det_conf:.2f}"
                                if name != "Unknown":
                                    last_face_labels.append(f"Known: {name} (lbph={conf:.0f}) {det_tag}")
                                else:
                                    last_face_labels.append(f"Unknown (lbph={conf:.0f}) {det_tag}")
                            else:
                                last_face_labels.append(f"Face (det={conf_i:.2f})")
                    else:
                        # Prune the window even when no face detected
                        now = time.monotonic()
                        with self._lock:
                            self._prune_detection_window(now)
                            if not self._detection_window:
                                self._current_encoding = None

                        self._update_stable_state("no_face", "No face", 0.0, 0.0)

                # Annotate frame and encode as JPEG
                annotated = self._annotate_frame(frame, last_face_rects, last_face_labels)
                ok, jpeg = cv2.imencode(".jpg", annotated, [cv2.IMWRITE_JPEG_QUALITY, 70])
                if ok:
                    # Keep the previous JPEG if encoding failed rather than crash the worker.
                    with self._lock:
                        self._latest_annotated_jpeg = jpeg.tobytes()

        except Exception as e:
            logger.exception("FaceRecognitionWorker crashed: %s", e)
            self._add_log(f"CRASH: {e}")
        finally:
            logger.info("FaceRecognitionWorker: stopped")

    def _annotate_frame(
        self,
        frame: np.ndarray,
        face_rects: list[tuple[int, int, int, int]],
        labels: list[str],
    ) -> np.ndarray:
        """Draw the detection zone, bounding boxes and labels on a copy of the frame.

        Rectangles without a matching label are captioned "Face"; boxes whose
        label starts with "Known:" are green, all others orange.
        """
        out = frame.copy()

        # Draw the active central detection zone
        fh, fw = out.shape[:2]
        zone_x1 = int(fw * _ZONE_X_MARGIN)
        zone_y1 = int(fh * _ZONE_Y_MARGIN)
        zone_x2 = int(fw * (1 - _ZONE_X_MARGIN))
        zone_y2 = int(fh * (1 - _ZONE_Y_MARGIN))
        cv2.rectangle(out, (zone_x1, zone_y1), (zone_x2, zone_y2), (0, 210, 210), 2)
        cv2.putText(out, "Detection Zone", (zone_x1 + 4, zone_y1 + 18),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 210, 210), 1)

        for i, (x, y, w, h) in enumerate(face_rects):
            label = labels[i] if i < len(labels) else "Face"
            is_known = label.startswith("Known:")
            color = (0, 200, 0) if is_known else (0, 100, 255)  # green / orange

            cv2.rectangle(out, (x, y), (x + w, y + h), color, 2)
            # Filled strip along the bottom edge of the box as label background.
            cv2.rectangle(out, (x, y + h - 28), (x + w, y + h), color, cv2.FILLED)
            cv2.putText(
                out,
                label,
                (x + 4, y + h - 8),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.5,
                (255, 255, 255),
                1,
            )
        return out
|
src/reachy_mini_receptionist/gemini_live.py
ADDED
|
@@ -0,0 +1,754 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Gemini Live API handler — drop-in replacement for OpenaiRealtimeHandler.
|
| 2 |
+
|
| 3 |
+
Same public surface as ``openai_realtime.OpenaiRealtimeHandler`` so the
|
| 4 |
+
rest of the app (main.py, console.py, headless_personality_ui.py) can
|
| 5 |
+
switch backends with one env var (``VOICE_BACKEND=gemini``) without
|
| 6 |
+
code changes.
|
| 7 |
+
|
| 8 |
+
Audio I/O is bidirectional PCM via Gemini's Live websocket. Tool
|
| 9 |
+
calling, VAD, voice synthesis all delegated to Gemini.
|
| 10 |
+
|
| 11 |
+
This module imports ``google.genai`` LAZILY inside ``start_up`` so the
|
| 12 |
+
absence of the SDK does not break OpenAI-backend installs.
|
| 13 |
+
"""
|
| 14 |
+
from __future__ import annotations
|
| 15 |
+
|
| 16 |
+
import asyncio
|
| 17 |
+
import base64
|
| 18 |
+
import json
|
| 19 |
+
import logging
|
| 20 |
+
import os
|
| 21 |
+
import threading
|
| 22 |
+
import time
|
| 23 |
+
import uuid
|
| 24 |
+
from datetime import datetime
|
| 25 |
+
from typing import Any, Final, Literal, Optional, Tuple
|
| 26 |
+
|
| 27 |
+
import numpy as np
|
| 28 |
+
from fastrtc import AdditionalOutputs, AsyncStreamHandler, wait_for_item, audio_to_int16
|
| 29 |
+
from numpy.typing import NDArray
|
| 30 |
+
from scipy.signal import resample
|
| 31 |
+
|
| 32 |
+
from reachy_mini_receptionist.config import config
|
| 33 |
+
from reachy_mini_receptionist.prompts import get_session_voice, get_session_instructions
|
| 34 |
+
from reachy_mini_receptionist.tools.core_tools import (
|
| 35 |
+
ToolDependencies,
|
| 36 |
+
get_tool_specs,
|
| 37 |
+
)
|
| 38 |
+
from reachy_mini_receptionist.tools.background_tool_manager import (
|
| 39 |
+
ToolCallRoutine,
|
| 40 |
+
ToolNotification,
|
| 41 |
+
BackgroundToolManager,
|
| 42 |
+
)
|
| 43 |
+
|
| 44 |
+
logger = logging.getLogger(__name__)
|
| 45 |
+
|
| 46 |
+
# Gemini Live expects 16 kHz PCM16 mono input; outputs 24 kHz PCM16.
|
| 47 |
+
GEMINI_INPUT_SAMPLE_RATE: Final[int] = 16000
|
| 48 |
+
GEMINI_OUTPUT_SAMPLE_RATE: Final[int] = 24000
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def _gemini_model_name() -> str:
    """Resolve the Gemini Live model id (env-overridable).

    Reads ``GEMINI_LIVE_MODEL`` and strips surrounding whitespace. If the
    variable is unset, empty, or whitespace-only, the default
    ``gemini-2.5-flash-native-audio-latest`` is returned, so the result is
    never an empty string.

    The default was noted by the original author as confirmed
    bidiGenerateContent-capable on the project's API key (queried
    2026-05-20 against the models.list endpoint). Set GEMINI_LIVE_MODEL
    in .env to switch to e.g. gemini-2.5-flash-native-audio-preview-09-2025
    or whatever else your project has allowlisted.

    Returns:
        The model id string to connect with.
    """
    default_model = "gemini-2.5-flash-native-audio-latest"
    # Strip BEFORE applying the fallback: a value such as "   " is truthy, so
    # falling back first and stripping afterwards would yield "".
    return (os.getenv("GEMINI_LIVE_MODEL") or "").strip() or default_model
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def _openai_tools_to_gemini(openai_tools: list[dict]) -> list[dict]:
    """Wrap OpenAI function-tool specs as a Gemini ``function_declarations`` entry.

    Input items look like::

        {"type": "function", "name": "...", "description": "...",
         "parameters": {"type": "object", ...}}

    Output is either ``[]`` or a single-element list::

        [{"function_declarations": [
            {"name": "...", "description": "...", "parameters": {...}}, ...]}]

    Items whose ``type`` is not ``"function"`` or that have no ``name`` are
    skipped. The ``parameters`` schema is forwarded unchanged (the same
    object, no key or case rewriting) and omitted when missing or empty.

    Args:
        openai_tools: OpenAI-style tool specs; ``None`` or empty yields ``[]``.

    Returns:
        A list with one ``function_declarations`` dict, or ``[]`` when no
        usable tool was found.
    """

    def _usable(spec: dict) -> bool:
        return spec.get("type") == "function" and bool(spec.get("name"))

    def _declaration(spec: dict) -> dict:
        entry = {
            "name": spec.get("name"),
            "description": spec.get("description", ""),
        }
        schema = spec.get("parameters")
        if schema:
            entry["parameters"] = schema
        return entry

    declarations = [_declaration(spec) for spec in (openai_tools or []) if _usable(spec)]
    if not declarations:
        return []
    return [{"function_declarations": declarations}]
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
class GeminiLiveHandler(AsyncStreamHandler):
|
| 91 |
+
"""Gemini Live API handler — mirror of OpenaiRealtimeHandler.
|
| 92 |
+
|
| 93 |
+
Same public surface as the OpenAI handler so the rest of the app
|
| 94 |
+
can switch backends via ``VOICE_BACKEND=gemini`` without code
|
| 95 |
+
changes elsewhere.
|
| 96 |
+
"""
|
| 97 |
+
|
| 98 |
+
def __init__(
    self,
    deps: ToolDependencies,
    gradio_mode: bool = False,
    instance_path: Optional[str] = None,
    session_manager: Any | None = None,
    controller: Any | None = None,
):
    """Set up handler state; no network connection is made here.

    Args:
        deps: Tool dependencies handed to every tool call routine.
        gradio_mode: Stored on the instance for callers; not used in this
            constructor.
        instance_path: Stored on the instance for callers; not used in this
            constructor.
        session_manager: Optional object consulted for stale-session resets
            and user-transcript recording.
        controller: Optional object notified when a tool call completes.
    """
    super().__init__(
        expected_layout="mono",
        output_sample_rate=GEMINI_OUTPUT_SAMPLE_RATE,
        input_sample_rate=GEMINI_INPUT_SAMPLE_RATE,
    )
    self.deps = deps
    self.gradio_mode = gradio_mode
    self.instance_path = instance_path
    self._session_manager = session_manager
    self._controller = controller

    self.output_queue: "asyncio.Queue[Tuple[int, NDArray[np.int16]] | AdditionalOutputs]" = asyncio.Queue()

    self.session: Any = None  # google.genai live session, set in start_up
    self._client: Any = None
    self._runtime_loop: asyncio.AbstractEventLoop | None = None
    self._shutdown_requested: bool = False

    # Cumulative cost tracker (Gemini doesn't expose token costs the same way)
    self.cumulative_cost: float = 0.0
    # The handler may be constructed outside a running event loop, where
    # asyncio.get_event_loop() is deprecated and can raise. Use the running
    # loop's clock when there is one, otherwise the monotonic clock (which
    # is what the standard asyncio loop's time() returns).
    try:
        self.start_time = asyncio.get_running_loop().time()
    except RuntimeError:
        self.start_time = time.monotonic()
    self.last_activity_time = self.start_time

    # Background tool manager (same as OpenAI handler)
    self.tool_manager = BackgroundToolManager()

    # Tool-call args stash keyed by call_id, so the controller can see
    # both args + result when the tool completes.
    self._tool_call_args: dict[str, dict[str, Any]] = {}

    # Last face event + session event sent to model (for /api endpoints)
    self._face_event_lock = threading.Lock()
    self._last_face_event_sent: dict[str, Any] | None = None
    self._session_event_lock = threading.Lock()
    self._last_session_event_sent: dict[str, Any] | None = None

    # Pending events waiting for session to be ready
    self._pending_face_event_lock = threading.Lock()
    self._pending_face_event: dict[str, Any] | None = None
    self._pending_session_event_lock = threading.Lock()
    self._pending_session_event: dict[str, Any] | None = None

    # idle-speech cue dedupe
    self._idle_speech_cue_pushed: bool = False

    # One-shot flag for the "first mic frame sent" log line in receive();
    # declared here so the attribute always exists.
    self._first_mic_frame_logged: bool = False
|
| 150 |
+
|
| 151 |
+
# ------------------------------------------------------------------
|
| 152 |
+
# AsyncStreamHandler required methods
|
| 153 |
+
# ------------------------------------------------------------------
|
| 154 |
+
|
| 155 |
+
def copy(self) -> "GeminiLiveHandler":
    """Return a fresh handler built from this one's constructor arguments.

    Only the constructor inputs (deps, gradio mode, instance path, session
    manager, controller) are carried over; the new instance starts with its
    own queue and no session.
    """
    clone = GeminiLiveHandler(
        deps=self.deps,
        gradio_mode=self.gradio_mode,
        instance_path=self.instance_path,
        session_manager=self._session_manager,
        controller=self._controller,
    )
    return clone
|
| 163 |
+
|
| 164 |
+
async def start_up(self) -> None:
    """Connect to Gemini Live and run the event loop until shutdown.

    Steps, in order:

    1. Remember the running event loop so other threads can schedule
       coroutines onto it.
    2. Lazily import the ``google-genai`` SDK and read ``GEMINI_API_KEY``;
       log an error and return if either is missing.
    3. Build the session config (system instruction, audio modality,
       optional speech/transcription settings, tools).
    4. Open the live session, start the background tool manager, send a
       one-shot greeting prompt, then block in ``_run_event_loop``.
    5. On exit, shut the tool manager down and clear ``self.session``.

    No exception escapes this method: failures are logged and
    ``self.session`` is reset to ``None``.
    """
    self._runtime_loop = asyncio.get_running_loop()

    # Lazy import — only required when this backend is active.
    # NOTE(review): ``genai_types`` is not referenced below; the import
    # still fails fast if the ``types`` submodule is unavailable.
    try:
        from google import genai
        from google.genai import types as genai_types
    except ImportError as e:
        logger.error(
            "google-genai SDK not installed. Install with: "
            "pip install google-genai. Error: %s", e,
        )
        return

    api_key = (os.getenv("GEMINI_API_KEY") or "").strip()
    if not api_key:
        logger.error("GEMINI_API_KEY not set — cannot start Gemini Live backend")
        return

    self._client = genai.Client(api_key=api_key, http_options={"api_version": "v1beta"})
    model_id = _gemini_model_name()
    logger.info("Gemini Live backend starting with model=%s", model_id)

    # Gather the inputs for the session config: instructions and tools.
    # NOTE(review): ``voice`` is computed but never used below — the voice
    # actually applied comes from GEMINI_LIVE_VOICE (half-cascade only).
    instructions = get_session_instructions()
    voice = get_session_voice()
    openai_tools = get_tool_specs()  # type: ignore[no-untyped-call]
    gemini_tools = _openai_tools_to_gemini(openai_tools)

    # Minimal config — back to known-good shape after the
    # realtime_input_config attempt caused 1011 internal server
    # errors. Will revisit low-latency VAD tuning via SDK-specific
    # types once we confirm the exact accepted shape.
    config_obj: dict[str, Any] = {
        "response_modalities": ["AUDIO"],
        "system_instruction": instructions,
    }

    # Half-cascade Live models (model name contains "-live-" but
    # NOT "native-audio") require an explicit speech_config /
    # voice_config to actually emit audio. Without it they degrade
    # to 2-byte placeholder chunks and `resp.text`-only responses
    # (observed 2026-05-21 with gemini-3.1-flash-live-preview).
    # Native-audio models auto-select voice and REJECT voice_config
    # with 1007 "Cannot extract voices from a non-audio request",
    # so we conditionally apply this only when the model name says
    # we should. Voice name override via env var.
    model_lower = model_id.lower()
    is_half_cascade = ("-live-" in model_lower or model_lower.endswith("-live-preview")) \
        and "native-audio" not in model_lower
    if is_half_cascade:
        # Empty / whitespace-only GEMINI_LIVE_VOICE falls back to "Puck".
        voice_name = (os.getenv("GEMINI_LIVE_VOICE") or "Puck").strip() or "Puck"
        config_obj["speech_config"] = {
            "voice_config": {
                "prebuilt_voice_config": {"voice_name": voice_name},
            },
        }
        # Half-cascade also benefits from explicit input/output
        # transcription so we can log what the model heard / said
        # for debugging. Empty dict = enable with defaults.
        config_obj["input_audio_transcription"] = {}
        config_obj["output_audio_transcription"] = {}
        logger.info(
            "Gemini Live: half-cascade model detected — adding "
            "speech_config (voice=%s) + transcription", voice_name,
        )

    # Only attach "tools" when at least one declaration was produced.
    if gemini_tools:
        config_obj["tools"] = gemini_tools

    try:
        async with self._client.aio.live.connect(model=model_id, config=config_obj) as session:
            self.session = session
            logger.info("Gemini Live connected.")
            # Start background tool manager (same callback signature)
            self.tool_manager.start_up(tool_callbacks=[self._handle_tool_result])

            # Kick off the conversation with a one-shot greeting prompt,
            # sent as a completed user text turn so the model speaks first.
            # The config built above sets no VAD parameters, so subsequent
            # visitor turns rely on the service's default turn detection.
            # A failure here is logged and does not abort the session.
            try:
                await session.send_client_content(
                    turns=[{
                        "role": "user",
                        "parts": [{
                            "text": (
                                "(Visitor just walked up. Greet them "
                                "very briefly in ONE short friendly "
                                "sentence and ask their name or who "
                                "they're here to see. Keep it under "
                                "8 words.)"
                            ),
                        }],
                    }],
                    turn_complete=True,
                )
                logger.info("Gemini Live: sent kick-off greeting prompt")
            except Exception as e:
                logger.warning("Gemini Live: kick-off send failed: %s", e)

            try:
                await self._run_event_loop()
            except Exception as inner:
                logger.exception("Gemini Live event loop crashed: %s", inner)
            finally:
                # Runs whether the event loop returned or crashed, so the
                # "ended cleanly" message is logged in both cases.
                await self.tool_manager.shutdown()
                self.session = None
                logger.info("Gemini Live: session ended cleanly")
    except Exception as e:
        # Connection failures and errors raised while leaving the
        # ``async with`` block land here.
        logger.exception("Gemini Live session failed: %s", e)
        self.session = None
|
| 279 |
+
|
| 280 |
+
async def shutdown(self) -> None:
    """Request shutdown, then close the session and the tool manager.

    Sets ``_shutdown_requested`` so the event loop stops. Both cleanup
    steps are best-effort: a failure in either is logged at debug level
    and never raised, and a failing session close does not prevent the
    tool manager from being shut down.
    """
    self._shutdown_requested = True

    session = self.session
    if session is not None:
        try:
            await session.close()
        except Exception as e:
            logger.debug("Gemini session close ignored: %s", e)

    try:
        await self.tool_manager.shutdown()
    except Exception as e:
        # Previously swallowed silently; log it like the session close so a
        # stuck tool manager is at least visible when debugging.
        logger.debug("Gemini tool manager shutdown ignored: %s", e)
|
| 291 |
+
|
| 292 |
+
async def receive(self, frame: Tuple[int, NDArray[np.int16]]) -> None:
    """Forward one microphone frame to the live session.

    Does nothing while no session is open. Two-dimensional input is
    oriented so the longer axis comes first and, when more than one
    channel remains, only the first channel is kept. The frame is then
    resampled to ``self.input_sample_rate`` if its rate differs, converted
    with ``audio_to_int16`` and sent as raw PCM bytes. Send failures are
    logged at debug level and the frame is dropped.

    Args:
        frame: ``(sample_rate, samples)`` pair for the captured audio.
    """
    if self.session is None:
        return

    source_rate, samples = frame

    if samples.ndim == 2:
        n_rows, n_cols = samples.shape
        if n_cols > n_rows:
            samples = samples.T
        if samples.shape[1] > 1:
            samples = samples[:, 0]

    # Resample only when the incoming rate differs from the target rate.
    if self.input_sample_rate != source_rate:
        target_len = int(len(samples) * self.input_sample_rate / source_rate)
        samples = resample(samples, target_len)

    pcm = audio_to_int16(samples)

    try:
        await self.session.send_realtime_input(
            audio={
                "data": pcm.tobytes(),
                "mime_type": f"audio/pcm;rate={self.input_sample_rate}",
            },
        )
        # One-time confirmation that the mic-to-Gemini path is working.
        already_logged = getattr(self, "_first_mic_frame_logged", False)
        if not already_logged:
            logger.info(
                "Gemini Live: first mic frame sent (input_rate=%d, target_rate=%d, samples=%d)",
                source_rate, self.input_sample_rate, len(pcm),
            )
            self._first_mic_frame_logged = True
    except Exception as e:
        logger.debug("Dropped mic frame: %s", e)
|
| 325 |
+
|
| 326 |
+
async def emit(self) -> Tuple[int, NDArray[np.int16]] | AdditionalOutputs | None:
    """Return the next queued output item (audio chunk or side-channel message).

    Before waiting on the queue, gives the session manager (if any) a
    chance to reset a stale session; errors from that hook are ignored.
    """
    manager = self._session_manager
    if manager is not None:
        try:
            manager.maybe_reset_if_stale()
        except Exception:
            pass  # best-effort housekeeping; must not block audio output

    next_item = await wait_for_item(self.output_queue)
    return next_item  # type: ignore[no-any-return]
|
| 334 |
+
|
| 335 |
+
# ------------------------------------------------------------------
|
| 336 |
+
# Event loop — read Gemini events and dispatch
|
| 337 |
+
# ------------------------------------------------------------------
|
| 338 |
+
|
| 339 |
+
async def _run_event_loop(self) -> None:
    """Consume Gemini Live server events until session closes / shutdown.

    Wraps session.receive() in an outer loop so the conversation
    survives multiple turns. Gemini's receive() iterator can return
    after a single turn in some SDK versions — when it exits we
    re-enter as long as the session and shutdown flag say keep going.

    For every server message this method, in order:

    * queues audio from ``resp.data`` as ``(sample_rate, int16 array)``;
    * queues ``resp.text`` as an assistant message;
    * inspects ``resp.server_content`` for model-turn text parts, the
      input transcription and the ``turn_complete`` flag;
    * dispatches each function call found in ``resp.tool_call``.

    Any exception ends the loop with a warning; totals are always logged
    on exit.
    """
    if self.session is None:
        return
    # Counters are for logging only. ``audio_chunks`` is per turn: it is
    # reset whenever a ``turn_complete`` message is seen.
    event_count = 0
    audio_chunks = 0
    outer_iterations = 0
    try:
        while self.session is not None and not self._shutdown_requested:
            outer_iterations += 1
            logger.info("Gemini Live: receive() iteration %d starting", outer_iterations)
            async for resp in self.session.receive():
                if self._shutdown_requested:
                    break
                event_count += 1

                # Smoke test confirmed: resp.data IS the same audio
                # as server_content.model_turn.parts[].inline_data.data
                # (just a convenience shortcut). Use ONLY the shortcut
                # to avoid double-emit when both paths fire.
                data = getattr(resp, "data", None)
                if data:
                    audio_chunks += 1
                    # Interpret the bytes as int16 samples, shaped (1, n).
                    arr = np.frombuffer(data, dtype=np.int16).reshape(1, -1)
                    await self.output_queue.put((self.output_sample_rate, arr))
                    self.last_activity_time = asyncio.get_event_loop().time()
                    if audio_chunks == 1:
                        logger.info("Gemini Live: first audio chunk received (%d bytes)", len(data))

                # text shortcut — concatenated text parts
                text = getattr(resp, "text", None)
                if text:
                    logger.info("Gemini Live text: %r", text[:120])
                    await self.output_queue.put(
                        AdditionalOutputs({"role": "assistant", "content": text})
                    )

                # server_content parsing for non-audio metadata + turn_complete
                server_content = getattr(resp, "server_content", None)
                if server_content is not None:
                    # NOTE: audio chunks via model_turn.parts[].inline_data
                    # are ALREADY surfaced via resp.data above — don't
                    # re-emit. Just look at non-audio parts here.
                    model_turn = getattr(server_content, "model_turn", None)
                    if model_turn is not None:
                        parts = getattr(model_turn, "parts", None) or []
                        for part in parts:
                            if getattr(part, "thought", False):
                                continue  # silently swallow chain-of-thought
                            ptext = getattr(part, "text", None)
                            # Part text is only emitted when the resp.text
                            # shortcut above produced nothing.
                            if ptext and not text:  # avoid double-text-emit
                                logger.info("Gemini Live model_turn text: %r", ptext[:120])
                                await self.output_queue.put(
                                    AdditionalOutputs({"role": "assistant", "content": ptext})
                                )

                    # What the service transcribed from the visitor's audio.
                    in_tr = getattr(server_content, "input_transcription", None)
                    if in_tr is not None:
                        txt = (getattr(in_tr, "text", "") or "").strip()
                        if txt:
                            logger.info("Gemini Live input transcript: %r", txt)
                            if self._session_manager is not None:
                                # Best-effort: a failing session manager
                                # must not interrupt event processing.
                                try:
                                    self._session_manager.record_user_transcript(txt)
                                except Exception:
                                    pass
                            await self.output_queue.put(
                                AdditionalOutputs({"role": "user", "content": txt})
                            )

                    if getattr(server_content, "turn_complete", False):
                        logger.info(
                            "Gemini Live: model turn complete (events=%d, audio_chunks=%d)",
                            event_count, audio_chunks,
                        )
                        audio_chunks = 0  # reset for next turn

                # Tool calls
                tool_call = getattr(resp, "tool_call", None)
                if tool_call is not None:
                    fcs = getattr(tool_call, "function_calls", None) or []
                    logger.info("Gemini Live: tool_call with %d function calls", len(fcs))
                    for fc in fcs:
                        await self._dispatch_function_call(fc)

            logger.info(
                "Gemini Live: receive() iteration %d ended (events so far=%d). Looping.",
                outer_iterations, event_count,
            )
            # Small backoff so we don't spin if session is genuinely dead
            await asyncio.sleep(0.1)
    except Exception as e:
        # Any error while receiving or dispatching ends the loop; the
        # caller is responsible for cleaning up the session.
        logger.warning("Gemini Live event loop exited with exception: %s", e)
    finally:
        logger.info(
            "Gemini Live event loop: total iterations=%d, events=%d, audio_chunks=%d",
            outer_iterations, event_count, audio_chunks,
        )
|
| 443 |
+
|
| 444 |
+
async def _dispatch_function_call(self, fc: Any) -> None:
    """Route a Gemini function call into the background tool manager.

    The call's arguments are stashed under its call id so they can be
    paired with the result when the tool completes. If the tool cannot be
    started, the stash entry is removed again and an error response is
    sent back to the model so it is not left waiting for a result.

    Args:
        fc: Function-call object exposing ``name``, ``args`` and ``id``.
            Calls without a name are ignored; a missing id is replaced by
            a generated UUID.
    """
    tool_name = getattr(fc, "name", None)
    if not tool_name:
        return
    call_id = str(getattr(fc, "id", None) or uuid.uuid4())

    # Normalize args to a plain dict; anything unconvertible becomes {}.
    args_obj = getattr(fc, "args", {}) or {}
    if not isinstance(args_obj, dict):
        try:
            args_obj = dict(args_obj)
        except Exception:
            args_obj = {}
    args_json_str = json.dumps(args_obj)

    self._tool_call_args[call_id] = args_obj
    logger.info(
        "Gemini tool call: %s call_id=%s args=%s", tool_name, call_id, args_json_str,
    )
    try:
        await self.tool_manager.start_tool(
            call_id=call_id,
            tool_call_routine=ToolCallRoutine(
                tool_name=tool_name,
                args_json_str=args_json_str,
                deps=self.deps,
            ),
            is_idle_tool_call=False,
        )
    except Exception as e:
        logger.warning("Failed to start Gemini tool '%s': %s", tool_name, e)
        # No completion callback will fire for this call, so nothing else
        # would ever remove the stashed args.
        self._tool_call_args.pop(call_id, None)
        # Report the failure to the model in the same response shape used
        # for completed tools.
        session = self.session
        if session is not None:
            try:
                await session.send_tool_response(
                    function_responses=[{
                        "id": call_id,
                        "name": tool_name,
                        "response": {
                            "output": {"error": f"Failed to start tool '{tool_name}': {e}"},
                        },
                    }],
                )
            except Exception as send_err:
                logger.debug("send_tool_response failed: %s", send_err)
|
| 474 |
+
|
| 475 |
+
async def _handle_tool_result(self, bg_tool: ToolNotification) -> None:
    """Deliver a finished tool call to the model, the output queue and the controller.

    The outcome is the tool's error (wrapped as ``{"error": ...}``), its
    result, or ``{"error": "No result"}`` when it reported neither.

    Args:
        bg_tool: Notification carrying ``id``, ``tool_name``, ``result``
            and ``error`` for the completed call.
    """
    if bg_tool.error is not None:
        outcome: dict[str, Any] = {"error": bg_tool.error}
    elif bg_tool.result is not None:
        outcome = bg_tool.result
    else:
        outcome = {"error": "No result"}

    # Arguments recorded when the call was dispatched ({} if unknown).
    stashed_args = self._tool_call_args.pop(bg_tool.id, {})

    # 1) Answer the model's function call, if a session is still open.
    if self.session is not None:
        try:
            # Gemini expects function_responses with {id, name, response: {output: ...}}
            await self.session.send_tool_response(
                function_responses=[{
                    "id": bg_tool.id if isinstance(bg_tool.id, str) else None,
                    "name": bg_tool.tool_name,
                    "response": {"output": outcome},
                }],
            )
        except Exception as e:
            logger.debug("send_tool_response failed: %s", e)

    # 2) Put the tool result on the output queue as an assistant message.
    chat_entry = {
        "role": "assistant",
        "content": json.dumps(outcome),
        "metadata": {
            "title": f"🛠️ Used tool {bg_tool.tool_name}",
            "status": "done",
        },
    }
    await self.output_queue.put(AdditionalOutputs(chat_entry))

    # 3) Tell the controller (if any) that the tool completed.
    if self._controller is None:
        return
    try:
        await self._controller.on_tool_completed_async(
            bg_tool.tool_name, stashed_args, outcome,
        )
    except Exception as e:
        logger.warning(
            "ConversationController.on_tool_completed_async raised %s: %s",
            type(e).__name__, e,
        )
|
| 523 |
+
|
| 524 |
+
# ------------------------------------------------------------------
|
| 525 |
+
# Public API — face + session context push (mirror of OpenAI handler)
|
| 526 |
+
# ------------------------------------------------------------------
|
| 527 |
+
|
| 528 |
+
def _stash_pending_face_event(self, face_event: dict[str, Any]) -> None:
    """Store a shallow copy of ``face_event`` as the single pending face event.

    Any previously pending event is overwritten.
    """
    snapshot = dict(face_event)
    with self._pending_face_event_lock:
        self._pending_face_event = snapshot
|
| 531 |
+
|
| 532 |
+
def _pop_pending_face_event(self) -> dict[str, Any] | None:
    """Take and clear the pending face event; returns ``None`` if there is none."""
    with self._pending_face_event_lock:
        pending, self._pending_face_event = self._pending_face_event, None
    return pending
|
| 537 |
+
|
| 538 |
+
async def _flush_pending_face_event(self) -> None:
    """Send the pending face event, if any; re-stash it when sending raises."""
    pending = self._pop_pending_face_event()
    if pending is None:
        return
    try:
        await self._push_face_context_event(pending)
    except Exception as e:
        logger.debug("flush pending face event failed: %s", e)
        # Keep the event for a later attempt.
        self._stash_pending_face_event(pending)
|
| 546 |
+
|
| 547 |
+
def notify_external_face_event(self, face_event: dict[str, Any]) -> None:
    """Schedule a face update onto the handler's event loop from any thread.

    When the runtime loop is missing or closed, or no session is open, the
    event is stashed as pending instead. The event is also re-stashed if
    scheduling fails or if the scheduled push later raises.

    Args:
        face_event: Face state payload to forward to the model.
    """
    loop = self._runtime_loop
    if loop is None or loop.is_closed() or self.session is None:
        self._stash_pending_face_event(face_event)
        return

    def _done(fut: "concurrent.futures.Future[None]") -> None:
        # Invoked once the scheduled push has finished.
        try:
            fut.result()
        except Exception as e:
            logger.debug("face event push failed: %s", e)
            self._stash_pending_face_event(face_event)

    try:
        coro = self._push_face_context_event(face_event)
        try:
            future = asyncio.run_coroutine_threadsafe(coro, loop)
        except BaseException:
            # Scheduling failed (e.g. the loop closed after the check
            # above): close the coroutine so it is not left un-awaited.
            coro.close()
            raise
        future.add_done_callback(_done)
    except Exception as e:
        logger.debug("schedule face event failed: %s", e)
        self._stash_pending_face_event(face_event)
|
| 568 |
+
|
| 569 |
+
async def _push_face_context_event(self, face_event: dict[str, Any]) -> None:
    """Send a face-state update to the model as context text and record it.

    Without an open session, or when the send fails, the event is stashed
    as pending and nothing is recorded. On success a summary of what was
    sent (including send time) is stored as the last face event sent.

    Args:
        face_event: Mapping read for ``state``, ``name``,
            ``previous_state``, ``previous_name``, ``lbph_confidence`` and
            ``detection_confidence``.
    """
    if self.session is None:
        self._stash_pending_face_event(face_event)
        return

    state = str(face_event.get("state", "unknown"))
    name = face_event.get("name")
    prefix = f"[External face update {self.format_timestamp()}] "
    details = f"state={state}; name={name}. Context only; don't respond unless the user speaks."
    msg = prefix + details

    try:
        # The message itself tells the model to treat this as context only.
        await self.session.send_realtime_input(text=msg)
    except Exception as e:
        logger.debug("send face context failed: %s", e)
        self._stash_pending_face_event(face_event)
        return

    def _confidence(key: str) -> float:
        # Missing / None / 0 values all collapse to 0.0.
        return float(face_event.get(key) or 0.0)

    now = time.time()
    record = {
        "state": state,
        "name": name,
        "previous_state": face_event.get("previous_state"),
        "previous_name": face_event.get("previous_name"),
        "lbph_confidence": _confidence("lbph_confidence"),
        "detection_confidence": _confidence("detection_confidence"),
        "sent_at": now,
        "sent_at_iso": datetime.fromtimestamp(now).strftime("%Y-%m-%d %H:%M:%S"),
    }
    with self._face_event_lock:
        self._last_face_event_sent = record
|
| 602 |
+
|
| 603 |
+
def get_last_face_event_sent(self) -> dict[str, Any] | None:
    """Return a copy of the last face event sent to the model, or ``None`` if none."""
    with self._face_event_lock:
        last = self._last_face_event_sent
        if not last:
            return None
        return dict(last)
|
| 606 |
+
|
| 607 |
+
# --- session events ---
|
| 608 |
+
|
| 609 |
+
def _stash_pending_session_event(self, payload: dict[str, Any]) -> None:
    """Store a shallow copy of ``payload`` as the single pending session event.

    Any previously pending event is overwritten.
    """
    snapshot = dict(payload)
    with self._pending_session_event_lock:
        self._pending_session_event = snapshot
|
| 612 |
+
|
| 613 |
+
def _pop_pending_session_event(self) -> dict[str, Any] | None:
    """Take and clear the pending session event; returns ``None`` if there is none."""
    with self._pending_session_event_lock:
        pending, self._pending_session_event = self._pending_session_event, None
    return pending
|
| 618 |
+
|
| 619 |
+
async def _flush_pending_session_event(self) -> None:
    """Push the stashed session event, if there is one.

    The event is removed from the pending slot first; if the push raises, it
    is put back so a later flush can retry it.
    """
    pending = self._pop_pending_session_event()
    if pending is None:
        return
    try:
        await self._push_session_context_event(pending)
    except Exception:
        self._stash_pending_session_event(pending)
|
| 626 |
+
|
| 627 |
+
def notify_session_event(self, previous_state: Any, new_state: Any, snapshot: Any) -> None:
    """Queue a session-state transition for delivery to the live session.

    Builds a plain-dict payload and schedules ``_push_session_context_event``
    on ``self._runtime_loop`` via ``asyncio.run_coroutine_threadsafe``. When
    there is no usable loop or session, or scheduling/sending raises, the
    payload is stashed as the pending session event instead.

    Args:
        previous_state: State being left; its ``.value`` is used when present,
            otherwise ``str(previous_state)``.
        new_state: State being entered; converted the same way.
        snapshot: Object whose ``to_dict()`` result is embedded in the payload;
            an empty dict is used when it has no ``to_dict``.
    """
    try:
        old_label = getattr(previous_state, "value", str(previous_state))
        new_label = getattr(new_state, "value", str(new_state))
        snap_dict = snapshot.to_dict() if hasattr(snapshot, "to_dict") else {}
    except Exception:
        # A payload that cannot be built is dropped silently.
        return
    payload = {
        "previous_state": old_label,
        "new_state": new_label,
        "snapshot": snap_dict,
    }

    # Returning to idle re-arms the idle speech cue flag.
    if new_label == "idle":
        self._idle_speech_cue_pushed = False

    loop = self._runtime_loop
    can_dispatch = loop is not None and not loop.is_closed() and self.session is not None
    if not can_dispatch:
        self._stash_pending_session_event(payload)
        return

    def _restash_on_failure(fut: "asyncio.Future[None]") -> None:
        # Runs when the scheduled push completes; keep the event if it raised.
        try:
            fut.result()
        except Exception:
            self._stash_pending_session_event(payload)

    try:
        scheduled = asyncio.run_coroutine_threadsafe(
            self._push_session_context_event(payload),
            loop,
        )
        scheduled.add_done_callback(_restash_on_failure)
    except Exception:
        self._stash_pending_session_event(payload)
|
| 658 |
+
|
| 659 |
+
async def _push_session_context_event(self, payload: dict[str, Any]) -> None:
    """Send a backend session-state update to the live session.

    The message summarises the state transition and selected snapshot fields.
    When the new state calls for immediate speech, it is sent with
    ``send_client_content(..., turn_complete=True)`` so the model responds;
    otherwise it is sent with ``send_realtime_input(text=...)`` as context only.

    If there is no session, or the send raises, the payload is stashed as the
    pending session event so ``_flush_pending_session_event`` can retry it.
    This matches how failed face-context sends are handled. On success the
    payload, plus ``sent_at`` and ``hint``, is recorded as the last session
    event sent.

    Args:
        payload: Dict with ``previous_state``, ``new_state`` and ``snapshot``
            keys, as built by ``notify_session_event``.
    """
    if self.session is None:
        self._stash_pending_session_event(payload)
        return

    snap = payload.get("snapshot") or {}
    new_state_value = payload.get("new_state")

    # Resolve the hint and the speak-now decision once, so the message text
    # and the transport used to send it can never disagree.
    hint = ""
    speak_now = False
    try:
        from reachy_mini_receptionist.conversation_controller import (
            next_action_hint,
            should_speak_immediately,
        )
        from reachy_mini_receptionist.receptionist_state import ReceptionState

        if new_state_value:
            new_state_enum = ReceptionState(new_state_value)
            hint = next_action_hint(new_state_enum)
            speak_now = should_speak_immediately(new_state_enum)
    except Exception as e:
        # Best effort: an unknown state or unavailable helper degrades to a
        # context-only update rather than dropping the update altogether.
        logger.debug("session hint lookup failed: %s", e)

    base = (
        f"[Backend session update {self.format_timestamp()}] "
        f"state: {payload.get('previous_state')} -> {new_state_value}; "
        f"visitor={snap.get('visitor_name')}; "
        f"employee={snap.get('employee_name')}; "
        f"appointment={(snap.get('matched_appointment') or {}).get('time')}; "
        f"email_sent_to={snap.get('email_sent_to')}."
    )
    if hint and speak_now:
        msg = f"{base} SPEAK NOW: {hint}"
    elif hint:
        msg = f"{base} Next: {hint} (Stay quiet until the visitor speaks; context only.)"
    else:
        msg = f"{base} Context only; do not respond unless the user speaks."

    try:
        if speak_now:
            # A completed user turn makes the model actually respond.
            await self.session.send_client_content(
                turns=[{
                    "role": "user",
                    "parts": [{"text": msg}],
                }],
                turn_complete=True,
            )
        else:
            # Realtime text input adds context without forcing a turn.
            await self.session.send_realtime_input(text=msg)
    except Exception as e:
        logger.debug("session context push failed: %s", e)
        # Keep the event for a later flush instead of losing it.
        self._stash_pending_session_event(payload)
        return

    sent_payload = {**payload, "sent_at": time.time(), "hint": hint}
    with self._session_event_lock:
        self._last_session_event_sent = sent_payload
|
| 724 |
+
|
| 725 |
+
def get_last_session_event_sent(self) -> dict[str, Any] | None:
    """Return a shallow copy of the most recently recorded session event.

    Returns:
        A new ``dict`` with the recorded payload, or ``None`` when no payload
        has been recorded (or the recorded value is empty).
    """
    with self._session_event_lock:
        latest = self._last_session_event_sent
        if not latest:
            return None
        # Copy so callers cannot mutate the stored record.
        return dict(latest)
|
| 728 |
+
|
| 729 |
+
# ------------------------------------------------------------------
|
| 730 |
+
# Personality + voice (UI hooks)
|
| 731 |
+
# ------------------------------------------------------------------
|
| 732 |
+
|
| 733 |
+
async def apply_personality(self, profile: str | None) -> str:
    """Store the selected profile and return a status message for the UI.

    Minimal implementation: the choice is handed to ``set_custom_profile`` and
    the caller is told a restart is required before it takes effect.

    Args:
        profile: Profile name to store, or ``None``.

    Returns:
        A human-readable status string; failures are reported in the string
        rather than raised.
    """
    try:
        from reachy_mini_receptionist.config import set_custom_profile

        set_custom_profile(profile)
    except Exception as err:
        return f"Failed to apply personality: {err}"
    return "Personality updated. Restart for it to take effect (Gemini backend)."
|
| 741 |
+
|
| 742 |
+
async def get_available_voices(self) -> list[str]:
    """Return the fixed list of Gemini prebuilt voice names (no discovery API)."""
    prebuilt = ("Aoede", "Charon", "Kore", "Puck", "Fenrir")
    # Return a fresh list on every call.
    return list(prebuilt)
|
| 745 |
+
|
| 746 |
+
# ------------------------------------------------------------------
|
| 747 |
+
# Helpers
|
| 748 |
+
# ------------------------------------------------------------------
|
| 749 |
+
|
| 750 |
+
def format_timestamp(self) -> str:
    """Return ``[YYYY-MM-DD HH:MM:SS | +N.Ns]`` for the current moment.

    The first part is local wall-clock time; the second is the event loop's
    clock minus ``self.start_time``.
    NOTE(review): assumes ``start_time`` was captured from the same loop
    clock - confirm where it is set.
    """
    seconds_since_start = asyncio.get_event_loop().time() - self.start_time
    wall_clock = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    return f"[{wall_clock} | +{seconds_since_start:.1f}s]"
|
src/reachy_mini_receptionist/gradio_personality.py
ADDED
|
@@ -0,0 +1,316 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Gradio personality UI components and wiring.
|
| 2 |
+
|
| 3 |
+
This module encapsulates the UI elements and logic related to managing
|
| 4 |
+
conversation "personalities" (profiles) so that `main.py` stays lean.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
from typing import Any
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
|
| 11 |
+
import gradio as gr
|
| 12 |
+
|
| 13 |
+
from .config import LOCKED_PROFILE, config
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class PersonalityUI:
    """Container for personality-related Gradio components.

    A personality (profile) is a directory under ``profiles/`` next to this
    module, or under ``profiles/user_personalities/`` for profiles saved from
    the UI. Each holds ``instructions.txt`` and optionally ``tools.txt`` and
    ``voice.txt``.
    """

    def __init__(self) -> None:
        """Initialize the PersonalityUI instance."""
        # Constants and paths
        self.DEFAULT_OPTION = "(built-in default)"
        self._profiles_root = Path(__file__).parent / "profiles"
        self._tools_dir = Path(__file__).parent / "tools"
        self._prompts_dir = Path(__file__).parent / "prompts"

        # Components (initialized in create_components)
        self.personalities_dropdown: gr.Dropdown
        self.apply_btn: gr.Button
        self.status_md: gr.Markdown
        self.preview_md: gr.Markdown
        self.person_name_tb: gr.Textbox
        self.person_instr_ta: gr.TextArea
        self.tools_txt_ta: gr.TextArea
        self.voice_dropdown: gr.Dropdown
        self.new_personality_btn: gr.Button
        self.available_tools_cg: gr.CheckboxGroup
        self.save_btn: gr.Button

    # ---------- Filesystem helpers ----------
    def _list_personalities(self) -> list[str]:
        """Return selectable profile names.

        Built-in profiles come first (sorted), then user profiles as
        ``user_personalities/<name>``. Only directories that contain
        ``instructions.txt`` are listed. Filesystem errors are swallowed and
        whatever was collected so far is returned.
        """
        names: list[str] = []
        try:
            if self._profiles_root.exists():
                for p in sorted(self._profiles_root.iterdir()):
                    if p.name == "user_personalities":
                        continue
                    if p.is_dir() and (p / "instructions.txt").exists():
                        names.append(p.name)
            user_dir = self._profiles_root / "user_personalities"
            if user_dir.exists():
                for p in sorted(user_dir.iterdir()):
                    if p.is_dir() and (p / "instructions.txt").exists():
                        names.append(f"user_personalities/{p.name}")
        except Exception:
            # Best effort: an unreadable directory must not break the UI.
            pass
        return names

    def _resolve_profile_dir(self, selection: str) -> Path:
        """Return the directory for ``selection`` under the profiles root."""
        return self._profiles_root / selection

    def _read_instructions_for(self, name: str) -> str:
        """Return the stripped instructions text for ``name``.

        The default option reads ``prompts/default_prompt.txt``; any other name
        reads ``instructions.txt`` from its profile directory. A missing file
        yields ``""``; a read error yields an error message string.
        """
        try:
            if name == self.DEFAULT_OPTION:
                default_file = self._prompts_dir / "default_prompt.txt"
                if default_file.exists():
                    return default_file.read_text(encoding="utf-8").strip()
                return ""
            target = self._resolve_profile_dir(name) / "instructions.txt"
            if target.exists():
                return target.read_text(encoding="utf-8").strip()
            return ""
        except Exception as e:
            return f"Could not load instructions: {e}"

    @staticmethod
    def _sanitize_name(name: str) -> str:
        """Turn free text into a directory-safe name.

        Surrounding whitespace is stripped, whitespace runs become ``_``, and
        every character outside ``[a-zA-Z0-9_-]`` is removed. The result may be
        empty.
        """
        import re

        s = name.strip()
        s = re.sub(r"\s+", "_", s)
        s = re.sub(r"[^a-zA-Z0-9_-]", "", s)
        return s

    @staticmethod
    def _pick_voice(preferred: str, voices: list[str], fallback: str = "marin") -> str:
        """Choose a voice that is guaranteed to be in ``voices`` when possible.

        Order of preference: ``preferred``, then ``fallback``, then the first
        entry of ``voices``. ``fallback`` is returned only when ``voices`` is
        empty.
        """
        if preferred in voices:
            return preferred
        if fallback in voices:
            return fallback
        return voices[0] if voices else fallback

    # ---------- Public API ----------
    def create_components(self) -> None:
        """Instantiate Gradio components for the personality UI."""
        if LOCKED_PROFILE is not None:
            # A locked profile is the only choice and nothing is editable.
            is_locked = True
            current_value: str = LOCKED_PROFILE
            dropdown_label = "Select personality (locked)"
            dropdown_choices: list[str] = [LOCKED_PROFILE]
        else:
            is_locked = False
            current_value = config.REACHY_MINI_CUSTOM_PROFILE or self.DEFAULT_OPTION
            dropdown_label = "Select personality"
            dropdown_choices = [self.DEFAULT_OPTION, *(self._list_personalities())]

        self.personalities_dropdown = gr.Dropdown(
            label=dropdown_label,
            choices=dropdown_choices,
            value=current_value,
            interactive=not is_locked,
        )
        self.apply_btn = gr.Button("Apply personality", interactive=not is_locked)
        self.status_md = gr.Markdown(visible=True)
        self.preview_md = gr.Markdown(value=self._read_instructions_for(current_value))
        self.person_name_tb = gr.Textbox(label="Personality name", interactive=not is_locked)
        self.person_instr_ta = gr.TextArea(label="Personality instructions", lines=10, interactive=not is_locked)
        self.tools_txt_ta = gr.TextArea(label="tools.txt", lines=10, interactive=not is_locked)
        # Placeholder choices; replaced by the handler's voice list on page load.
        self.voice_dropdown = gr.Dropdown(label="Voice", choices=["marin"], value="marin", interactive=not is_locked)
        self.new_personality_btn = gr.Button("New personality", interactive=not is_locked)
        self.available_tools_cg = gr.CheckboxGroup(label="Available tools (helper)", choices=[], value=[], interactive=not is_locked)
        self.save_btn = gr.Button("Save personality (instructions + tools)", interactive=not is_locked)

    def additional_inputs_ordered(self) -> list[Any]:
        """Return the additional inputs in the expected order for Stream."""
        return [
            self.personalities_dropdown,
            self.apply_btn,
            self.new_personality_btn,
            self.status_md,
            self.preview_md,
            self.person_name_tb,
            self.person_instr_ta,
            self.tools_txt_ta,
            self.voice_dropdown,
            self.available_tools_cg,
            self.save_btn,
        ]

    # ---------- Event wiring ----------
    def wire_events(self, handler: Any, blocks: gr.Blocks) -> None:
        """Attach event handlers to components within a Blocks context.

        Args:
            handler: Backend object providing the ``apply_personality`` and
                ``get_available_voices`` coroutines.
            blocks: The ``gr.Blocks`` the components belong to.
        """

        async def _apply_personality(selected: str) -> tuple[str, str]:
            # Returns (status text, instructions preview).
            if LOCKED_PROFILE is not None and selected != LOCKED_PROFILE:
                return (
                    f"Profile is locked to '{LOCKED_PROFILE}'. Cannot change personality.",
                    self._read_instructions_for(LOCKED_PROFILE),
                )
            profile = None if selected == self.DEFAULT_OPTION else selected
            status = await handler.apply_personality(profile)
            preview = self._read_instructions_for(selected)
            return status, preview

        def _read_voice_for(name: str) -> str:
            # Voice stored in the profile's voice.txt; "marin" when absent.
            try:
                if name == self.DEFAULT_OPTION:
                    return "marin"
                vf = self._resolve_profile_dir(name) / "voice.txt"
                if vf.exists():
                    v = vf.read_text(encoding="utf-8").strip()
                    return v or "marin"
            except Exception:
                pass
            return "marin"

        async def _fetch_voices(selected: str) -> dict[str, Any]:
            # Populate the voice dropdown from the backend's voice list.
            try:
                voices = list(await handler.get_available_voices())
            except Exception:
                voices = []
            if not voices:
                return gr.update(choices=["marin"], value="marin")
            # The selected value must be one of the offered choices, so fall
            # back to a voice the backend actually provides.
            current = self._pick_voice(_read_voice_for(selected), voices)
            return gr.update(choices=voices, value=current)

        def _available_tools_for(selected: str) -> tuple[list[str], list[str]]:
            # Returns (shared tool modules, modules local to the profile).
            shared: list[str] = []
            try:
                for py in self._tools_dir.glob("*.py"):
                    if py.stem in {"__init__", "core_tools"}:
                        continue
                    shared.append(py.stem)
            except Exception:
                pass
            local: list[str] = []
            try:
                if selected != self.DEFAULT_OPTION:
                    for py in (self._profiles_root / selected).glob("*.py"):
                        local.append(py.stem)
            except Exception:
                pass
            return sorted(shared), sorted(local)

        def _parse_enabled_tools(text: str) -> list[str]:
            # Every non-blank, non-comment line of tools.txt is a tool name.
            enabled: list[str] = []
            for line in text.splitlines():
                s = line.strip()
                if not s or s.startswith("#"):
                    continue
                enabled.append(s)
            return enabled

        def _load_profile_for_edit(selected: str) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any], str]:
            # Fill the editor widgets with the selected profile's files.
            instr = self._read_instructions_for(selected)
            tools_txt = ""
            if selected != self.DEFAULT_OPTION:
                tp = self._resolve_profile_dir(selected) / "tools.txt"
                if tp.exists():
                    tools_txt = tp.read_text(encoding="utf-8")
            shared, local = _available_tools_for(selected)
            all_tools = sorted(set(shared + local))
            enabled = _parse_enabled_tools(tools_txt)
            status_text = f"Loaded profile '{selected}'."
            return (
                gr.update(value=instr),
                gr.update(value=tools_txt),
                gr.update(choices=all_tools, value=enabled),
                status_text,
            )

        def _new_personality() -> tuple[
            dict[str, Any], dict[str, Any], dict[str, Any], dict[str, Any], str, dict[str, Any]
        ]:
            try:
                # Prefill with hints
                instr_val = """# Write your instructions here\n# e.g., Keep responses concise and friendly."""
                tools_txt_val = "# tools enabled for this profile\n"
                return (
                    gr.update(value=""),
                    gr.update(value=instr_val),
                    gr.update(value=tools_txt_val),
                    gr.update(choices=sorted(_available_tools_for(self.DEFAULT_OPTION)[0]), value=[]),
                    "Fill in a name, instructions and (optional) tools, then Save.",
                    gr.update(value="marin"),
                )
            except Exception:
                return (
                    gr.update(),
                    gr.update(),
                    gr.update(),
                    gr.update(),
                    "Failed to initialize new personality.",
                    gr.update(),
                )

        def _save_personality(
            name: str, instructions: str, tools_text: str, voice: str
        ) -> tuple[dict[str, Any], dict[str, Any], str]:
            # Write the profile under user_personalities/ and select it.
            name_s = self._sanitize_name(name)
            if not name_s:
                return gr.update(), gr.update(), "Please enter a valid name."
            try:
                target_dir = self._profiles_root / "user_personalities" / name_s
                target_dir.mkdir(parents=True, exist_ok=True)
                (target_dir / "instructions.txt").write_text(instructions.strip() + "\n", encoding="utf-8")
                (target_dir / "tools.txt").write_text(tools_text.strip() + "\n", encoding="utf-8")
                (target_dir / "voice.txt").write_text((voice or "marin").strip() + "\n", encoding="utf-8")

                choices = self._list_personalities()
                value = f"user_personalities/{name_s}"
                if value not in choices:
                    choices.append(value)
                return (
                    gr.update(choices=[self.DEFAULT_OPTION, *sorted(choices)], value=value),
                    gr.update(value=instructions),
                    f"Saved personality '{name_s}'.",
                )
            except Exception as e:
                return gr.update(), gr.update(), f"Failed to save personality: {e}"

        def _sync_tools_from_checks(selected: list[str], current_text: str) -> dict[str, Any]:
            # Rebuild tools.txt: keep existing comment lines, then list the
            # checked tools one per line.
            comments = [ln for ln in current_text.splitlines() if ln.strip().startswith("#")]
            body = "\n".join(selected)
            out = ("\n".join(comments) + ("\n" if comments else "") + body).strip() + "\n"
            return gr.update(value=out)

        with blocks:
            self.apply_btn.click(
                fn=_apply_personality,
                inputs=[self.personalities_dropdown],
                outputs=[self.status_md, self.preview_md],
            )

            self.personalities_dropdown.change(
                fn=_load_profile_for_edit,
                inputs=[self.personalities_dropdown],
                outputs=[self.person_instr_ta, self.tools_txt_ta, self.available_tools_cg, self.status_md],
            )

            blocks.load(
                fn=_fetch_voices,
                inputs=[self.personalities_dropdown],
                outputs=[self.voice_dropdown],
            )

            self.available_tools_cg.change(
                fn=_sync_tools_from_checks,
                inputs=[self.available_tools_cg, self.tools_txt_ta],
                outputs=[self.tools_txt_ta],
            )

            self.new_personality_btn.click(
                fn=_new_personality,
                inputs=[],
                outputs=[
                    self.person_name_tb,
                    self.person_instr_ta,
                    self.tools_txt_ta,
                    self.available_tools_cg,
                    self.status_md,
                    self.voice_dropdown,
                ],
            )

            # After a save, apply the newly selected personality as well.
            self.save_btn.click(
                fn=_save_personality,
                inputs=[self.person_name_tb, self.person_instr_ta, self.tools_txt_ta, self.voice_dropdown],
                outputs=[self.personalities_dropdown, self.person_instr_ta, self.status_md],
            ).then(
                fn=_apply_personality,
                inputs=[self.personalities_dropdown],
                outputs=[self.status_md, self.preview_md],
            )
|
src/reachy_mini_receptionist/headless_personality.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Headless personality management (console-based).
|
| 2 |
+
|
| 3 |
+
Provides an interactive CLI to browse, preview, apply, create and edit
|
| 4 |
+
"personalities" (profiles) when running without Gradio.
|
| 5 |
+
|
| 6 |
+
This module is intentionally not shared with the Gradio implementation to
|
| 7 |
+
avoid coupling and keep responsibilities clear for headless mode.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from __future__ import annotations
|
| 11 |
+
from typing import List
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# Selection label meaning "no custom profile": instructions are then read from
# prompts/default_prompt.txt instead of a profile directory.
DEFAULT_OPTION = "(built-in default)"
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def _profiles_root() -> Path:
    """Return the ``profiles`` directory located beside this module."""
    module_dir = Path(__file__).parent
    return module_dir / "profiles"
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def _prompts_dir() -> Path:
    """Return the ``prompts`` directory located beside this module."""
    module_dir = Path(__file__).parent
    return module_dir / "prompts"
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def _tools_dir() -> Path:
    """Return the ``tools`` directory located beside this module."""
    module_dir = Path(__file__).parent
    return module_dir / "tools"
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def _sanitize_name(name: str) -> str:
    """Turn free text into a directory-safe profile name.

    Surrounding whitespace is stripped, each run of whitespace becomes a
    single ``_``, and every character outside ``[a-zA-Z0-9_-]`` is dropped.
    The result may be empty.
    """
    import re

    underscored = re.sub(r"\s+", "_", name.strip())
    return re.sub(r"[^a-zA-Z0-9_-]", "", underscored)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def list_personalities() -> List[str]:
    """List available personality profile names.

    Built-in profiles come first (sorted by path), followed by user profiles
    reported as ``user_personalities/<name>``. Only directories containing an
    ``instructions.txt`` count. Filesystem errors are swallowed and whatever
    was collected up to that point is returned.
    """

    def _has_instructions(candidate: Path) -> bool:
        # A profile is a directory that ships an instructions file.
        return candidate.is_dir() and (candidate / "instructions.txt").exists()

    found: List[str] = []
    base = _profiles_root()
    try:
        if base.exists():
            for entry in sorted(base.iterdir()):
                # The user folder is a container, not a profile itself.
                if entry.name != "user_personalities" and _has_instructions(entry):
                    found.append(entry.name)
        user_base = base / "user_personalities"
        if user_base.exists():
            for entry in sorted(user_base.iterdir()):
                if _has_instructions(entry):
                    found.append(f"user_personalities/{entry.name}")
    except Exception:
        pass
    return found
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def resolve_profile_dir(selection: str) -> Path:
    """Return the directory for ``selection`` under the profiles root.

    ``selection`` is joined as-is; no check is made that the directory exists.
    """
    return _profiles_root().joinpath(selection)
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def read_instructions_for(name: str) -> str:
    """Return the stripped instructions text for the given profile name.

    ``DEFAULT_OPTION`` maps to ``prompts/default_prompt.txt``; any other name
    maps to ``instructions.txt`` inside its profile directory. A missing file
    yields ``""``; any error yields a ``"Could not load instructions: ..."``
    message instead of raising.
    """
    try:
        if name == DEFAULT_OPTION:
            source = _prompts_dir() / "default_prompt.txt"
        else:
            source = resolve_profile_dir(name) / "instructions.txt"
        if not source.exists():
            return ""
        return source.read_text(encoding="utf-8").strip()
    except Exception as e:
        return f"Could not load instructions: {e}"
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def available_tools_for(selected: str) -> List[str]:
    """List available tool modules for the given profile selection.

    Combines the shared ``*.py`` modules in the tools directory (excluding
    ``__init__`` and ``core_tools``) with any ``*.py`` modules inside the
    selected profile's directory. The result is de-duplicated and sorted.
    Filesystem errors in either scan are ignored.
    """
    skipped = {"__init__", "core_tools"}
    stems: set[str] = set()

    # Shared tools shipped with the package.
    try:
        for module_path in _tools_dir().glob("*.py"):
            if module_path.stem not in skipped:
                stems.add(module_path.stem)
    except Exception:
        pass

    # Tools that live next to the profile; the default option has none.
    try:
        if selected != DEFAULT_OPTION:
            for module_path in resolve_profile_dir(selected).glob("*.py"):
                stems.add(module_path.stem)
    except Exception:
        pass

    return sorted(stems)
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def _write_profile(name_s: str, instructions: str, tools_text: str, voice: str = "marin") -> None:
    """Write a user profile to ``profiles/user_personalities/<name_s>``.

    Creates the directory if needed and (over)writes ``instructions.txt``,
    ``tools.txt`` and ``voice.txt``, each stripped and newline-terminated.

    Args:
        name_s: Directory name for the profile (callers in view pass a
            sanitized name).
        instructions: Instructions text.
        tools_text: Content of ``tools.txt``; a falsy value writes an empty line.
        voice: Voice name; a falsy value is stored as ``"marin"``.
    """
    target_dir = _profiles_root() / "user_personalities" / name_s
    target_dir.mkdir(parents=True, exist_ok=True)

    def _put(filename: str, text: str) -> None:
        # One normalised, newline-terminated UTF-8 file per setting.
        (target_dir / filename).write_text(text.strip() + "\n", encoding="utf-8")

    _put("instructions.txt", instructions)
    _put("tools.txt", tools_text or "")
    _put("voice.txt", voice or "marin")
|
src/reachy_mini_receptionist/headless_personality_ui.py
ADDED
|
@@ -0,0 +1,287 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Settings UI routes for headless personality management.
|
| 2 |
+
|
| 3 |
+
Exposes REST endpoints on the provided FastAPI settings app. The
|
| 4 |
+
implementation schedules backend actions (apply personality, fetch voices)
|
| 5 |
+
onto the running LocalStream asyncio loop using the supplied get_loop
|
| 6 |
+
callable to avoid cross-thread issues.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
import asyncio
|
| 11 |
+
import logging
|
| 12 |
+
from typing import Any, Callable, Optional
|
| 13 |
+
|
| 14 |
+
from fastapi import FastAPI
|
| 15 |
+
|
| 16 |
+
from .config import LOCKED_PROFILE, config
|
| 17 |
+
from .openai_realtime import OpenaiRealtimeHandler
|
| 18 |
+
from .headless_personality import (
|
| 19 |
+
DEFAULT_OPTION,
|
| 20 |
+
_sanitize_name,
|
| 21 |
+
_write_profile,
|
| 22 |
+
list_personalities,
|
| 23 |
+
available_tools_for,
|
| 24 |
+
resolve_profile_dir,
|
| 25 |
+
read_instructions_for,
|
| 26 |
+
)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def mount_personality_routes(
|
| 30 |
+
app: FastAPI,
|
| 31 |
+
handler: OpenaiRealtimeHandler,
|
| 32 |
+
get_loop: Callable[[], asyncio.AbstractEventLoop | None],
|
| 33 |
+
*,
|
| 34 |
+
persist_personality: Callable[[Optional[str]], None] | None = None,
|
| 35 |
+
get_persisted_personality: Callable[[], Optional[str]] | None = None,
|
| 36 |
+
) -> None:
|
| 37 |
+
"""Register personality management endpoints on a FastAPI app."""
|
| 38 |
+
try:
|
| 39 |
+
from fastapi import Request
|
| 40 |
+
from pydantic import BaseModel
|
| 41 |
+
from fastapi.responses import JSONResponse
|
| 42 |
+
except Exception: # pragma: no cover - only when settings app not available
|
| 43 |
+
return
|
| 44 |
+
|
| 45 |
+
class SavePayload(BaseModel):
|
| 46 |
+
name: str
|
| 47 |
+
instructions: str
|
| 48 |
+
tools_text: str
|
| 49 |
+
voice: Optional[str] = "marin"
|
| 50 |
+
|
| 51 |
+
class ApplyPayload(BaseModel):
|
| 52 |
+
name: str
|
| 53 |
+
persist: Optional[bool] = False
|
| 54 |
+
|
| 55 |
+
def _startup_choice() -> Any:
|
| 56 |
+
"""Return the persisted startup personality or default."""
|
| 57 |
+
try:
|
| 58 |
+
if get_persisted_personality is not None:
|
| 59 |
+
stored = get_persisted_personality()
|
| 60 |
+
if stored:
|
| 61 |
+
return stored
|
| 62 |
+
env_val = getattr(config, "REACHY_MINI_CUSTOM_PROFILE", None)
|
| 63 |
+
if env_val:
|
| 64 |
+
return env_val
|
| 65 |
+
except Exception:
|
| 66 |
+
pass
|
| 67 |
+
return DEFAULT_OPTION
|
| 68 |
+
|
| 69 |
+
def _current_choice() -> str:
|
| 70 |
+
try:
|
| 71 |
+
cur = getattr(config, "REACHY_MINI_CUSTOM_PROFILE", None)
|
| 72 |
+
return cur or DEFAULT_OPTION
|
| 73 |
+
except Exception:
|
| 74 |
+
return DEFAULT_OPTION
|
| 75 |
+
|
| 76 |
+
@app.get("/personalities")
|
| 77 |
+
def _list() -> dict: # type: ignore
|
| 78 |
+
choices = [DEFAULT_OPTION, *list_personalities()]
|
| 79 |
+
return {
|
| 80 |
+
"choices": choices,
|
| 81 |
+
"current": _current_choice(),
|
| 82 |
+
"startup": _startup_choice(),
|
| 83 |
+
"locked": LOCKED_PROFILE is not None,
|
| 84 |
+
"locked_to": LOCKED_PROFILE,
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
@app.get("/personalities/load")
|
| 88 |
+
def _load(name: str) -> dict: # type: ignore
|
| 89 |
+
instr = read_instructions_for(name)
|
| 90 |
+
tools_txt = ""
|
| 91 |
+
voice = "marin"
|
| 92 |
+
if name != DEFAULT_OPTION:
|
| 93 |
+
pdir = resolve_profile_dir(name)
|
| 94 |
+
tp = pdir / "tools.txt"
|
| 95 |
+
if tp.exists():
|
| 96 |
+
tools_txt = tp.read_text(encoding="utf-8")
|
| 97 |
+
vf = pdir / "voice.txt"
|
| 98 |
+
if vf.exists():
|
| 99 |
+
v = vf.read_text(encoding="utf-8").strip()
|
| 100 |
+
voice = v or "marin"
|
| 101 |
+
avail = available_tools_for(name)
|
| 102 |
+
enabled = [ln.strip() for ln in tools_txt.splitlines() if ln.strip() and not ln.strip().startswith("#")]
|
| 103 |
+
return {
|
| 104 |
+
"instructions": instr,
|
| 105 |
+
"tools_text": tools_txt,
|
| 106 |
+
"voice": voice,
|
| 107 |
+
"available_tools": avail,
|
| 108 |
+
"enabled_tools": enabled,
|
| 109 |
+
}
|
| 110 |
+
|
| 111 |
+
@app.post("/personalities/save")
async def _save(request: Request) -> dict:  # type: ignore
    """Create or overwrite a user personality from a raw JSON body.

    Expected body: a JSON object with ``name`` and optional ``instructions``,
    ``tools_text`` and ``voice`` (default ``"marin"``). A body that is not
    valid JSON, or is valid JSON but not an object, is treated as empty and
    therefore rejected with ``invalid_name``.

    Returns ``{"ok": True, "value": ..., "choices": [...]}`` on success, a
    400 JSON response when the sanitized name is empty, and a 500 JSON
    response carrying the error text when writing the profile fails.
    """
    # Accept raw JSON only to avoid validation-related 422s
    try:
        raw = await request.json()
    except Exception:
        raw = {}
    if not isinstance(raw, dict):
        # e.g. a JSON list or string: ``raw.get`` below would raise.
        raw = {}

    def _field(key: str, default: str = "") -> str:
        # JSON null counts as "not provided"; str(None) would otherwise
        # yield the literal text "None" as a name or as file content.
        value = raw.get(key)
        return default if value is None else str(value)

    name = _field("name")
    instructions = _field("instructions")
    tools_text = _field("tools_text")
    voice = _field("voice", "marin")

    name_s = _sanitize_name(name)
    if not name_s:
        return JSONResponse({"ok": False, "error": "invalid_name"}, status_code=400)  # type: ignore
    try:
        logger.info(
            "Headless save: name=%r voice=%r instr_len=%d tools_len=%d",
            name_s,
            voice,
            len(instructions),
            len(tools_text),
        )
        _write_profile(name_s, instructions, tools_text, voice or "marin")
        value = f"user_personalities/{name_s}"
        choices = [DEFAULT_OPTION, *list_personalities()]
        return {"ok": True, "value": value, "choices": choices}
    except Exception as e:
        return JSONResponse({"ok": False, "error": str(e)}, status_code=500)  # type: ignore
|
| 140 |
+
|
| 141 |
+
@app.post("/personalities/save_raw")
async def _save_raw(
    request: Request,
    name: Optional[str] = None,
    instructions: Optional[str] = None,
    tools_text: Optional[str] = None,
    voice: Optional[str] = None,
) -> dict:  # type: ignore
    """Save a personality from query params, a form body, or a JSON body.

    The sources are layered in that order: query parameters seed the values,
    form fields override them, and JSON object fields override both. Failure
    to parse the form or the JSON is ignored. Responses mirror the JSON-only
    save endpoint: success dict, 400 on an empty sanitized name, 500 with the
    error text when the write fails.
    """
    field_names = ("name", "instructions", "tools_text", "voice")
    # Accept query params, form-encoded, or raw JSON
    data = dict(zip(field_names, (name, instructions, tools_text, voice)))
    # Prefer form if present
    try:
        form = await request.form()
        for key in field_names:
            if key in form and form[key] is not None:
                data[key] = str(form[key])
    except Exception:
        pass
    # Try JSON
    try:
        payload = await request.json()
        if isinstance(payload, dict):
            for key in field_names:
                supplied = payload.get(key)
                if supplied is not None:
                    data[key] = str(supplied)
    except Exception:
        pass

    safe_name = _sanitize_name(str(data.get("name") or ""))
    if not safe_name:
        return JSONResponse({"ok": False, "error": "invalid_name"}, status_code=400)  # type: ignore
    instr_text = str(data.get("instructions") or "")
    tools_body = str(data.get("tools_text") or "")
    voice_name = str(data.get("voice") or "marin")
    try:
        logger.info(
            "Headless save_raw: name=%r voice=%r instr_len=%d tools_len=%d",
            safe_name,
            voice_name,
            len(instr_text),
            len(tools_body),
        )
        _write_profile(safe_name, instr_text, tools_body, voice_name)
        return {
            "ok": True,
            "value": f"user_personalities/{safe_name}",
            "choices": [DEFAULT_OPTION, *list_personalities()],
        }
    except Exception as exc:
        return JSONResponse({"ok": False, "error": str(exc)}, status_code=500)  # type: ignore
|
| 185 |
+
|
| 186 |
+
@app.get("/personalities/save_raw")
async def _save_raw_get(name: str, instructions: str = "", tools_text: str = "", voice: str = "marin") -> dict:  # type: ignore
    """Save a personality using query parameters only (GET variant).

    Returns the success dict, a 400 JSON response when the sanitized name is
    empty, or a 500 JSON response with the error text if the write fails.
    """
    safe_name = _sanitize_name(name)
    if not safe_name:
        return JSONResponse({"ok": False, "error": "invalid_name"}, status_code=400)  # type: ignore
    try:
        logger.info(
            "Headless save_raw(GET): name=%r voice=%r instr_len=%d tools_len=%d",
            safe_name,
            voice,
            len(instructions),
            len(tools_text),
        )
        # An empty ``voice`` query value falls back to the default voice.
        _write_profile(safe_name, instructions, tools_text, voice or "marin")
        return {
            "ok": True,
            "value": f"user_personalities/{safe_name}",
            "choices": [DEFAULT_OPTION, *list_personalities()],
        }
    except Exception as exc:
        return JSONResponse({"ok": False, "error": str(exc)}, status_code=500)  # type: ignore
|
| 205 |
+
|
| 206 |
+
# Logger for the personality route handlers. The save handlers defined above
# also reference this name; it is only looked up when a request invokes them.
logger = logging.getLogger(__name__)
|
| 207 |
+
|
| 208 |
+
@app.post("/personalities/apply")
async def _apply(
    payload: ApplyPayload | None = None,
    name: str | None = None,
    persist: Optional[bool] = None,
    request: Optional[Request] = None,
) -> dict:  # type: ignore
    """Apply a personality to the running handler, optionally persisting it.

    The personality name is taken from the first source that provides one:
    the JSON ``payload``, the ``name`` query parameter, then the raw request
    body. When none does, the default option is applied. A ``persist`` query
    parameter, if present, has the final say on whether the choice is also
    stored as the startup personality.

    Returns ``{"ok": True, "status": ..., "startup": ...}`` on success, 403
    when a profile is locked, 503 when the handler loop is unavailable, and
    500 with the error text if applying fails or times out.
    """
    if LOCKED_PROFILE is not None:
        return JSONResponse(
            {"ok": False, "error": "profile_locked", "locked_to": LOCKED_PROFILE},
            status_code=403,
        )  # type: ignore
    loop = get_loop()
    if loop is None:
        return JSONResponse({"ok": False, "error": "loop_unavailable"}, status_code=503)  # type: ignore

    # Accept both JSON payload and query param for convenience
    sel_name: Optional[str] = None
    persist_flag = bool(persist) if persist is not None else False
    if payload and getattr(payload, "name", None):
        sel_name = payload.name
        persist_flag = bool(getattr(payload, "persist", False))
    elif name:
        sel_name = name
    elif request is not None:
        try:
            body = await request.json()
            if isinstance(body, dict) and body.get("name"):
                sel_name = str(body.get("name"))
            if isinstance(body, dict) and "persist" in body:
                persist_flag = bool(body.get("persist"))
        except Exception:
            sel_name = None
    if request is not None:
        try:
            q_persist = request.query_params.get("persist")
            if q_persist is not None:
                persist_flag = str(q_persist).lower() in {"1", "true", "yes", "on"}
        except Exception:
            pass
    if not sel_name:
        sel_name = DEFAULT_OPTION

    async def _do_apply() -> str:
        # The handler takes None to mean "the default personality".
        sel = None if sel_name == DEFAULT_OPTION else sel_name
        status = await handler.apply_personality(sel)
        return status

    try:
        logger.info("Headless apply: requested name=%r", sel_name)
        fut = asyncio.run_coroutine_threadsafe(_do_apply(), loop)
        # Await the cross-loop result instead of calling ``fut.result()``: a
        # blocking wait inside this coroutine stalls the event loop serving
        # the request for up to the whole timeout. ``shield`` keeps a timeout
        # here from cancelling the apply already running on the handler's
        # loop, matching ``fut.result(timeout=...)``, which never cancelled it.
        status = await asyncio.wait_for(
            asyncio.shield(asyncio.wrap_future(fut)),
            timeout=10,
        )
        persisted_choice = _startup_choice()
        if persist_flag and persist_personality is not None:
            try:
                persist_personality(None if sel_name == DEFAULT_OPTION else sel_name)
                persisted_choice = _startup_choice()
            except Exception as e:
                # Persisting is best-effort: the personality is already applied.
                logger.warning("Failed to persist startup personality: %s", e)
        return {"ok": True, "status": status, "startup": persisted_choice}
    except Exception as e:
        return JSONResponse({"ok": False, "error": str(e)}, status_code=500)  # type: ignore
|
| 270 |
+
|
| 271 |
+
@app.get("/voices")
async def _voices() -> list[str]:
    """Return the voices the handler reports, or ``["marin"]`` as a fallback.

    The fallback is used when the handler loop is unavailable, when the
    handler raises, or when no answer arrives within 10 seconds.
    """
    loop = get_loop()
    if loop is None:
        return ["marin"]

    async def _get_v() -> list[str]:
        try:
            return await handler.get_available_voices()
        except Exception:
            return ["marin"]

    try:
        fut = asyncio.run_coroutine_threadsafe(_get_v(), loop)
        # Await rather than block on ``fut.result()`` so the event loop
        # serving this request stays responsive while the handler's loop
        # answers. ``shield`` leaves the handler-side call running on timeout.
        return await asyncio.wait_for(
            asyncio.shield(asyncio.wrap_future(fut)),
            timeout=10,
        )
    except Exception:
        return ["marin"]
|
src/reachy_mini_receptionist/ical_calendar.py
ADDED
|
@@ -0,0 +1,248 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""iCal calendar source for the receptionist.
|
| 2 |
+
|
| 3 |
+
When ``RECEPTION_ICS_URL`` is set in the environment, ``calendar_data``
|
| 4 |
+
uses this module to fetch today's appointments. The URL is typically a
|
| 5 |
+
Google Calendar "Public address in iCal format" link
|
| 6 |
+
(Settings -> Integrate calendar -> Public address in iCal format).
|
| 7 |
+
When the URL is unset, ``calendar_data`` returns an empty schedule and
|
| 8 |
+
the receptionist serves walk-in visitors only (via ``lookup_employee``).
|
| 9 |
+
|
| 10 |
+
Operator convention for event titles:
|
| 11 |
+
|
| 12 |
+
"<Visitor name> with <Host name>"
|
| 13 |
+
|
| 14 |
+
Examples:
|
| 15 |
+
|
| 16 |
+
"Rohan Verma with Mukul"
|
| 17 |
+
"Sara Khan with Priya"
|
| 18 |
+
"David Lee with Arjun Mehta"
|
| 19 |
+
|
| 20 |
+
The host name (or alias) is matched against ``employees.py``. Add an
|
| 21 |
+
optional " — note" suffix to the title or use the event's DESCRIPTION
|
| 22 |
+
field for the note. The event's LOCATION field is used as a fallback host
|
| 23 |
+
when the title contains none of the recognised visitor/host separators.
|
| 24 |
+
|
| 25 |
+
Single-occurrence events only — RRULE recurrence is not expanded in v1.
|
| 26 |
+
"""
|
| 27 |
+
from __future__ import annotations

import logging
import os
import re
import time
from datetime import date, datetime
from typing import Any, Dict, List, Optional, Tuple
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError

import httpx
from icalendar import Calendar
|
| 38 |
+
|
| 39 |
+
logger = logging.getLogger(__name__)

# How long a successful fetch is reused before the feed is downloaded again.
_CACHE_TTL_SECONDS: float = 5 * 60.0
# Timeout handed to the HTTP request that downloads the feed.
_HTTP_TIMEOUT_SECONDS: float = 10.0

# Single-entry cache for the most recent successful fetch:
#   fetched_at - time.time() of that fetch
#   data       - the appointment list (an empty list is a valid result)
#   url        - the feed URL the data came from
#   valid      - False until a fetch has succeeded, and again after clear_cache()
_cache: Dict[str, Any] = dict(fetched_at=0.0, data=[], url=None, valid=False)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def _display_tz() -> Optional[ZoneInfo]:
    """Return the timezone to use for displaying iCal event times.

    Read from the ``RECEPTION_TIMEZONE`` env var (e.g. ``Asia/Kolkata``,
    ``America/New_York``). An unset, empty or whitespace-only value means
    the default, ``Asia/Kolkata``, since the pilot deployment is in India.

    Returns:
        The ``ZoneInfo`` for the configured name, or ``None`` when the name
        cannot be loaded. ``None`` tells callers to use the system local
        timezone (``astimezone()`` with no argument) rather than crashing
        the whole calendar fetch.
    """
    raw = (os.getenv("RECEPTION_TIMEZONE") or "").strip() or "Asia/Kolkata"
    try:
        return ZoneInfo(raw)
    except (ZoneInfoNotFoundError, ValueError):
        # ZoneInfoNotFoundError: no zone data found for this key.
        # ValueError: the key itself is malformed (for example an absolute
        # or non-normalised path), which ZoneInfo rejects before any lookup.
        logger.warning(
            "RECEPTION_TIMEZONE=%r is not a valid IANA tz name — "
            "falling back to system local time",
            raw,
        )
        return None  # signals "use astimezone() default"
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
# Visitor/host separators the operator may write in event titles, matched
# case-insensitively. The separator that starts earliest in the title wins;
# when several start at the same position the one listed first wins, so a
# longer phrasing must be listed before any shorter one that shares its
# start (" meets with " before " meets ").
_VISITOR_HOST_SEPARATORS: Tuple[str, ...] = (
    " is here to see ",
    " here to see ",
    " to see ",
    " meets with ",
    " meeting with ",
    " meeting ",
    " meets ",
    # Bare "meet" — added 2026-05-21 after the operator's calendar used
    # the imperative form ("Krishna Meet Rohan", "Alex Meet Arjun"). Without
    # this the title fails to split, host_query stays empty, and the bot
    # falls back to a context-derived email instead of looking the host
    # up in the employee directory.
    " meet ",
    " with ",
    " for ",
    " -> ",
    " → ",
)

# One alternation over every separator. A regex search returns the leftmost
# match and tries the alternatives in order, which is exactly the "earliest
# position, then tuple order" rule above. It runs on the original title, not
# a lowercased copy: ``str.lower()`` can change a string's length (e.g. "İ"),
# so indices found in the lowered text do not always fit the original.
_VISITOR_HOST_RE = re.compile(
    "|".join(re.escape(sep) for sep in _VISITOR_HOST_SEPARATORS),
    re.IGNORECASE | re.ASCII,
)

# Delimiters that introduce an inline note after the host name.
_NOTE_DELIMITERS: Tuple[str, ...] = (" — ", " - ", ": ")


def _parse_summary(summary: str) -> Tuple[str, str, Optional[str]]:
    """Split a SUMMARY into (visitor_name, host_query, inline_note).

    The title is split at the earliest visitor/host separator from
    ``_VISITOR_HOST_SEPARATORS`` (case-insensitive). The part after it is
    then split at the earliest note delimiter (``" — "``, ``" - "`` or
    ``": "``): the text before is the host, the text after is the note.

    Returns:
        ``("", "", None)`` for an empty summary.
        ``(text, "", None)`` if no visitor/host separator is found — the
        caller can then fall back to LOCATION for the host.
        ``(visitor, host, note)`` otherwise, with ``note`` set to ``None``
        when the title carries no note delimiter.
    """
    if not summary:
        return ("", "", None)
    text = summary.strip()
    split = _VISITOR_HOST_RE.search(text)
    if split is None:
        return (text, "", None)
    visitor = text[:split.start()].strip()
    rest = text[split.end():].strip()

    # Cut at whichever delimiter appears first, so the host never absorbs
    # the beginning of a note that itself contains another delimiter
    # ("Mukul: demo - phase 2" -> host "Mukul", note "demo - phase 2").
    hits = [
        (pos, delim)
        for delim in _NOTE_DELIMITERS
        if (pos := rest.find(delim)) >= 0
    ]
    if not hits:
        return (visitor, rest, None)
    pos, delim = min(hits, key=lambda hit: hit[0])
    note = rest[pos + len(delim):].strip()
    return (visitor, rest[:pos].strip(), note)
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def _format_time(dt: Any) -> str:
    """Format an event start as 'H:MM AM/PM', or 'all day' for non-datetimes.

    Timezone-aware datetimes are first converted to the display timezone
    (system local time when no display timezone is available), so a feed
    that serialises times in UTC or another region still shows the time
    the operator scheduled. Naive datetimes are formatted as they are.
    """
    if not isinstance(dt, datetime):
        return "all day"
    shown = dt
    if dt.tzinfo is not None:
        zone = _display_tz()
        shown = dt.astimezone() if zone is None else dt.astimezone(zone)
    # %I is zero-padded ("01:00 PM"); drop the padding for display.
    return shown.strftime("%I:%M %p").lstrip("0")
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
def _local_event_date(start: Any) -> date:
    """Return the calendar date of an event's DTSTART in the display timezone.

    A value that is not a datetime is returned unchanged (it is already a
    date). A naive datetime contributes its own date; an aware datetime is
    converted to the display timezone (system local time when none is
    available) before its date is taken.
    """
    if not isinstance(start, datetime):
        return start  # already a date
    if start.tzinfo is None:
        return start.date()
    zone = _display_tz()
    localized = start.astimezone() if zone is None else start.astimezone(zone)
    return localized.date()
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
def _today_events(cal: Calendar, today: date) -> List[Dict[str, Any]]:
    """Collect the VEVENTs in ``cal`` that start on ``today``, sorted by start.

    Events without a DTSTART are skipped. Each returned dict carries the
    formatted ``time``, the visitor ``name`` (the whole SUMMARY when no
    visitor could be parsed), a ``note``, the ``_host_query`` to resolve and
    the raw start in ``_dt``.
    """
    appointments: List[Dict[str, Any]] = []
    for vevent in cal.walk("VEVENT"):
        dtstart_prop = vevent.get("DTSTART")
        if dtstart_prop is None:
            continue
        begins = dtstart_prop.dt
        if _local_event_date(begins) != today:
            continue
        title = str(vevent.get("SUMMARY") or "")
        details = str(vevent.get("DESCRIPTION") or "").strip()
        place = str(vevent.get("LOCATION") or "").strip()
        guest, title_host, title_note = _parse_summary(title)
        appointments.append({
            "time": _format_time(begins),
            "name": guest or title,
            # Only surface a note when there's something beyond the title —
            # never echo the SUMMARY back into ``note`` (that just makes the
            # LLM context noisier without adding information).
            "note": title_note or details or "",
            # LOCATION stands in for the host when the title names none.
            "_host_query": title_host or place,
            "_dt": begins,
        })
    return sorted(appointments, key=_sort_key)
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
def _sort_key(event: Dict[str, Any]) -> datetime:
    """Return a naive datetime used to order appointment dicts by ``_dt``.

    Aware datetimes are converted to system local time and stripped of
    their tzinfo; naive datetimes are used as they are; plain dates sort at
    midnight of that day; anything else sorts first (``datetime.min``).
    """
    stamp = event.get("_dt")
    if isinstance(stamp, datetime):
        if stamp.tzinfo is None:
            return stamp.replace(tzinfo=None)
        return stamp.astimezone().replace(tzinfo=None)
    if isinstance(stamp, date):
        return datetime.combine(stamp, datetime.min.time())
    return datetime.min
|
| 192 |
+
|
| 193 |
+
|
| 194 |
+
def _fetch_ics(url: str) -> str:
    """Download the iCal feed at ``url`` and return the response body as text.

    Redirects are followed and the request uses ``_HTTP_TIMEOUT_SECONDS``.
    ``raise_for_status()`` is called on the response before the text is
    returned, so an error status raises instead of returning a body.
    """
    response = httpx.get(url, follow_redirects=True, timeout=_HTTP_TIMEOUT_SECONDS)
    response.raise_for_status()
    return response.text
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
def fetch_appointments(ics_url: str, today: Optional[date] = None) -> List[Dict[str, Any]]:
    """Return today's appointments from the iCal URL, cached for ~5 minutes.

    ``today`` defaults to the current date in the display timezone. The
    cache holds one result, for one feed URL and one day; asking for a
    different URL or day (including the first call after midnight) always
    triggers a new fetch.

    On any fetch/parse failure a warning is logged and the last successful
    result for the same URL and day is returned, or an empty list when
    there is none. Each appointment dict has keys:

        time (str)          — "H:MM AM/PM" or "all day"
        name (str)          — visitor name parsed from SUMMARY
        note (str)          — inline note from the title, else DESCRIPTION, else ""
        _host_query (str)   — host name/alias from the SUMMARY or LOCATION
        _dt (datetime|date) — event start, for downstream use

    Resolution of ``_host_query`` to an email is the caller's job
    (calendar_data.py uses ``employees.find_email_for``).
    """
    if today is None:
        tz = _display_tz()
        today = (datetime.now(tz) if tz is not None else datetime.now().astimezone()).date()
    now = time.time()
    # Whether a cached result can be used is decided by ``valid`` plus the
    # feed/day/age checks, never by the truthiness of ``data``: an empty list
    # is a legitimate cached result ("no appointments today"), and treating
    # it as a miss would force a synchronous HTTP fetch on every call on an
    # empty-calendar day.
    cache_matches = (
        _cache["valid"]
        and _cache["url"] == ics_url
        # Without the day check a result cached for one day would be
        # served for another day for as long as the TTL lasts.
        and _cache.get("day") == today
    )
    if cache_matches and (now - _cache["fetched_at"]) < _CACHE_TTL_SECONDS:
        return list(_cache["data"])
    try:
        text = _fetch_ics(ics_url)
        cal = Calendar.from_ical(text)
        events = _today_events(cal, today)
        _cache.update({
            "fetched_at": now,
            "data": events,
            "url": ics_url,
            "valid": True,
            "day": today,
        })
        logger.info("Fetched iCal: %d event(s) for %s", len(events), today)
        return list(events)
    except Exception as e:
        # Stale data is only a useful fallback if it answers the same
        # question; another feed's or another day's events would be wrong.
        last_good = list(_cache.get("data", [])) if cache_matches else []
        logger.warning(
            "iCal fetch failed (%s: %s); using last-good cache (%d entries)",
            type(e).__name__, e, len(last_good),
        )
        return last_good
|
| 244 |
+
|
| 245 |
+
|
| 246 |
+
def clear_cache() -> None:
    """Reset the iCal cache to its initial, invalid state.

    Serves as a test hook and as a manual refresh helper: the next
    ``fetch_appointments`` call has to download the feed again.
    """
    _cache.update(fetched_at=0.0, data=[], url=None, valid=False)
|
src/reachy_mini_receptionist/images/reachymini_avatar.png
ADDED
|
|
Git LFS Details
|
src/reachy_mini_receptionist/images/user_avatar.png
ADDED
|
|
Git LFS Details
|
src/reachy_mini_receptionist/main.py
ADDED
|
@@ -0,0 +1,1199 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Entrypoint for the Reachy Mini Receptionist app.
|
| 2 |
+
|
| 3 |
+
Changes from the base realtime app template:
|
| 4 |
+
- FaceDatabase and FaceRecognitionWorker are initialised here and injected into
|
| 5 |
+
ToolDependencies (face_worker + face_db fields).
|
| 6 |
+
- A /video_feed MJPEG endpoint and /api/* JSON endpoints are mounted on the
|
| 7 |
+
FastAPI settings_app so the dashboard can show the live annotated camera feed,
|
| 8 |
+
guest list, calendar, and notifications.
|
| 9 |
+
- The camera_worker slot is left intact for future head-tracking integration.
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import os
|
| 13 |
+
import sys
|
| 14 |
+
import time
|
| 15 |
+
import asyncio
|
| 16 |
+
import argparse
|
| 17 |
+
import threading
|
| 18 |
+
from pathlib import Path
|
| 19 |
+
from typing import Any, Dict, List, Optional
|
| 20 |
+
|
| 21 |
+
import gradio as gr
|
| 22 |
+
from fastapi import FastAPI
|
| 23 |
+
from fastapi.responses import StreamingResponse, JSONResponse
|
| 24 |
+
from fastapi.staticfiles import StaticFiles
|
| 25 |
+
from fastrtc import Stream
|
| 26 |
+
from gradio.utils import get_space
|
| 27 |
+
|
| 28 |
+
from reachy_mini import ReachyMini, ReachyMiniApp
|
| 29 |
+
from reachy_mini_receptionist.utils import (
|
| 30 |
+
parse_args,
|
| 31 |
+
setup_logger,
|
| 32 |
+
handle_vision_stuff,
|
| 33 |
+
log_connection_troubleshooting,
|
| 34 |
+
)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def _mount_dashboard_api(
|
| 38 |
+
app: Any,
|
| 39 |
+
face_worker: Any,
|
| 40 |
+
face_db: Any,
|
| 41 |
+
realtime_handler: Any | None = None,
|
| 42 |
+
session_manager: Any | None = None,
|
| 43 |
+
visitor_log: Any | None = None,
|
| 44 |
+
employee_store: Any | None = None,
|
| 45 |
+
instance_path: Optional[str] = None,
|
| 46 |
+
) -> None:
|
| 47 |
+
"""Mount receptionist dashboard API endpoints on a FastAPI app.
|
| 48 |
+
|
| 49 |
+
Endpoints:
|
| 50 |
+
GET /dashboard → serves the dashboard HTML page
|
| 51 |
+
GET /video_feed → MJPEG stream of annotated camera frames
|
| 52 |
+
GET /api/guests → JSON list of registered guests
|
| 53 |
+
GET /api/calendar → JSON today's appointments
|
| 54 |
+
GET /api/outbox → JSON email outbox log
|
| 55 |
+
GET /api/face_status → JSON current face detection state
|
| 56 |
+
GET /api/face_event_last_sent → JSON last external face event sent to model
|
| 57 |
+
GET /api/logs → JSON recent face recognition debug logs
|
| 58 |
+
GET /api/session → JSON current visitor session state (state machine)
|
| 59 |
+
GET /api/session_event_last_sent → JSON last session context event sent to model
|
| 60 |
+
"""
|
| 61 |
+
if app is None or not hasattr(app, "get"):
|
| 62 |
+
return # No FastAPI app available (e.g. LocalStream without settings_app)
|
| 63 |
+
|
| 64 |
+
# Body needs to be in scope for every POST/PATCH endpoint with a JSON
|
| 65 |
+
# body below. Imported once here at the top of the function so the
|
| 66 |
+
# order endpoints are defined in doesn't matter.
|
| 67 |
+
from fastapi import Body # noqa: F401 — used by endpoints below
|
| 68 |
+
|
| 69 |
+
from reachy_mini_receptionist.calendar_data import get_appointments
|
| 70 |
+
from reachy_mini_receptionist.tools.send_email import get_outbox
|
| 71 |
+
|
| 72 |
+
static_dir = Path(__file__).parent / "static"
|
| 73 |
+
dashboard_html = static_dir / "dashboard.html"
|
| 74 |
+
|
| 75 |
+
# Serve guest face thumbnails as static files
|
| 76 |
+
guests_dir = face_db.guests_dir
|
| 77 |
+
app.mount(
|
| 78 |
+
"/guest_images",
|
| 79 |
+
StaticFiles(directory=str(guests_dir)),
|
| 80 |
+
name="guest_images",
|
| 81 |
+
)
|
| 82 |
+
|
| 83 |
+
# Serve dashboard page
|
| 84 |
+
if dashboard_html.exists():
|
| 85 |
+
from fastapi.responses import FileResponse
|
| 86 |
+
|
| 87 |
+
@app.get("/dashboard")
|
| 88 |
+
def _dashboard_page():
|
| 89 |
+
return FileResponse(str(dashboard_html))
|
| 90 |
+
|
| 91 |
+
# MJPEG video feed
|
| 92 |
+
def _mjpeg_generator():
|
| 93 |
+
boundary = b"--frame"
|
| 94 |
+
while True:
|
| 95 |
+
jpeg = face_worker.latest_annotated_jpeg
|
| 96 |
+
if jpeg:
|
| 97 |
+
yield (
|
| 98 |
+
boundary + b"\r\nContent-Type: image/jpeg\r\n\r\n"
|
| 99 |
+
+ jpeg + b"\r\n"
|
| 100 |
+
)
|
| 101 |
+
time.sleep(0.05) # ~20 fps max
|
| 102 |
+
|
| 103 |
+
@app.get("/video_feed")
def _video_feed():
    """Stream the annotated camera frames as a ``multipart/x-mixed-replace`` response."""
    frames = _mjpeg_generator()
    return StreamingResponse(
        frames,
        media_type="multipart/x-mixed-replace; boundary=frame",
    )
|
| 109 |
+
|
| 110 |
+
@app.get("/api/guests")
def _api_guests():
    """Return everything ``face_db.get_all_guests()`` reports, as JSON."""
    guests = face_db.get_all_guests()
    return JSONResponse(guests)
|
| 113 |
+
|
| 114 |
+
@app.delete("/api/guests")
def _api_delete_guest(name: str):
    """Delete the guest called ``name`` from the face DB.

    Responds 404 when ``face_db.delete_guest`` reports that nothing was removed.
    """
    if face_db.delete_guest(name):
        return JSONResponse({"ok": True, "name": name})
    return JSONResponse(
        {"ok": False, "error": "Guest not found", "name": name},
        status_code=404,
    )
|
| 123 |
+
|
| 124 |
+
@app.get("/api/calendar")
def _api_calendar():
    """Return the result of ``get_appointments()`` as JSON."""
    appointments = get_appointments()
    return JSONResponse(appointments)
|
| 127 |
+
|
| 128 |
+
@app.get("/api/outbox")
def _api_outbox():
    """Return the result of ``get_outbox()`` as JSON."""
    outbox = get_outbox()
    return JSONResponse(outbox)
|
| 131 |
+
|
| 132 |
+
@app.get("/api/face_status")
def _api_face_status():
    """Report the face worker's current label, confidence, and known/unknown flag.

    A label counts as "known" unless it is one of the two placeholder
    values ``"Unknown"`` or ``"No face"``.
    """
    placeholder_labels = ("Unknown", "No face")
    status = {
        "name": face_worker.current_name,
        "confidence": round(face_worker.confidence, 2),
    }
    status["is_known"] = face_worker.current_name not in placeholder_labels
    return JSONResponse(status)
|
| 139 |
+
|
| 140 |
+
@app.get("/api/face_event_last_sent")
def _api_face_event_last_sent():
    """Return the last face event the realtime handler recorded as sent.

    Answers ``{"sent": False}`` when there is no handler, when the handler
    lacks ``get_last_face_event_sent``, or when it returns ``None``.
    """
    handler_usable = realtime_handler is not None and hasattr(
        realtime_handler, "get_last_face_event_sent"
    )
    last_event = realtime_handler.get_last_face_event_sent() if handler_usable else None
    if last_event is None:
        return JSONResponse({"sent": False})
    return JSONResponse({"sent": True, **last_event})
|
| 150 |
+
|
| 151 |
+
@app.get("/api/logs")
def _api_logs():
    """Return ``face_worker.get_recent_logs(100)`` under the ``logs`` key."""
    recent = face_worker.get_recent_logs(100)
    return JSONResponse({"logs": recent})
|
| 154 |
+
|
| 155 |
+
@app.get("/api/config")
def _api_config():
    """Expose the configured model name (``config.MODEL_NAME``) as JSON."""
    from reachy_mini_receptionist.config import config as app_config

    return JSONResponse({"model": app_config.MODEL_NAME})
|
| 159 |
+
|
| 160 |
+
@app.get("/api/best_face_jpeg")
def _api_best_face_jpeg():
    """Return the best face crop from the last 5 seconds as a JPEG.

    When no face is available the response is 204 (No Content) rather than
    404, so the dashboard's periodic polling does not fill the browser's
    network tab with error rows while the room is empty.

    The face name, confidence and area are returned in ``X-Face-*`` headers.
    A name that cannot be encoded as Latin-1 is percent-encoded first, since
    HTTP header values that fall outside Latin-1 would otherwise make the
    response fail to build.
    """
    import cv2
    from urllib.parse import quote

    from fastapi.responses import Response

    # Dashboard preview should show the best currently available face
    # immediately and not wait for dwell-based stabilization.
    name, conf, crop = face_worker.best_recent_face(
        window_seconds=5.0, require_dwell=False,
    )
    if crop is None:
        return Response(status_code=204)

    # Upsample the crop to 200x200 for readability before encoding.
    display = cv2.resize(crop, (200, 200), interpolation=cv2.INTER_NEAREST)
    encoded_ok, jpeg_buf = cv2.imencode(
        ".jpg", display, [cv2.IMWRITE_JPEG_QUALITY, 85],
    )
    if not encoded_ok:
        # imencode signals failure through its first return value.
        return Response(status_code=500)
    jpeg_bytes = jpeg_buf.tobytes()

    # Largest second element among the detection-window entries, reported
    # as the face area header.
    face_area = 0
    with face_worker._lock:
        if face_worker._detection_window:
            best_entry = max(face_worker._detection_window, key=lambda e: e[1])
            face_area = best_entry[1]

    header_name = name
    if isinstance(header_name, str):
        try:
            header_name.encode("latin-1")
        except UnicodeEncodeError:
            # Non-Latin-1 names are percent-encoded so the header stays valid.
            header_name = quote(header_name)

    return Response(
        content=jpeg_bytes,
        media_type="image/jpeg",
        headers={
            "X-Face-Name": header_name,
            "X-Face-Confidence": str(round(conf, 2)),
            "X-Face-Area": str(face_area),
            "Cache-Control": "no-cache, no-store",
        },
    )
|
| 199 |
+
|
| 200 |
+
@app.get("/api/session")
def _api_session():
    """Return the current visitor session as JSON.

    ``available`` is False when no session manager was supplied. Otherwise
    the session's ``to_dict()`` output (or nothing, if it has no such
    method) is merged into the response.
    """
    if session_manager is None:
        return JSONResponse({"available": False})
    current = session_manager.session
    if hasattr(current, "to_dict"):
        details = current.to_dict()
    else:
        details = {}
    return JSONResponse({"available": True, **details})
|
| 207 |
+
|
| 208 |
+
@app.get("/api/session_event_last_sent")
def _api_session_event_last_sent():
    """Return the last session event the realtime handler recorded as sent.

    Answers ``{"sent": False}`` when there is no handler, when the handler
    lacks ``get_last_session_event_sent``, or when it returns ``None``.
    """
    handler_usable = realtime_handler is not None and hasattr(
        realtime_handler, "get_last_session_event_sent"
    )
    last_event = realtime_handler.get_last_session_event_sent() if handler_usable else None
    if last_event is None:
        return JSONResponse({"sent": False})
    return JSONResponse({"sent": True, **last_event})
|
| 216 |
+
|
| 217 |
+
@app.get("/api/visitor_log")
def _api_visitor_log(limit: int = 100):
    """Return today's visit count plus up to ``limit`` visit rows.

    With no visitor log configured the response is marked unavailable and
    carries a zero count and an empty list.
    """
    if visitor_log is not None:
        body = {
            "available": True,
            "today_count": visitor_log.count_today(),
            "visits": visitor_log.list_visits(limit=limit),
        }
    else:
        body = {"available": False, "today_count": 0, "visits": []}
    return JSONResponse(body)
|
| 226 |
+
|
| 227 |
+
@app.get("/api/visitor_log.csv")
def _api_visitor_log_csv():
    """Download the visitor log as CSV (for HR / facilities handoff).

    Exports the rows returned by ``visitor_log.list_visits(limit=1000)``;
    keys not in the column list are dropped. With no visitor log configured
    an empty CSV body is returned.
    """
    from fastapi.responses import Response
    import csv
    import io

    if visitor_log is None:
        return Response(content="", media_type="text/csv")

    columns = [
        "id", "started_at", "ended_at",
        "visitor_name", "recognized_face_name", "employee_name",
        "matched_appointment_time", "matched_appointment_note",
        "email_sent_to", "final_state", "error_message",
    ]
    visits = visitor_log.list_visits(limit=1000)
    sink = io.StringIO()
    csv_out = csv.DictWriter(sink, fieldnames=columns, extrasaction="ignore")
    csv_out.writeheader()
    csv_out.writerows(visits)

    # The download is named after the server's current local date.
    download_name = f"visitor_log_{time.strftime('%Y-%m-%d')}.csv"
    disposition = f'attachment; filename="{download_name}"'
    return Response(
        content=sink.getvalue(),
        media_type="text/csv",
        headers={"Content-Disposition": disposition},
    )
|
| 255 |
+
|
| 256 |
+
@app.delete("/api/visitor_log")
def _api_visitor_log_wipe():
    """Wipe every row of the visitor log.

    Scoped destructive action used by the dashboard's panel-level Clear
    button; only ``visitor_log.wipe_all()`` is called. Responds 503 when no
    visitor log is configured and 500 when the wipe raises.
    """
    if visitor_log is None:
        return JSONResponse({"ok": False, "error": "visitor_log unavailable"}, status_code=503)
    try:
        removed_count = visitor_log.wipe_all()
    except Exception as exc:
        failure = {"ok": False, "error": f"{type(exc).__name__}: {exc}"}
        return JSONResponse(failure, status_code=500)
    return JSONResponse({"ok": True, "removed": removed_count})
|
| 270 |
+
|
| 271 |
+
@app.get("/api/stats")
def _api_stats():
    """Consolidated stats strip data: visits, emails, last visit, session state.

    Visitor-log figures default to zero / ``None`` when no log is configured;
    the session fields stay ``None`` when there is no session manager.
    """
    if visitor_log is None:
        visit_count, email_count, latest_visit = 0, 0, None
    else:
        visit_count = visitor_log.count_today()
        email_count = visitor_log.count_emails_delivered_today()
        latest_visit = visitor_log.last_visit()

    state_value = None
    visitor = None
    if session_manager is not None:
        current = session_manager.session
        state_value = current.current_state.value
        visitor = current.visitor_name

    return JSONResponse({
        "visits_today": visit_count,
        "emails_delivered_today": email_count,
        "last_visit": latest_visit,
        "current_state": state_value,
        "current_visitor": visitor,
    })
|
| 290 |
+
|
| 291 |
+
@app.post("/api/session/reset")
def _api_session_reset():
    """Manual override: reset the current visitor session.

    Useful when the bot gets stuck (e.g. a spurious face match keeps the
    state from going back to idle). Calls ``session_manager.reset()`` and
    reports the resulting state; responds 503 without a session manager.
    """
    if session_manager is not None:
        fresh = session_manager.reset()
        return JSONResponse({"ok": True, "current_state": fresh.current_state.value})
    return JSONResponse(
        {"ok": False, "error": "Session manager not available"},
        status_code=503,
    )
|
| 302 |
+
|
| 303 |
+
@app.post("/api/guests/manual_register")
def _api_guests_manual_register(payload: dict = Body(...)):
    """Operator-initiated visitor registration that bypasses voice.

    When the bot can't transcribe a visitor's name, the operator types it on
    the dashboard. The face worker's ``current_encoding`` is saved under that
    name and the active session is moved to RECOGNIZED so downstream tools
    can proceed as if registration had succeeded via voice.

    Responses:
        400: ``name`` is missing, blank, or not a string.
        409: the face worker currently holds no face data.
        503: the face worker or face DB is unavailable.
        500: saving the guest raised.
        200: ``{"ok": True, "name": ..., "total_guests": ...}``.
    """
    raw_name = payload.get("name")
    if raw_name is not None and not isinstance(raw_name, str):
        # A JSON number/list/object here would otherwise crash on .strip().
        return JSONResponse(
            {"ok": False, "error": "name must be a string"}, status_code=400,
        )
    name = (raw_name or "").strip()
    if not name:
        return JSONResponse(
            {"ok": False, "error": "name is required"}, status_code=400,
        )

    worker = face_worker
    if worker is None:
        return JSONResponse(
            {"ok": False, "error": "Face worker not available"},
            status_code=503,
        )
    face_crop = getattr(worker, "current_encoding", None)
    if face_crop is None:
        return JSONResponse(
            {
                "ok": False,
                "error": (
                    "No face currently detected by the camera. Ask the "
                    "visitor to look directly at the lens and try again."
                ),
            },
            status_code=409,
        )
    if face_db is None:
        return JSONResponse(
            {"ok": False, "error": "Face DB not available"},
            status_code=503,
        )

    try:
        face_db.add_or_update_guest(name, face_crop)
    except Exception as e:
        return JSONResponse(
            {"ok": False, "error": f"{type(e).__name__}: {e}"},
            status_code=500,
        )

    # The guest is already stored, so a failed rebuild stays best-effort,
    # but it is reported instead of being dropped silently.
    try:
        worker.rebuild_recognizer()
    except Exception as e:
        print(f"[manual_register] recognizer rebuild failed: {type(e).__name__}: {e}")

    # Flip the active session so the LLM sees this as a confirmed
    # registration and can resume the flow (calendar match / lookup /
    # send_email) without re-asking for the name.
    if session_manager is not None:
        try:
            from reachy_mini_receptionist.receptionist_state import (
                ReceptionState as _RS,
            )
            session_manager.transition(
                _RS.RECOGNIZED,
                visitor_name=name,
                recognized_face_name=name,
            )
        except Exception as e:
            print(f"[manual_register] session transition failed: {e}")
    return JSONResponse({
        "ok": True,
        "name": name,
        "total_guests": face_db.count(),
    })
|
| 376 |
+
|
| 377 |
+
@app.post("/api/demo/reset")
def _api_demo_reset():
    """Wipe what accumulates during testing so the next demo starts clean.

    Steps, each attempted independently and in this order:
      1. clear the face DB (the guest count taken beforehand is reported)
      2. rebuild the face recognizer, if the worker offers that method
      3. wipe the visitor log
      4. clear the email outbox
      5. reset the active session

    Only those five targets are touched. A failing step adds an entry to
    ``errors`` and the remaining steps still run; ``ok`` is True only when
    ``errors`` is empty.
    """
    from reachy_mini_receptionist.tools.send_email import clear_outbox

    summary = {
        "guests_removed": 0,
        "visits_removed": 0,
        "outbox_removed": 0,
        "session_reset": False,
        "errors": [],
    }

    def _wipe_faces():
        if face_db is not None:
            guest_total = face_db.count()
            face_db.clear()
            summary["guests_removed"] = guest_total

    def _rebuild_recognizer():
        # Lets the worker drop the wiped faces immediately.
        if face_worker is not None and hasattr(face_worker, "rebuild_recognizer"):
            face_worker.rebuild_recognizer()

    def _wipe_visits():
        if visitor_log is not None:
            summary["visits_removed"] = visitor_log.wipe_all()

    def _wipe_outbox():
        summary["outbox_removed"] = clear_outbox()

    def _reset_session():
        if session_manager is not None:
            session_manager.reset()
            summary["session_reset"] = True

    steps = (
        ("face_db", _wipe_faces),
        ("face_worker", _rebuild_recognizer),
        ("visitor_log", _wipe_visits),
        ("outbox", _wipe_outbox),
        ("session", _reset_session),
    )
    for label, step in steps:
        try:
            step()
        except Exception as exc:
            summary["errors"].append(f"{label}: {type(exc).__name__}: {exc}")

    summary["ok"] = not summary["errors"]
    return JSONResponse(summary)
|
| 435 |
+
|
| 436 |
+
# ------------------------------------------------------------------
|
| 437 |
+
# Employee CRUD — backs the Employees panel on the dashboard.
|
| 438 |
+
# (Body is imported at the top of _mount_dashboard_api.)
|
| 439 |
+
# ------------------------------------------------------------------
|
| 440 |
+
|
| 441 |
+
def _employee_store_or_503():
    """Return a 503 JSON response when no employee store exists, else ``None``.

    Endpoints call this first and return its result directly when it is
    not ``None``.
    """
    if employee_store is not None:
        return None
    unavailable = {"ok": False, "error": "Employee store not available"}
    return JSONResponse(unavailable, status_code=503)
|
| 447 |
+
|
| 448 |
+
@app.get("/api/employees")
def _api_employees_list():
    """List all employees from the store, or 503 when the store is missing."""
    unavailable = _employee_store_or_503()
    if unavailable is None:
        return JSONResponse({"employees": employee_store.list_all()})
    return unavailable
|
| 454 |
+
|
| 455 |
+
@app.post("/api/employees")
def _api_employees_create(payload: dict = Body(...)):
    """Create an employee from the JSON payload.

    Reads ``name``, ``email``, ``aliases`` and ``title`` from the payload.
    Status codes: 503 no store, 409 ``EmployeeExistsError``, 400
    ``ValueError``, 500 any other exception.
    """
    unavailable = _employee_store_or_503()
    if unavailable is not None:
        return unavailable

    def _failure(message, status):
        return JSONResponse({"ok": False, "error": message}, status_code=status)

    try:
        from reachy_mini_receptionist.employees_store import EmployeeExistsError
        fields = {
            "name": payload.get("name", ""),
            "email": payload.get("email", ""),
            "aliases": payload.get("aliases") or [],
            "title": payload.get("title"),
        }
        created = employee_store.create(**fields)
        return JSONResponse({"ok": True, "employee": created})
    except EmployeeExistsError as exc:
        return _failure(str(exc), 409)
    except ValueError as exc:
        return _failure(str(exc), 400)
    except Exception as exc:
        return _failure(f"{type(exc).__name__}: {exc}", 500)
|
| 481 |
+
|
| 482 |
+
@app.patch("/api/employees/{employee_id}")
def _api_employees_update(employee_id: int, payload: dict = Body(...)):
    """Update the employee ``employee_id`` with fields from the JSON payload.

    ``name``, ``email``, ``aliases`` and ``title`` are forwarded to the
    store; keys missing from the payload are passed as ``None``.
    Status codes: 503 no store, 404 store returned ``None``, 409
    ``EmployeeExistsError``, 400 ``ValueError``, 500 any other exception.
    """
    unavailable = _employee_store_or_503()
    if unavailable is not None:
        return unavailable

    def _failure(message, status):
        return JSONResponse({"ok": False, "error": message}, status_code=status)

    try:
        from reachy_mini_receptionist.employees_store import EmployeeExistsError
        changes = {
            "name": payload.get("name"),
            "email": payload.get("email"),
            "aliases": payload.get("aliases"),
            "title": payload.get("title"),
        }
        updated = employee_store.update(employee_id, **changes)
        if updated is None:
            return _failure("Employee not found", 404)
        return JSONResponse({"ok": True, "employee": updated})
    except EmployeeExistsError as exc:
        return _failure(str(exc), 409)
    except ValueError as exc:
        return _failure(str(exc), 400)
    except Exception as exc:
        return _failure(f"{type(exc).__name__}: {exc}", 500)
|
| 513 |
+
|
| 514 |
+
@app.delete("/api/employees/{employee_id}")
def _api_employees_delete(employee_id: int):
    """Delete the employee ``employee_id``; 404 when the store removed nothing."""
    unavailable = _employee_store_or_503()
    if unavailable is not None:
        return unavailable
    if employee_store.delete(employee_id):
        return JSONResponse({"ok": True})
    return JSONResponse(
        {"ok": False, "error": "Employee not found"}, status_code=404,
    )
|
| 525 |
+
|
| 526 |
+
# ------------------------------------------------------------------
|
| 527 |
+
# Diagnostics — surfaces the kind of failures that turned today into
|
| 528 |
+
# a 30-min debugging session (audio at 7%, daemon asleep, OpenAI
|
| 529 |
+
# latency, etc) BEFORE the demo, not during.
|
| 530 |
+
# ------------------------------------------------------------------
|
| 531 |
+
import socket
|
| 532 |
+
import subprocess
|
| 533 |
+
|
| 534 |
+
def _check_tcp(host: str, port: int, timeout: float = 2.0) -> dict:
    """Try to open a TCP connection to ``host:port`` and time it.

    Returns ``{"ok": True, "latency_ms": <int>}`` on success, or
    ``{"ok": False, "error": "<ExceptionType>: <message>"}`` on any failure.
    """
    began = time.monotonic()
    try:
        connection = socket.create_connection((host, port), timeout=timeout)
        with connection:
            elapsed_ms = int((time.monotonic() - began) * 1000)
            return {"ok": True, "latency_ms": elapsed_ms}
    except Exception as exc:
        return {"ok": False, "error": f"{type(exc).__name__}: {exc}"}
|
| 543 |
+
|
| 544 |
+
def _check_daemon() -> dict:
    """Query the daemon status endpoint on localhost:8000 and summarise it.

    ``ok`` is True only when the endpoint answers HTTP 200 and its JSON
    ``state`` equals ``"started"``. Any exception, including a failed
    ``httpx`` import, is reported as ``{"ok": False, "error": ...}``.
    """
    try:
        import httpx

        began = time.monotonic()
        reply = httpx.get("http://localhost:8000/api/daemon/status", timeout=2.0)
        elapsed_ms = int((time.monotonic() - began) * 1000)
        if reply.status_code == 200:
            status = reply.json()
            return {
                "ok": status.get("state") == "started",
                "latency_ms": elapsed_ms,
                "state": status.get("state"),
            }
        return {"ok": False, "latency_ms": elapsed_ms, "error": f"HTTP {reply.status_code}"}
    except Exception as exc:
        return {"ok": False, "error": f"{type(exc).__name__}: {exc}"}
|
| 560 |
+
|
| 561 |
+
def _check_wifi() -> dict:
    """Summarise the ``wlan0`` link by parsing ``iwconfig wlan0`` output.

    Returns:
        On command failure: ``{"ok": False, "error": ...}``.
        Otherwise a dict with ``raw`` (the trimmed command output) plus any
        of ``link_quality``, ``link_quality_max``, ``link_quality_pct``,
        ``signal_dbm`` and ``essid`` that could be parsed. ``ok`` is True
        when the link quality is at least 50%, False below that, and
        ``None`` when no percentage could be computed.
    """
    import re

    try:
        out = subprocess.check_output(
            ["iwconfig", "wlan0"],
            stderr=subprocess.STDOUT, timeout=2.0,
        ).decode("utf-8", errors="replace")
    except Exception as e:
        return {"ok": False, "error": f"{type(e).__name__}: {e}"}

    info: dict = {"raw": out.strip()}

    m = re.search(r"Link Quality=(\d+)/(\d+)", out)
    if m:
        quality, quality_max = int(m.group(1)), int(m.group(2))
        info["link_quality"] = quality
        info["link_quality_max"] = quality_max
        # A reported maximum of 0 would divide by zero; leave the
        # percentage out so the check reads as "unknown" instead.
        if quality_max > 0:
            info["link_quality_pct"] = round(quality / quality_max * 100, 1)

    m = re.search(r"Signal level=(-?\d+)\s*dBm", out)
    if m:
        info["signal_dbm"] = int(m.group(1))

    m = re.search(r"ESSID:\"([^\"]+)\"", out)
    if m:
        info["essid"] = m.group(1)

    pct = info.get("link_quality_pct")
    info["ok"] = None if pct is None else pct >= 50
    return info
|
| 584 |
+
|
| 585 |
+
def _check_audio() -> dict:
    """List audio sinks reported by ``pactl list short sinks``.

    For each non-blank output line the second tab-separated field is kept
    (or the whole line when it has no tab). ``ok`` is True when at least
    one sink was found; any failure yields ``{"ok": False, "error": ...}``.
    """
    try:
        raw = subprocess.check_output(
            ["pactl", "list", "short", "sinks"],
            stderr=subprocess.STDOUT, timeout=2.0,
        )
        listing = raw.decode("utf-8", errors="replace")
        sink_names = []
        for row in listing.splitlines():
            if not row.strip():
                continue
            sink_names.append(row.split("\t")[1] if "\t" in row else row)
        return {"ok": len(sink_names) > 0, "sinks": sink_names}
    except Exception as exc:
        return {"ok": False, "error": f"{type(exc).__name__}: {exc}"}
|
| 595 |
+
|
| 596 |
+
@app.get("/api/diagnostics/health")
def _api_diagnostics_health():
    """Run every diagnostic check and return them with an overall verdict.

    The checks run one after another in the order listed. The ``config``
    entry reports only whether keys are set (plus the sender address and
    model name) and does not count toward the overall result.
    """
    from reachy_mini_receptionist.config import config as _cfg

    checks = {
        "openai_realtime": _check_tcp("api.openai.com", 443),
        "resend": _check_tcp("api.resend.com", 443),
        "daemon": _check_daemon(),
        "wifi": _check_wifi(),
        "audio": _check_audio(),
        "config": {
            "openai_key_set": bool(_cfg.OPENAI_API_KEY),
            "resend_key_set": bool(os.getenv("RESEND_API_KEY")),
            "resend_from": os.getenv("RESEND_FROM", "onboarding@resend.dev"),
            "ical_url_set": bool(os.getenv("RECEPTION_ICS_URL")),
            "model": _cfg.MODEL_NAME,
        },
    }

    # Overall OK iff every "ok" we have is truthy (None counted as unknown -> ok)
    healthy = True
    for check_name, outcome in checks.items():
        if check_name == "config" or not isinstance(outcome, dict):
            continue
        if outcome.get("ok") not in (True, None):
            healthy = False
            break
    return JSONResponse({"ok": healthy, "checks": checks})
|
| 619 |
+
|
| 620 |
+
# ------------------------------------------------------------------
|
| 621 |
+
# Volume control — wraps amixer. Operators can change speaker volume
|
| 622 |
+
# without leaving the dashboard (previously required the Reachy Mini
|
| 623 |
+
# Control panel at :8000 or SSH'ing in).
|
| 624 |
+
# ------------------------------------------------------------------
|
| 625 |
+
|
| 626 |
+
# Reachy Mini uses ALSA directly (no PulseAudio), so we drive volume
|
| 627 |
+
# via `amixer`. The control name varies by device: Master is the
|
| 628 |
+
# standard ALSA name; PCM and Speaker are fallbacks on some Pi audio
|
| 629 |
+
# HATs; reachymini_audio_sink is the daemon's sink name on this image.
|
| 630 |
+
_AMIXER_CANDIDATES = (
|
| 631 |
+
"Master", "PCM", "Speaker", "Headphone", "reachymini_audio_sink",
|
| 632 |
+
)
|
| 633 |
+
|
| 634 |
+
def _amixer_active_control() -> Optional[str]:
    """Pick the amixer simple control to drive on this device.

    Candidates from ``_AMIXER_CANDIDATES`` are preferred in their listed
    order; failing that, the first control ``amixer scontrols`` reports is
    used. Returns ``None`` when the command fails or lists no controls.
    """
    import re

    try:
        raw = subprocess.check_output(
            ["amixer", "scontrols"],
            stderr=subprocess.STDOUT, timeout=2.0,
        )
        listing = raw.decode("utf-8", errors="replace")
    except Exception:
        return None

    available = re.findall(r"Simple mixer control '([^']+)'", listing)
    preferred = next((c for c in _AMIXER_CANDIDATES if c in available), None)
    if preferred is not None:
        return preferred
    if available:
        return available[0]
    return None
|
| 649 |
+
|
| 650 |
+
def _audio_get_volume() -> dict:
    """Read volume percent and mute state of the active amixer control.

    ``percent`` comes from the first ``[NN%]`` token in ``amixer sget``
    output (``None`` if absent); ``muted`` is True when ``[off]`` appears
    anywhere in that output. Failures yield ``{"ok": False, "error": ...}``.
    """
    import re

    control = _amixer_active_control()
    if control is None:
        return {"ok": False, "error": "no amixer control found"}
    try:
        raw = subprocess.check_output(
            ["amixer", "sget", control],
            stderr=subprocess.STDOUT, timeout=2.0,
        )
        report = raw.decode("utf-8", errors="replace")
    except Exception as exc:
        return {"ok": False, "error": f"{type(exc).__name__}: {exc}"}

    level = re.search(r"\[(\d+)%\]", report)
    if level:
        percent = int(level.group(1))
    else:
        percent = None
    is_muted = "[off]" in report.lower()
    return {"ok": True, "control": control, "percent": percent, "muted": is_muted}
|
| 666 |
+
|
| 667 |
+
@app.get("/api/audio/volume")
def _api_audio_volume_get():
    """Return the current speaker volume reading as JSON."""
    reading = _audio_get_volume()
    return JSONResponse(reading)
|
| 670 |
+
|
| 671 |
+
@app.post("/api/audio/volume")
def _api_audio_volume_set(payload: dict = Body(...)):
    """Unmute the active amixer control and set it to the requested percent.

    ``percent`` is required, converted with ``int()`` and clamped to the
    range 0-150. The response merges a fresh volume reading, so its ``ok``
    reflects that reading. Status codes: 400 for a missing or
    non-integer ``percent``, 500 when no control exists or amixer fails.
    """
    def _failure(message, status):
        return JSONResponse({"ok": False, "error": message}, status_code=status)

    requested = payload.get("percent")
    if requested is None:
        return _failure("percent (0-150) is required", 400)
    try:
        level = int(requested)
    except Exception:
        return _failure("percent must be an integer", 400)
    level = max(0, min(150, level))

    control = _amixer_active_control()
    if control is None:
        return _failure("no amixer control found", 500)

    try:
        # Unmute first, then apply the level, as two separate amixer calls.
        for setting in ("unmute", f"{level}%"):
            subprocess.check_output(
                ["amixer", "sset", control, setting],
                stderr=subprocess.STDOUT, timeout=2.0,
            )
    except Exception as exc:
        return _failure(f"{type(exc).__name__}: {exc}", 500)
    return JSONResponse({"ok": True, **_audio_get_volume()})
|
| 708 |
+
|
| 709 |
+
@app.post("/api/diagnostics/speaker_test")
def _api_diagnostics_speaker_test():
    """Launch ``speaker-test`` with a 440 Hz sine tone and return at once.

    The process is started without waiting for it to finish and its output
    is discarded. Responds 500 when the binary is missing or the launch
    raises.
    """
    tone_command = ["speaker-test", "-c", "1", "-t", "sine", "-f", "440", "-l", "1"]
    try:
        subprocess.Popen(
            tone_command,
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
        )
    except FileNotFoundError:
        return JSONResponse(
            {"ok": False, "error": "speaker-test binary not found"}, status_code=500,
        )
    except Exception as exc:
        return JSONResponse(
            {"ok": False, "error": f"{type(exc).__name__}: {exc}"}, status_code=500,
        )
    return JSONResponse({"ok": True, "message": "Playing 1-second 440Hz tone"})
|
| 725 |
+
|
| 726 |
+
# ------------------------------------------------------------------
|
| 727 |
+
# Settings — read/write a subset of .env keys via the dashboard so
|
| 728 |
+
# operators don't have to SSH and `nano .env` to set an API key.
|
| 729 |
+
# Sensitive values are masked on GET; full values written on PATCH.
|
| 730 |
+
# Changes that require app restart are flagged in the response.
|
| 731 |
+
# ------------------------------------------------------------------
|
| 732 |
+
_SETTINGS_KEYS = {
|
| 733 |
+
"VOICE_BACKEND": {"secret": False, "restart": True},
|
| 734 |
+
"GEMINI_LIVE_MODEL": {"secret": False, "restart": True},
|
| 735 |
+
"GEMINI_LIVE_VOICE": {"secret": False, "restart": True},
|
| 736 |
+
"OPENAI_API_KEY": {"secret": True, "restart": True},
|
| 737 |
+
"GEMINI_API_KEY": {"secret": True, "restart": True},
|
| 738 |
+
# GEMINI_MODEL removed 2026-05-21 — the name normalizer it
|
| 739 |
+
# configured is short-circuited in name_normalizer.py, so this
|
| 740 |
+
# key is unused. Re-add if/when the normalizer is reinstated.
|
| 741 |
+
"STT_MODEL": {"secret": False, "restart": True},
|
| 742 |
+
"STT_DISABLE_BIAS": {"secret": False, "restart": True},
|
| 743 |
+
"RESEND_API_KEY": {"secret": True, "restart": False},
|
| 744 |
+
"RESEND_FROM": {"secret": False, "restart": False},
|
| 745 |
+
"RECEPTION_ICS_URL": {"secret": False, "restart": False},
|
| 746 |
+
"FACE_TTL_DAYS": {"secret": False, "restart": True},
|
| 747 |
+
"VISITOR_LOG_RETENTION_DAYS": {"secret": False, "restart": True},
|
| 748 |
+
"FACE_LBPH_THRESHOLD": {"secret": False, "restart": True},
|
| 749 |
+
"MODEL_NAME": {"secret": False, "restart": True},
|
| 750 |
+
}
|
| 751 |
+
|
| 752 |
+
def _find_env_path() -> Optional[Path]:
    """Locate the .env file the settings endpoints read and write.

    Lookup order: whatever ``dotenv.find_dotenv(usecwd=True)`` finds, then
    ``<instance_path>/.env`` (when ``instance_path`` is set), then
    ``<cwd>/.env``. If none of the fallbacks exists yet, the first fallback
    is returned so a new file can be created there.
    """
    try:
        from dotenv import find_dotenv
        located = find_dotenv(usecwd=True)
        if located:
            return Path(located)
    except Exception:
        pass

    fallbacks = [Path(instance_path) / ".env"] if instance_path else []
    fallbacks.append(Path.cwd() / ".env")
    existing = next((path for path in fallbacks if path.exists()), None)
    if existing is not None:
        return existing
    return fallbacks[0] if fallbacks else None
|
| 768 |
+
|
| 769 |
+
def _mask(value: str) -> str:
    """Mask a secret for display, keeping a short prefix and suffix at most.

    The first 3 and last 4 characters are shown only when the value is long
    enough (14+ characters) that at least half of it stays hidden. Shorter
    values are masked completely, so a short secret is never echoed back
    in full or nearly in full. The result always has the same length as
    the input; an empty value yields an empty string.
    """
    if not value:
        return ""
    shown_head, shown_tail = 3, 4
    revealed = shown_head + shown_tail
    if len(value) < 2 * revealed:
        # Too short to reveal 7 characters safely: hide everything.
        return "•" * len(value)
    return value[:shown_head] + "•" * (len(value) - revealed) + value[-shown_tail:]
|
| 775 |
+
|
| 776 |
+
@app.get("/api/settings")
def _api_settings_get():
    """List the editable settings with their current environment values.

    Values are read from the process environment; secrets are masked
    before being returned. The response also carries the resolved .env
    path, or ``None`` when none could be determined.
    """
    def _describe(key, meta):
        current = os.getenv(key) or ""
        shown = _mask(current) if meta["secret"] and current else current
        return {
            "key": key,
            "value": shown,
            "is_set": bool(current),
            "is_secret": meta["secret"],
            "requires_restart": meta["restart"],
        }

    env_path = _find_env_path()
    entries = [_describe(key, meta) for key, meta in _SETTINGS_KEYS.items()]
    return JSONResponse({
        "env_path": str(env_path) if env_path else None,
        "settings": entries,
    })
|
| 794 |
+
|
| 795 |
+
@app.patch("/api/settings")
def _api_settings_patch(payload: dict = Body(...)):
    """Persist whitelisted settings to the .env file and the live environment.

    Only keys present in ``_SETTINGS_KEYS`` with a non-null value are
    accepted; values are stored as strings. Existing ``KEY=...`` lines are
    replaced in place (comments and ordering preserved) and new keys are
    appended. Each accepted value is also written to ``os.environ``.

    Values containing a line break or NUL character are rejected: a line
    break would let one value add extra ``KEY=value`` lines to the file,
    bypassing the key whitelist, and a NUL cannot be stored in the
    process environment.

    Status codes: 500 when no .env path can be determined, 400 when the
    payload has no accepted keys or a value contains a forbidden
    character, 200 otherwise (with ``restart_required`` set when any
    updated key is flagged as needing a restart).
    """
    env_path = _find_env_path()
    if env_path is None:
        return JSONResponse(
            {"ok": False, "error": "Could not locate .env file"}, status_code=500,
        )

    updates = {
        k: str(v) for k, v in (payload or {}).items()
        if k in _SETTINGS_KEYS and v is not None
    }
    if not updates:
        return JSONResponse(
            {"ok": False, "error": "No valid keys to update"}, status_code=400,
        )

    # Validate before touching the file so a bad request changes nothing.
    unsafe = sorted(
        k for k, v in updates.items()
        if any(ch in v for ch in ("\n", "\r", "\x00"))
    )
    if unsafe:
        return JSONResponse(
            {
                "ok": False,
                "error": (
                    "Values must not contain line breaks or NUL characters: "
                    + ", ".join(unsafe)
                ),
            },
            status_code=400,
        )

    env_path.parent.mkdir(parents=True, exist_ok=True)
    # Read existing .env (if any), preserving comments + ordering.
    lines = []
    if env_path.exists():
        lines = env_path.read_text(encoding="utf-8").splitlines()

    # Rewrite — replace existing keys in place, append new ones.
    seen = set()
    for i, line in enumerate(lines):
        stripped = line.lstrip()
        if not stripped or stripped.startswith("#") or "=" not in stripped:
            continue
        key = stripped.split("=", 1)[0].strip()
        if key in updates:
            lines[i] = f"{key}={updates[key]}"
            seen.add(key)
    lines.extend(f"{key}={val}" for key, val in updates.items() if key not in seen)
    env_path.write_text("\n".join(lines) + "\n", encoding="utf-8")

    # Apply to the current process; keys flagged "restart" still need a
    # restart before the running app picks them up.
    restart_required = False
    for key, val in updates.items():
        os.environ[key] = val
        if _SETTINGS_KEYS[key]["restart"]:
            restart_required = True
    return JSONResponse({
        "ok": True,
        "updated": list(updates.keys()),
        "restart_required": restart_required,
        "env_path": str(env_path),
    })
|
| 843 |
+
|
| 844 |
+
|
| 845 |
+
def update_chatbot(chatbot: List[Dict[str, Any]], response: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Append ``response`` to the chat history in place and return that same list."""
    chatbot += [response]
    return chatbot
|
| 849 |
+
|
| 850 |
+
|
| 851 |
+
def main() -> None:
    """Command-line entrypoint for the Reachy Mini receptionist app.

    Parses the CLI arguments and hands the resulting namespace to ``run``;
    the second value returned by ``parse_args`` is ignored.
    """
    parsed = parse_args()
    cli_args = parsed[0]
    run(cli_args)
|
| 855 |
+
|
| 856 |
+
|
| 857 |
+
def run(
    args: argparse.Namespace,
    robot: Optional[ReachyMini] = None,
    app_stop_event: Optional[threading.Event] = None,
    settings_app: Optional[FastAPI] = None,
    instance_path: Optional[str] = None,
) -> None:
    """Run the Reachy Mini receptionist app.

    Connects to the robot (unless one is supplied), builds the face database,
    face-recognition worker, session manager, conversation controller,
    employee store, movement manager and head wobbler, selects the voice
    backend from ``VOICE_BACKEND``, mounts the dashboard API, starts the
    worker services and finally blocks in ``stream_manager.launch()`` until
    the stream ends. All services are stopped and the robot is disconnected
    on the way out.

    Args:
        args: Parsed CLI namespace. This function reads ``debug``,
            ``no_camera``, ``head_tracker``, ``robot_name`` and ``gradio``;
            ``gradio`` is forced to ``True`` when the robot reports a
            simulation mode.
        robot: Robot to drive. When ``None`` a ``ReachyMini`` is created
            here and the process exits with status 1 if that fails.
        app_stop_event: Optional event; once set, a background thread closes
            the stream manager so ``launch()`` can return.
        settings_app: Optional FastAPI app to attach routes to. In Gradio
            mode a fresh ``FastAPI()`` is used when this is falsy.
        instance_path: Directory holding ``guests.db``, ``visitor_log.db``
            and ``employees.db``. Defaults to the current working directory.

    """
    # Importing runtime dependencies lazily keeps startup flexible across install contexts.
    from reachy_mini_receptionist.moves import MovementManager
    from reachy_mini_receptionist.console import LocalStream
    from reachy_mini_receptionist.tools.core_tools import ToolDependencies
    from reachy_mini_receptionist.audio.head_wobbler import HeadWobbler

    # Backend switch: VOICE_BACKEND=gemini (default) uses Google Gemini
    # Live, "openai" uses OpenAI Realtime. Default flipped from openai to
    # gemini on 2026-05-21 because (a) Gemini Live hears Indian names far
    # more accurately than OpenAI's gpt-4o-transcribe, (b) operators on a
    # free Gemini API key get more headroom than on free OpenAI credits.
    # Both handlers are imported lazily (inside their branch below) so a
    # missing SDK doesn't break the alternate backend.
    _voice_backend = (os.getenv("VOICE_BACKEND") or "gemini").strip().lower()

    logger = setup_logger(args.debug)
    logger.info("Starting Reachy Mini Receptionist App")

    if args.no_camera and args.head_tracker is not None:
        logger.warning(
            "Head tracking disabled: --no-camera flag is set. "
            "Remove --no-camera to enable head tracking."
        )

    if robot is None:
        try:
            robot_kwargs = {}
            if args.robot_name is not None:
                robot_kwargs["robot_name"] = args.robot_name

            logger.info("Initializing ReachyMini (SDK will auto-detect appropriate backend)")
            robot = ReachyMini(**robot_kwargs)

        except TimeoutError as e:
            logger.error(
                "Connection timeout: Failed to connect to Reachy Mini daemon. "
                f"Details: {e}"
            )
            log_connection_troubleshooting(logger, args.robot_name)
            sys.exit(1)

        except ConnectionError as e:
            logger.error(
                "Connection failed: Unable to establish connection to Reachy Mini. "
                f"Details: {e}"
            )
            log_connection_troubleshooting(logger, args.robot_name)
            sys.exit(1)

        except Exception as e:
            logger.error(
                f"Unexpected error during robot initialization: {type(e).__name__}: {e}"
            )
            logger.error("Please check your configuration and try again.")
            sys.exit(1)

    # Auto-enable Gradio in simulation mode (both MuJoCo for daemon and mockup-sim for desktop app)
    # The status may come back either as a dict or as an object with attributes.
    status = robot.client.get_status()
    if isinstance(status, dict):
        simulation_enabled = status.get("simulation_enabled", False)
        mockup_sim_enabled = status.get("mockup_sim_enabled", False)
    else:
        simulation_enabled = getattr(status, "simulation_enabled", False)
        mockup_sim_enabled = getattr(status, "mockup_sim_enabled", False)

    is_simulation = simulation_enabled or mockup_sim_enabled

    if is_simulation and not args.gradio:
        logger.info("Simulation mode detected. Automatically enabling gradio flag.")
        args.gradio = True

    camera_worker, _, vision_manager = handle_vision_stuff(args, robot)

    # ------------------------------------------------------------------
    # Receptionist: Face DB + Face Recognition Worker
    # ------------------------------------------------------------------
    from reachy_mini_receptionist.face_db import FaceDatabase
    from reachy_mini_receptionist.face_recognition_worker import FaceRecognitionWorker

    db_dir = Path(instance_path) if instance_path else Path.cwd()
    face_db = FaceDatabase(db_dir / "guests.db")
    face_worker = FaceRecognitionWorker(face_db, camera_worker=camera_worker)

    # ------------------------------------------------------------------
    # Receptionist: Session state machine + visitor log
    # ------------------------------------------------------------------
    from reachy_mini_receptionist.session_manager import SessionManager
    from reachy_mini_receptionist.conversation_controller import ConversationController
    from reachy_mini_receptionist.visitor_log import VisitorLog

    visitor_log = VisitorLog(db_dir / "visitor_log.db")
    session_manager = SessionManager(visitor_log=visitor_log)
    conversation_controller = ConversationController(session_manager)

    # Employee directory — SQLite-backed CRUD. Seeded from the hardcoded
    # _SEED_EMPLOYEES list in employees.py on a brand-new install; after
    # that, the dashboard's Employees panel is the source of truth.
    from reachy_mini_receptionist.employees_store import EmployeeStore
    from reachy_mini_receptionist import employees as _employees_module

    employee_store = EmployeeStore(db_dir / "employees.db")
    try:
        seeded = employee_store.seed_if_empty(_employees_module._SEED_EMPLOYEES)
        if seeded:
            print(f"[employees] Seeded {seeded} employee(s) on first run")
    except Exception as e:
        # Best-effort: a failed seed must not prevent the app from starting.
        print(f"[employees] Seed failed: {e}")
    _employees_module.set_store(employee_store)

    # Privacy retention — best-effort cleanup at startup. Defaults match the
    # Day-2 plan (face TTL 30d, visit log 90d). Set the env var to 0 to
    # disable either one. Runs once per app start — restart weekly or add a
    # scheduled task if you keep the app up for months.
    try:
        face_ttl = float(os.getenv("FACE_TTL_DAYS", "30"))
        removed_faces = face_db.cleanup_older_than(face_ttl)
        if removed_faces:
            print(f"[retention] Face DB: removed {removed_faces} guest(s) older than {face_ttl} days")
    except Exception as e:
        print(f"[retention] Face DB cleanup failed: {e}")
    try:
        log_retention = float(os.getenv("VISITOR_LOG_RETENTION_DAYS", "90"))
        removed_visits = visitor_log.cleanup_older_than(log_retention)
        if removed_visits:
            print(f"[retention] Visitor log: removed {removed_visits} row(s) older than {log_retention} days")
    except Exception as e:
        print(f"[retention] Visitor log cleanup failed: {e}")

    movement_manager = MovementManager(
        current_robot=robot,
        camera_worker=camera_worker,
    )

    head_wobbler = HeadWobbler(set_speech_offsets=movement_manager.set_speech_offsets)

    deps = ToolDependencies(
        reachy_mini=robot,
        movement_manager=movement_manager,
        camera_worker=camera_worker,
        vision_manager=vision_manager,
        head_wobbler=head_wobbler,
        face_worker=face_worker,
        face_db=face_db,
        session_manager=session_manager,
        conversation_controller=conversation_controller,
    )
    current_file_path = os.path.dirname(os.path.abspath(__file__))
    logger.debug(f"Current file absolute path: {current_file_path}")
    chatbot = gr.Chatbot(
        type="messages",
        resizable=True,
        avatar_images=(
            os.path.join(current_file_path, "images", "user_avatar.png"),
            os.path.join(current_file_path, "images", "reachymini_avatar.png"),
        ),
    )
    logger.debug(f"Chatbot avatar images: {chatbot.avatar_images}")

    if _voice_backend == "gemini":
        from reachy_mini_receptionist.gemini_live import GeminiLiveHandler

        logger.info("VOICE_BACKEND=gemini — using Gemini Live handler")
        handler = GeminiLiveHandler(
            deps,
            gradio_mode=args.gradio,
            instance_path=instance_path,
            session_manager=session_manager,
            controller=conversation_controller,
        )
    else:
        # Imported here (not at the top of run) so that the default Gemini
        # backend keeps working when the OpenAI SDK is not installed.
        from reachy_mini_receptionist.openai_realtime import OpenaiRealtimeHandler

        # Any value other than "gemini" lands here; log the value actually
        # configured rather than claiming OpenAI is the default.
        logger.info("VOICE_BACKEND=%s — using OpenAI Realtime handler", _voice_backend)
        handler = OpenaiRealtimeHandler(
            deps,
            gradio_mode=args.gradio,
            instance_path=instance_path,
            session_manager=session_manager,
            controller=conversation_controller,
        )

    def _face_event_forwarder(event: Dict[str, Any]) -> None:
        """Route a face event through the controller, then to the LLM context."""
        try:
            conversation_controller.on_face_event(event)
        except Exception as exc:
            logger.warning("ConversationController.on_face_event raised %s: %s", type(exc).__name__, exc)
        # The handler is notified even when the controller raised.
        handler.notify_external_face_event(event)

    face_worker.set_face_event_callback(_face_event_forwarder)

    # Subscribe the handler BEFORE face_worker.start() so the first
    # transitions aren't dropped. SessionManager buffers events that fire
    # before the realtime websocket is connected.
    session_manager.subscribe(handler.notify_session_event)

    stream_manager: gr.Blocks | LocalStream | None = None

    if args.gradio:
        api_key_textbox = gr.Textbox(
            label="OPENAI API Key",
            type="password",
            value=os.getenv("OPENAI_API_KEY") if not get_space() else "",
        )

        from reachy_mini_receptionist.gradio_personality import PersonalityUI

        personality_ui = PersonalityUI()
        personality_ui.create_components()

        stream = Stream(
            handler=handler,
            mode="send-receive",
            modality="audio",
            additional_inputs=[
                chatbot,
                api_key_textbox,
                *personality_ui.additional_inputs_ordered(),
            ],
            additional_outputs=[chatbot],
            additional_outputs_handler=update_chatbot,
            ui_args={"title": "Talk with Reachy Mini"},
        )
        stream_manager = stream.ui
        if not settings_app:
            app = FastAPI()
        else:
            app = settings_app

        personality_ui.wire_events(handler, stream_manager)

        # ------------------------------------------------------------------
        # Mount dashboard API endpoints BEFORE wrapping with Gradio so that
        # /video_feed, /api/*, /dashboard routes are available on the same app.
        # ------------------------------------------------------------------
        _mount_dashboard_api(
            app, face_worker, face_db, handler, session_manager, visitor_log,
            employee_store=employee_store, instance_path=instance_path,
        )
        logger.info("📊 Receptionist dashboard available at: http://localhost:7860/dashboard")

        app = gr.mount_gradio_app(app, stream.ui, path="/")
    else:
        # In headless mode, wire settings_app + instance_path to console LocalStream
        stream_manager = LocalStream(
            handler,
            robot,
            settings_app=settings_app,
            instance_path=instance_path,
        )

        # ------------------------------------------------------------------
        # Mount dashboard API endpoints on settings_app when headless
        # ------------------------------------------------------------------
        # NOTE(review): settings_app is None when run() is reached via main();
        # presumably _mount_dashboard_api tolerates that — confirm.
        _mount_dashboard_api(
            settings_app, face_worker, face_db, handler, session_manager, visitor_log,
            employee_store=employee_store, instance_path=instance_path,
        )

    # Each async service → its own thread/loop
    movement_manager.start()
    head_wobbler.start()
    face_worker.start()
    if camera_worker:
        camera_worker.start()
    if vision_manager:
        vision_manager.start()

    def poll_stop_event() -> None:
        """Poll the stop event to allow graceful shutdown."""
        if app_stop_event is not None:
            app_stop_event.wait()

        logger.info("App stop event detected, shutting down...")
        try:
            stream_manager.close()
        except Exception as e:
            logger.error(f"Error while closing stream manager: {e}")

    if app_stop_event:
        threading.Thread(target=poll_stop_event, daemon=True).start()

    try:
        stream_manager.launch()
    except KeyboardInterrupt:
        logger.info("Keyboard interruption in main thread... closing server.")
    finally:
        movement_manager.stop()
        head_wobbler.stop()
        face_worker.stop()
        if camera_worker:
            camera_worker.stop()
        if vision_manager:
            vision_manager.stop()

        # Ensure media is explicitly closed before disconnecting
        try:
            robot.media.close()
        except Exception as e:
            logger.debug(f"Error closing media during shutdown: {e}")

        # prevent connection to keep alive some threads
        robot.client.disconnect()
        time.sleep(1)
        logger.info("Shutdown complete.")
|
| 1166 |
+
|
| 1167 |
+
|
| 1168 |
+
class ReachyMiniReceptionist(ReachyMiniApp):  # type: ignore[misc]
    """Adapter that exposes the receptionist as a Reachy Mini App."""

    # Class-level settings; presumably read by the ReachyMiniApp base — confirm.
    custom_app_url = "http://0.0.0.0:7860/"
    dont_start_webserver = False

    def run(self, reachy_mini: ReachyMini, stop_event: threading.Event) -> None:
        """Start the receptionist on an already-connected robot.

        Installs a fresh asyncio event loop as the current loop, parses the
        CLI arguments and delegates to the module-level ``run`` with this
        app's settings FastAPI instance and instance directory.

        Args:
            reachy_mini: Robot handle passed through as ``robot``.
            stop_event: Event passed through as ``app_stop_event``.

        """
        asyncio.set_event_loop(asyncio.new_event_loop())

        cli_args = parse_args()[0]

        # is_wireless = reachy_mini.client.get_status()["wireless_version"]
        # args.head_tracker = None if is_wireless else "mediapipe"

        # Databases live in the parent of the path reported by the base class.
        app_dir = self._get_instance_path().parent
        launch_kwargs = {
            "robot": reachy_mini,
            "app_stop_event": stop_event,
            "settings_app": self.settings_app,
            "instance_path": app_dir,
        }
        # Inside a method body, ``run`` resolves to the module-level function.
        run(cli_args, **launch_kwargs)
|
| 1192 |
+
|
| 1193 |
+
|
| 1194 |
+
if __name__ == "__main__":
    # Script entry: run the app through its wrapped runner and ask it to
    # stop when the user interrupts with Ctrl+C.
    receptionist_app = ReachyMiniReceptionist()
    try:
        receptionist_app.wrapped_run()
    except KeyboardInterrupt:
        receptionist_app.stop()
|
src/reachy_mini_receptionist/moves.py
ADDED
|
@@ -0,0 +1,849 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Movement system with sequential primary moves and additive secondary moves.
|
| 2 |
+
|
| 3 |
+
Design overview
|
| 4 |
+
- Primary moves (emotions, dances, goto, breathing) are mutually exclusive and run
|
| 5 |
+
sequentially.
|
| 6 |
+
- Secondary moves (speech sway, face tracking) are additive offsets applied on top
|
| 7 |
+
of the current primary pose.
|
| 8 |
+
- There is a single control point to the robot: `ReachyMini.set_target`.
|
| 9 |
+
- The control loop runs near 100 Hz and is phase-aligned via a monotonic clock.
|
| 10 |
+
- Idle behaviour starts an infinite `BreathingMove` after a short inactivity delay
|
| 11 |
+
unless listening is active.
|
| 12 |
+
|
| 13 |
+
Threading model
|
| 14 |
+
- A dedicated worker thread owns all real-time state and issues `set_target`
|
| 15 |
+
commands.
|
| 16 |
+
- Other threads communicate via a command queue (enqueue moves, mark activity,
|
| 17 |
+
toggle listening).
|
| 18 |
+
- Secondary offset producers set pending values guarded by locks; the worker
|
| 19 |
+
snaps them atomically.
|
| 20 |
+
|
| 21 |
+
Units and frames
|
| 22 |
+
- Secondary offsets are interpreted as metres for x/y/z and radians for
|
| 23 |
+
roll/pitch/yaw in the world frame (unless noted by `compose_world_offset`).
|
| 24 |
+
- Antennas and `body_yaw` are in radians.
|
| 25 |
+
- Head pose composition uses `compose_world_offset(primary_head, secondary_head)`;
|
| 26 |
+
the secondary offset must therefore be expressed in the world frame.
|
| 27 |
+
|
| 28 |
+
Safety
|
| 29 |
+
- Listening freezes antennas, then blends them back on unfreeze.
|
| 30 |
+
- Interpolations and blends are used to avoid jumps at all times.
|
| 31 |
+
- `set_target` errors are rate-limited in logs.
|
| 32 |
+
"""
|
| 33 |
+
|
| 34 |
+
from __future__ import annotations
|
| 35 |
+
import time
|
| 36 |
+
import logging
|
| 37 |
+
import threading
|
| 38 |
+
from queue import Empty, Queue
|
| 39 |
+
from typing import Any, Dict, Tuple
|
| 40 |
+
from collections import deque
|
| 41 |
+
from dataclasses import dataclass
|
| 42 |
+
|
| 43 |
+
import numpy as np
|
| 44 |
+
from numpy.typing import NDArray
|
| 45 |
+
|
| 46 |
+
from reachy_mini import ReachyMini
|
| 47 |
+
from reachy_mini.utils import create_head_pose
|
| 48 |
+
from reachy_mini.motion.move import Move
|
| 49 |
+
from reachy_mini.utils.interpolation import (
|
| 50 |
+
compose_world_offset,
|
| 51 |
+
linear_pose_interpolation,
|
| 52 |
+
)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
logger = logging.getLogger(__name__)
|
| 56 |
+
|
| 57 |
+
# Configuration constants
|
| 58 |
+
CONTROL_LOOP_FREQUENCY_HZ = 100.0 # Hz - Target frequency for the movement control loop
|
| 59 |
+
|
| 60 |
+
# Type definitions
|
| 61 |
+
FullBodyPose = Tuple[NDArray[np.float32], Tuple[float, float], float] # (head_pose_4x4, antennas, body_yaw)
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
class BreathingMove(Move):  # type: ignore
    """Idle move: blend from a starting pose to neutral, then breathe forever.

    For ``t`` below ``interpolation_duration`` the head pose and antennas are
    linearly blended from the supplied start values to neutral. After that
    the head oscillates along z and the antennas sway in opposite directions.
    """

    def __init__(
        self,
        interpolation_start_pose: NDArray[np.float32],
        interpolation_start_antennas: Tuple[float, float],
        interpolation_duration: float = 1.0,
    ):
        """Store the blend start state and the breathing parameters.

        Args:
            interpolation_start_pose: 4x4 head pose matrix to blend from.
            interpolation_start_antennas: Antenna positions to blend from.
            interpolation_duration: Length of the blend to neutral, in seconds.

        """
        # Blend start state
        self.interpolation_start_pose = interpolation_start_pose
        self.interpolation_start_antennas = np.array(interpolation_start_antennas)
        self.interpolation_duration = interpolation_duration

        # Neutral targets that the blend ends on
        self.neutral_head_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
        self.neutral_antennas = np.array([0.0, 0.0])

        # Oscillation settings
        self.breathing_z_amplitude = 0.005  # 5mm gentle breathing
        self.breathing_frequency = 0.1  # Hz (6 breaths per minute)
        self.antenna_sway_amplitude = np.deg2rad(15)  # 15 degrees
        self.antenna_frequency = 0.5  # Hz (faster antenna sway)

    @property
    def duration(self) -> float:
        """Return infinity: the move never finishes on its own."""
        return float("inf")

    def evaluate(self, t: float) -> tuple[NDArray[np.float64] | None, NDArray[np.float64] | None, float | None]:
        """Sample the move at time ``t`` (seconds since the move started).

        Returns:
            ``(head_pose, antennas, body_yaw)`` with ``body_yaw`` always 0.0.

        """
        blend_span = self.interpolation_duration

        if t < blend_span:
            # Phase 1: linear blend towards the neutral base position.
            progress = t / blend_span
            blended_head = linear_pose_interpolation(
                self.interpolation_start_pose, self.neutral_head_pose, progress,
            )
            blended_antennas = (
                1 - progress
            ) * self.interpolation_start_antennas + progress * self.neutral_antennas
            return (blended_head, blended_antennas.astype(np.float64), 0.0)

        # Phase 2: periodic motion around neutral, timed from the end of the blend.
        elapsed = t - blend_span

        # Head: gentle sine along z.
        z_offset = self.breathing_z_amplitude * np.sin(2 * np.pi * self.breathing_frequency * elapsed)
        breathing_head = create_head_pose(x=0, y=0, z=z_offset, roll=0, pitch=0, yaw=0, degrees=True, mm=False)

        # Antennas: same sine magnitude, mirrored sign on the second one.
        sway = self.antenna_sway_amplitude * np.sin(2 * np.pi * self.antenna_frequency * elapsed)
        swaying_antennas = np.array([sway, -sway], dtype=np.float64)

        return (breathing_head, swaying_antennas, 0.0)
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
def combine_full_body(primary_pose: FullBodyPose, secondary_pose: FullBodyPose) -> FullBodyPose:
    """Fuse a primary full-body pose with a secondary offset pose.

    Args:
        primary_pose: ``(head_pose, antennas, body_yaw)`` of the primary move.
        secondary_pose: ``(head_pose, antennas, body_yaw)`` holding the
            secondary offsets.

    Returns:
        ``(head_pose, antennas, body_yaw)`` where the head is the result of
        ``compose_world_offset`` and antennas / body yaw are plain sums.

    """
    base_head, base_antennas, base_yaw = primary_pose
    offset_head, offset_antennas, offset_yaw = secondary_pose

    # Head: the secondary pose is passed to compose_world_offset as the offset
    # (expected to be expressed in the world frame) on top of the absolute
    # primary transform.
    fused_head = compose_world_offset(base_head, offset_head, reorthonormalize=True)

    # Antennas and body yaw are additive; only the first two antenna entries are used.
    fused_antennas = tuple(base_antennas[i] + offset_antennas[i] for i in (0, 1))
    fused_yaw = base_yaw + offset_yaw

    return (fused_head, fused_antennas, fused_yaw)
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def clone_full_body_pose(pose: FullBodyPose) -> FullBodyPose:
    """Return an independent copy of a ``(head, antennas, body_yaw)`` pose.

    The head is duplicated through its own ``copy()`` method, the first two
    antenna entries are converted to ``float`` and repacked as a tuple, and
    the body yaw is converted to ``float``.
    """
    head_matrix, antenna_pair, yaw = pose
    detached_head = head_matrix.copy()
    detached_antennas = tuple(float(antenna_pair[i]) for i in (0, 1))
    return (detached_head, detached_antennas, float(yaw))
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
@dataclass
class MovementState:
    """Mutable record of what the movement system is currently doing."""

    # --- Primary move ---
    # Move being played (None when idle) and when it started.
    current_move: Move | None = None
    move_start_time: float | None = None
    # Monotonic timestamp of the most recent activity (see update_activity).
    last_activity_time: float = 0.0

    # --- Secondary offsets (six floats each, all zero by default) ---
    speech_offsets: Tuple[float, float, float, float, float, float] = (0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
    face_tracking_offsets: Tuple[float, float, float, float, float, float] = (0.0, 0.0, 0.0, 0.0, 0.0, 0.0)

    # --- Status ---
    # Most recent primary pose, if one has been recorded.
    last_primary_pose: FullBodyPose | None = None

    def update_activity(self) -> None:
        """Stamp ``last_activity_time`` with the current monotonic clock."""
        now = time.monotonic()
        self.last_activity_time = now
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
@dataclass
class LoopFrequencyStats:
    """Rolling accumulators describing the control-loop frequency."""

    mean: float = 0.0
    m2: float = 0.0
    min_freq: float = float("inf")
    count: int = 0
    last_freq: float = 0.0
    potential_freq: float = 0.0

    def reset(self) -> None:
        """Clear mean, m2, min_freq and count.

        ``last_freq`` and ``potential_freq`` are deliberately left untouched.
        """
        self.mean, self.m2 = 0.0, 0.0
        self.min_freq, self.count = float("inf"), 0
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
class MovementManager:
|
| 223 |
+
"""Coordinate sequential moves, additive offsets, and robot output at 100 Hz.
|
| 224 |
+
|
| 225 |
+
Responsibilities:
|
| 226 |
+
- Own a real-time loop that samples the current primary move (if any), fuses
|
| 227 |
+
secondary offsets, and calls `set_target` exactly once per tick.
|
| 228 |
+
- Start an idle `BreathingMove` after `idle_inactivity_delay` when not
|
| 229 |
+
listening and no moves are queued.
|
| 230 |
+
- Expose thread-safe APIs so other threads can enqueue moves, mark activity,
|
| 231 |
+
or feed secondary offsets without touching internal state.
|
| 232 |
+
|
| 233 |
+
Timing:
|
| 234 |
+
- All elapsed-time calculations rely on `time.monotonic()` through `self._now`
|
| 235 |
+
to avoid wall-clock jumps.
|
| 236 |
+
- The loop attempts 100 Hz
|
| 237 |
+
|
| 238 |
+
Concurrency:
|
| 239 |
+
- External threads communicate via `_command_queue` messages.
|
| 240 |
+
- Secondary offsets are staged via dirty flags guarded by locks and consumed
|
| 241 |
+
atomically inside the worker loop.
|
| 242 |
+
"""
|
| 243 |
+
|
| 244 |
+
def __init__(
    self,
    current_robot: ReachyMini,
    camera_worker: "Any" = None,
):
    """Initialize movement manager.

    Only builds state; the worker thread is created later by ``start()``.

    Args:
        current_robot: Robot handle. This class calls ``set_target``,
            ``goto_target``, ``get_current_joint_positions`` and
            ``get_current_head_pose`` on it.
        camera_worker: Optional object polled every tick through
            ``get_face_tracking_offsets()``. When ``None``, neutral
            face-tracking offsets are used.
    """
    self.current_robot = current_robot
    self.camera_worker = camera_worker

    # Single timing source for durations
    self._now = time.monotonic

    # Movement state
    self.state = MovementState()
    self.state.last_activity_time = self._now()
    neutral_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
    # Seed the cached primary pose so the first tick has something to reuse.
    self.state.last_primary_pose = (neutral_pose, (0.0, 0.0), 0.0)

    # Move queue (primary moves)
    self.move_queue: deque[Move] = deque()

    # Configuration
    self.idle_inactivity_delay = 0.3  # seconds
    self.target_frequency = CONTROL_LOOP_FREQUENCY_HZ
    self.target_period = 1.0 / self.target_frequency

    self._stop_event = threading.Event()
    self._thread: threading.Thread | None = None
    self._is_listening = False
    self._last_commanded_pose: FullBodyPose = clone_full_body_pose(self.state.last_primary_pose)
    # Antenna values held while listening; also the start point of the blend-back.
    self._listening_antennas: Tuple[float, float] = self._last_commanded_pose[1]
    # 0.0 = fully frozen antennas, 1.0 = antennas fully follow the target.
    self._antenna_unfreeze_blend = 1.0
    self._antenna_blend_duration = 0.4  # seconds to blend back after listening
    self._last_listening_blend_time = self._now()
    self._breathing_active = False  # true when breathing move is running or queued
    # Minimum spacing between accepted listening toggles (seconds).
    self._listening_debounce_s = 0.15
    self._last_listening_toggle_time = self._now()
    # Throttling state for set_target error logs.
    self._last_set_target_err = 0.0
    self._set_target_err_interval = 1.0  # seconds between error logs
    self._set_target_err_suppressed = 0

    # Cross-thread signalling
    self._command_queue: "Queue[Tuple[str, Any]]" = Queue()
    self._speech_offsets_lock = threading.Lock()
    self._pending_speech_offsets: Tuple[float, float, float, float, float, float] = (
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
    )
    self._speech_offsets_dirty = False

    self._face_offsets_lock = threading.Lock()
    self._pending_face_offsets: Tuple[float, float, float, float, float, float] = (
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
    )
    self._face_offsets_dirty = False

    # Copies of worker-owned state that other threads read under this lock.
    self._shared_state_lock = threading.Lock()
    self._shared_last_activity_time = self.state.last_activity_time
    self._shared_is_listening = self._is_listening
    # Guards the last commanded pose and the frequency snapshot.
    self._status_lock = threading.Lock()
    self._freq_stats = LoopFrequencyStats()
    self._freq_snapshot = LoopFrequencyStats()
|
| 315 |
+
|
| 316 |
+
def queue_move(self, move: Move) -> None:
    """Schedule a primary move to play once the moves ahead of it are done.

    The move is not appended to the queue here. It is posted as a
    ``"queue_move"`` message on the command queue, so that only the
    control loop ever mutates movement state.
    """
    message = ("queue_move", move)
    self._command_queue.put(message)
|
| 323 |
+
|
| 324 |
+
def clear_move_queue(self) -> None:
    """Ask the worker to abort the running move and drop all queued ones.

    Posts a ``"clear_queue"`` message on the command queue; the actual
    clearing is done by the worker thread when it handles the message.
    """
    message = ("clear_queue", None)
    self._command_queue.put(message)
|
| 330 |
+
|
| 331 |
+
def set_speech_offsets(self, offsets: Tuple[float, float, float, float, float, float]) -> None:
    """Stage new speech-driven secondary offsets ``(x, y, z, roll, pitch, yaw)``.

    Translations are in metres and rotations in radians, expressed in the
    world frame. The value is only staged here, under the speech-offset
    lock; the control loop picks it up on its next poll.
    """
    lock = self._speech_offsets_lock
    with lock:
        self._speech_offsets_dirty = True
        self._pending_speech_offsets = offsets
|
| 340 |
+
|
| 341 |
+
def set_moving_state(self, duration: float) -> None:
    """Tell the worker that a manual motion lasting ``duration`` is under way.

    Legacy hook for goto-style helpers so that inactivity and breathing
    logic know about motions issued outside the move queue. The request is
    posted as a ``"set_moving_state"`` message on the command queue.
    """
    message = ("set_moving_state", duration)
    self._command_queue.put(message)
|
| 348 |
+
|
| 349 |
+
def is_idle(self) -> bool:
    """Tell whether the robot has been inactive for at least the idle delay.

    Reads the copies of the activity timestamp and listening flag that the
    worker publishes, under the shared-state lock. Always ``False`` while
    listening.
    """
    with self._shared_state_lock:
        listening = self._shared_is_listening
        last_activity = self._shared_last_activity_time

    if not listening:
        inactive_for = self._now() - last_activity
        return inactive_for >= self.idle_inactivity_delay
    return False
|
| 359 |
+
|
| 360 |
+
def set_listening(self, listening: bool) -> None:
    """Request that listening mode be switched on or off.

    While listening:
    - Antenna positions are frozen at the last commanded values.
    - Blending is reset so that upon unfreezing the antennas return smoothly.
    - Idle breathing is suppressed.

    The request is skipped when the *published* listening flag already
    equals ``listening``; otherwise a ``"set_listening"`` message is posted
    on the command queue for the worker to apply.

    NOTE(review): the comparison uses the flag as last published by the
    worker, so a request made before the previous one has been published
    may be filtered against stale state — confirm callers tolerate this.
    """
    with self._shared_state_lock:
        unchanged = self._shared_is_listening == listening
    if unchanged:
        return
    self._command_queue.put(("set_listening", listening))
|
| 374 |
+
|
| 375 |
+
def _poll_signals(self, current_time: float) -> None:
|
| 376 |
+
"""Apply queued commands and pending offset updates."""
|
| 377 |
+
self._apply_pending_offsets()
|
| 378 |
+
|
| 379 |
+
while True:
|
| 380 |
+
try:
|
| 381 |
+
command, payload = self._command_queue.get_nowait()
|
| 382 |
+
except Empty:
|
| 383 |
+
break
|
| 384 |
+
self._handle_command(command, payload, current_time)
|
| 385 |
+
|
| 386 |
+
def _apply_pending_offsets(self) -> None:
|
| 387 |
+
"""Apply the most recent speech/face offset updates."""
|
| 388 |
+
speech_offsets: Tuple[float, float, float, float, float, float] | None = None
|
| 389 |
+
with self._speech_offsets_lock:
|
| 390 |
+
if self._speech_offsets_dirty:
|
| 391 |
+
speech_offsets = self._pending_speech_offsets
|
| 392 |
+
self._speech_offsets_dirty = False
|
| 393 |
+
|
| 394 |
+
if speech_offsets is not None:
|
| 395 |
+
self.state.speech_offsets = speech_offsets
|
| 396 |
+
self.state.update_activity()
|
| 397 |
+
|
| 398 |
+
face_offsets: Tuple[float, float, float, float, float, float] | None = None
|
| 399 |
+
with self._face_offsets_lock:
|
| 400 |
+
if self._face_offsets_dirty:
|
| 401 |
+
face_offsets = self._pending_face_offsets
|
| 402 |
+
self._face_offsets_dirty = False
|
| 403 |
+
|
| 404 |
+
if face_offsets is not None:
|
| 405 |
+
self.state.face_tracking_offsets = face_offsets
|
| 406 |
+
self.state.update_activity()
|
| 407 |
+
|
| 408 |
+
def _handle_command(self, command: str, payload: Any, current_time: float) -> None:
    """Handle a single cross-thread command.

    Recognised commands are ``"queue_move"``, ``"clear_queue"``,
    ``"set_moving_state"``, ``"mark_activity"`` and ``"set_listening"``;
    anything else is logged as a warning and ignored.

    Args:
        command: Command name taken from the command queue.
        payload: Command-specific value (a ``Move``, a duration, a bool or
            ``None``).
        current_time: Tick timestamp supplied by the caller; not used by
            any branch of this method.
    """
    if command == "queue_move":
        if isinstance(payload, Move):
            self.move_queue.append(payload)
            self.state.update_activity()
            # The duration is only used to build the debug message below.
            duration = getattr(payload, "duration", None)
            if duration is not None:
                try:
                    duration_str = f"{float(duration):.2f}"
                except (TypeError, ValueError):
                    duration_str = str(duration)
            else:
                duration_str = "?"
            logger.debug(
                "Queued move with duration %ss, queue size: %s",
                duration_str,
                len(self.move_queue),
            )
        else:
            logger.warning("Ignored queue_move command with invalid payload: %s", payload)
    elif command == "clear_queue":
        self.move_queue.clear()
        self.state.current_move = None
        self.state.move_start_time = None
        self._breathing_active = False
        logger.info("Cleared move queue and stopped current move")
    elif command == "set_moving_state":
        # The duration is validated but otherwise unused: a valid value
        # simply refreshes the activity timestamp.
        try:
            duration = float(payload)
        except (TypeError, ValueError):
            logger.warning("Invalid moving state duration: %s", payload)
            return
        self.state.update_activity()
    elif command == "mark_activity":
        self.state.update_activity()
    elif command == "set_listening":
        desired_state = bool(payload)
        now = self._now()
        # Debounce: a request arriving too soon after the previous accepted
        # one is dropped (not deferred).
        if now - self._last_listening_toggle_time < self._listening_debounce_s:
            return
        self._last_listening_toggle_time = now

        if self._is_listening == desired_state:
            return

        self._is_listening = desired_state
        self._last_listening_blend_time = now
        if desired_state:
            # Freeze: snapshot current commanded antennas and reset blend
            self._listening_antennas = (
                float(self._last_commanded_pose[1][0]),
                float(self._last_commanded_pose[1][1]),
            )
            self._antenna_unfreeze_blend = 0.0
        else:
            # Unfreeze: restart blending from frozen pose
            self._antenna_unfreeze_blend = 0.0
        self.state.update_activity()
    else:
        logger.warning("Unknown command received by MovementManager: %s", command)
|
| 469 |
+
|
| 470 |
+
def _publish_shared_state(self) -> None:
|
| 471 |
+
"""Expose idle-related state for external threads."""
|
| 472 |
+
with self._shared_state_lock:
|
| 473 |
+
self._shared_last_activity_time = self.state.last_activity_time
|
| 474 |
+
self._shared_is_listening = self._is_listening
|
| 475 |
+
|
| 476 |
+
def _manage_move_queue(self, current_time: float) -> None:
|
| 477 |
+
"""Manage the primary move queue (sequential execution)."""
|
| 478 |
+
if self.state.current_move is None or (
|
| 479 |
+
self.state.move_start_time is not None
|
| 480 |
+
and current_time - self.state.move_start_time >= self.state.current_move.duration
|
| 481 |
+
):
|
| 482 |
+
self.state.current_move = None
|
| 483 |
+
self.state.move_start_time = None
|
| 484 |
+
|
| 485 |
+
if self.move_queue:
|
| 486 |
+
self.state.current_move = self.move_queue.popleft()
|
| 487 |
+
self.state.move_start_time = current_time
|
| 488 |
+
# Any real move cancels breathing mode flag
|
| 489 |
+
self._breathing_active = isinstance(self.state.current_move, BreathingMove)
|
| 490 |
+
logger.debug(f"Starting new move, duration: {self.state.current_move.duration}s")
|
| 491 |
+
|
| 492 |
+
def _manage_breathing(self, current_time: float) -> None:
    """Manage automatic breathing when idle.

    Queues a ``BreathingMove`` once the robot has been inactive for at
    least ``idle_inactivity_delay`` while nothing is playing, nothing is
    queued, listening is off and breathing is not already active. Also
    interrupts a running breathing move as soon as another move is queued.

    Args:
        current_time: Tick timestamp compared with ``state.last_activity_time``.
    """
    if (
        self.state.current_move is None
        and not self.move_queue
        and not self._is_listening
        and not self._breathing_active
    ):
        idle_for = current_time - self.state.last_activity_time
        if idle_for >= self.idle_inactivity_delay:
            try:
                # These 2 functions return the latest available sensor data from the robot, but don't perform I/O synchronously.
                # Therefore, we accept calling them inside the control loop.
                _, current_antennas = self.current_robot.get_current_joint_positions()
                current_head_pose = self.current_robot.get_current_head_pose()

                # Flag first so a second breathing move is not queued on the next tick.
                self._breathing_active = True
                self.state.update_activity()

                breathing_move = BreathingMove(
                    interpolation_start_pose=current_head_pose,
                    interpolation_start_antennas=current_antennas,
                    interpolation_duration=1.0,
                )
                self.move_queue.append(breathing_move)
                logger.debug("Started breathing after %.1fs of inactivity", idle_for)
            except Exception as e:
                # Clear the flag so a later tick can try again.
                self._breathing_active = False
                logger.error("Failed to start breathing: %s", e)

    # A queued move pre-empts a running breathing move immediately.
    if isinstance(self.state.current_move, BreathingMove) and self.move_queue:
        self.state.current_move = None
        self.state.move_start_time = None
        self._breathing_active = False
        logger.debug("Stopping breathing due to new move activity")

    # Keep the flag consistent whenever a non-breathing move is playing.
    if self.state.current_move is not None and not isinstance(self.state.current_move, BreathingMove):
        self._breathing_active = False
|
| 530 |
+
|
| 531 |
+
def _get_primary_pose(self, current_time: float) -> FullBodyPose:
    """Return the primary ``(head, antennas, body_yaw)`` pose for this tick.

    With an active move, the move is evaluated at its elapsed time; any
    component it returns as ``None`` falls back to neutral, and the result
    is cached. Without one, the cached pose is reused so the robot does not
    jump between moves; if nothing is cached, a neutral pose is built and
    cached.
    """
    state = self.state

    # When a primary move is playing, sample it and cache the resulting pose
    if state.current_move is not None and state.move_start_time is not None:
        elapsed = current_time - state.move_start_time
        head, antennas, body_yaw = state.current_move.evaluate(elapsed)

        if head is None:
            head = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
        if antennas is None:
            antennas = np.array([0.0, 0.0])
        if body_yaw is None:
            body_yaw = 0.0

        antenna_pair = (float(antennas[0]), float(antennas[1]))
        head_snapshot = head.copy()
        sampled_pose = (head_snapshot, antenna_pair, float(body_yaw))
        state.last_primary_pose = clone_full_body_pose(sampled_pose)
        return sampled_pose

    # Otherwise reuse the last primary pose so we avoid jumps between moves
    if state.last_primary_pose is not None:
        return clone_full_body_pose(state.last_primary_pose)

    neutral_pose = (create_head_pose(0, 0, 0, 0, 0, 0, degrees=True), (0.0, 0.0), 0.0)
    state.last_primary_pose = clone_full_body_pose(neutral_pose)
    return neutral_pose
|
| 563 |
+
|
| 564 |
+
def _get_secondary_pose(self) -> FullBodyPose:
    """Build the secondary pose from the speech and face-tracking offsets.

    The two six-component offset vectors are added element-wise and turned
    into a head pose (radians, no millimetre scaling). The antenna and
    body-yaw components of the returned pose are zero.
    """
    speech = self.state.speech_offsets
    face = self.state.face_tracking_offsets
    # Combine speech sway offsets + face tracking offsets for secondary pose
    x, y, z, roll, pitch, yaw = [speech[axis] + face[axis] for axis in range(6)]

    secondary_head_pose = create_head_pose(
        x=x,
        y=y,
        z=z,
        roll=roll,
        pitch=pitch,
        yaw=yaw,
        degrees=False,
        mm=False,
    )
    return (secondary_head_pose, (0.0, 0.0), 0.0)
|
| 587 |
+
|
| 588 |
+
def _compose_full_body_pose(self, current_time: float) -> FullBodyPose:
    """Fuse this tick's primary pose with the secondary offset pose."""
    return combine_full_body(
        self._get_primary_pose(current_time),
        self._get_secondary_pose(),
    )
|
| 593 |
+
|
| 594 |
+
def _update_primary_motion(self, current_time: float) -> None:
|
| 595 |
+
"""Advance queue state and idle behaviours for this tick."""
|
| 596 |
+
self._manage_move_queue(current_time)
|
| 597 |
+
self._manage_breathing(current_time)
|
| 598 |
+
|
| 599 |
+
def _calculate_blended_antennas(self, target_antennas: Tuple[float, float]) -> Tuple[float, float]:
|
| 600 |
+
"""Blend target antennas with listening freeze state and update blending."""
|
| 601 |
+
now = self._now()
|
| 602 |
+
listening = self._is_listening
|
| 603 |
+
listening_antennas = self._listening_antennas
|
| 604 |
+
blend = self._antenna_unfreeze_blend
|
| 605 |
+
blend_duration = self._antenna_blend_duration
|
| 606 |
+
last_update = self._last_listening_blend_time
|
| 607 |
+
self._last_listening_blend_time = now
|
| 608 |
+
|
| 609 |
+
if listening:
|
| 610 |
+
antennas_cmd = listening_antennas
|
| 611 |
+
new_blend = 0.0
|
| 612 |
+
else:
|
| 613 |
+
dt = max(0.0, now - last_update)
|
| 614 |
+
if blend_duration <= 0:
|
| 615 |
+
new_blend = 1.0
|
| 616 |
+
else:
|
| 617 |
+
new_blend = min(1.0, blend + dt / blend_duration)
|
| 618 |
+
antennas_cmd = (
|
| 619 |
+
listening_antennas[0] * (1.0 - new_blend) + target_antennas[0] * new_blend,
|
| 620 |
+
listening_antennas[1] * (1.0 - new_blend) + target_antennas[1] * new_blend,
|
| 621 |
+
)
|
| 622 |
+
|
| 623 |
+
if listening:
|
| 624 |
+
self._antenna_unfreeze_blend = 0.0
|
| 625 |
+
else:
|
| 626 |
+
self._antenna_unfreeze_blend = new_blend
|
| 627 |
+
if new_blend >= 1.0:
|
| 628 |
+
self._listening_antennas = (
|
| 629 |
+
float(target_antennas[0]),
|
| 630 |
+
float(target_antennas[1]),
|
| 631 |
+
)
|
| 632 |
+
|
| 633 |
+
return antennas_cmd
|
| 634 |
+
|
| 635 |
+
def _issue_control_command(self, head: NDArray[np.float32], antennas: Tuple[float, float], body_yaw: float) -> None:
    """Send the fused pose to the robot; log failures at a throttled rate.

    On success the pose is stored (under the status lock) as the last
    commanded pose. On failure an error is logged at most once per
    ``_set_target_err_interval`` seconds; failures in between are only
    counted and reported with the next logged error.
    """
    try:
        self.current_robot.set_target(head=head, antennas=antennas, body_yaw=body_yaw)
    except Exception as e:
        now = self._now()
        if now - self._last_set_target_err < self._set_target_err_interval:
            self._set_target_err_suppressed += 1
            return
        msg = f"Failed to set robot target: {e}"
        if self._set_target_err_suppressed:
            msg += f" (suppressed {self._set_target_err_suppressed} repeats)"
            self._set_target_err_suppressed = 0
        logger.error(msg)
        self._last_set_target_err = now
        return

    commanded = (head, antennas, body_yaw)
    with self._status_lock:
        self._last_commanded_pose = clone_full_body_pose(commanded)
|
| 653 |
+
|
| 654 |
+
def _update_frequency_stats(
|
| 655 |
+
self, loop_start: float, prev_loop_start: float, stats: LoopFrequencyStats,
|
| 656 |
+
) -> LoopFrequencyStats:
|
| 657 |
+
"""Update frequency statistics based on the current loop start time."""
|
| 658 |
+
period = loop_start - prev_loop_start
|
| 659 |
+
if period > 0:
|
| 660 |
+
stats.last_freq = 1.0 / period
|
| 661 |
+
stats.count += 1
|
| 662 |
+
delta = stats.last_freq - stats.mean
|
| 663 |
+
stats.mean += delta / stats.count
|
| 664 |
+
stats.m2 += delta * (stats.last_freq - stats.mean)
|
| 665 |
+
stats.min_freq = min(stats.min_freq, stats.last_freq)
|
| 666 |
+
return stats
|
| 667 |
+
|
| 668 |
+
def _schedule_next_tick(self, loop_start: float, stats: LoopFrequencyStats) -> Tuple[float, LoopFrequencyStats]:
|
| 669 |
+
"""Compute sleep time to maintain target frequency and update potential freq."""
|
| 670 |
+
computation_time = self._now() - loop_start
|
| 671 |
+
stats.potential_freq = 1.0 / computation_time if computation_time > 0 else float("inf")
|
| 672 |
+
sleep_time = max(0.0, self.target_period - computation_time)
|
| 673 |
+
return sleep_time, stats
|
| 674 |
+
|
| 675 |
+
def _record_frequency_snapshot(self, stats: LoopFrequencyStats) -> None:
    """Publish a field-by-field copy of ``stats`` under the status lock."""
    copied_fields = ("mean", "m2", "min_freq", "count", "last_freq", "potential_freq")
    with self._status_lock:
        self._freq_snapshot = LoopFrequencyStats(
            **{name: getattr(stats, name) for name in copied_fields}
        )
|
| 686 |
+
|
| 687 |
+
def _maybe_log_frequency(self, loop_count: int, print_interval_loops: int, stats: LoopFrequencyStats) -> None:
|
| 688 |
+
"""Emit frequency telemetry when enough loops have elapsed."""
|
| 689 |
+
if loop_count % print_interval_loops != 0 or stats.count == 0:
|
| 690 |
+
return
|
| 691 |
+
|
| 692 |
+
variance = stats.m2 / stats.count if stats.count > 0 else 0.0
|
| 693 |
+
lowest = stats.min_freq if stats.min_freq != float("inf") else 0.0
|
| 694 |
+
logger.debug(
|
| 695 |
+
"Loop freq - avg: %.2fHz, variance: %.4f, min: %.2fHz, last: %.2fHz, potential: %.2fHz, target: %.1fHz",
|
| 696 |
+
stats.mean,
|
| 697 |
+
variance,
|
| 698 |
+
lowest,
|
| 699 |
+
stats.last_freq,
|
| 700 |
+
stats.potential_freq,
|
| 701 |
+
self.target_frequency,
|
| 702 |
+
)
|
| 703 |
+
stats.reset()
|
| 704 |
+
|
| 705 |
+
def _update_face_tracking(self, current_time: float) -> None:
|
| 706 |
+
"""Get face tracking offsets from camera worker thread."""
|
| 707 |
+
if self.camera_worker is not None:
|
| 708 |
+
# Get face tracking offsets from camera worker thread
|
| 709 |
+
offsets = self.camera_worker.get_face_tracking_offsets()
|
| 710 |
+
self.state.face_tracking_offsets = offsets
|
| 711 |
+
else:
|
| 712 |
+
# No camera worker, use neutral offsets
|
| 713 |
+
self.state.face_tracking_offsets = (0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
|
| 714 |
+
|
| 715 |
+
def start(self) -> None:
    """Launch the daemon worker thread that runs ``working_loop``.

    A call made while a worker is still alive is ignored with a warning.
    """
    already_running = self._thread is not None and self._thread.is_alive()
    if already_running:
        logger.warning("Move worker already running; start() ignored")
        return

    self._stop_event.clear()
    worker = threading.Thread(target=self.working_loop, daemon=True)
    self._thread = worker
    worker.start()
    logger.debug("Move worker started")
|
| 724 |
+
|
| 725 |
+
def stop(self) -> None:
    """Stop the worker thread, then drive the robot back to neutral.

    Ignored when no worker is running. Otherwise: request a queue clear,
    signal the stop event, wait for the thread to exit, and finally issue
    a 2-second ``goto_target`` to the neutral pose. A failure of that final
    move is logged, not raised.
    """
    worker_running = self._thread is not None and self._thread.is_alive()
    if not worker_running:
        logger.debug("Move worker not running; stop() ignored")
        return

    logger.info("Stopping movement manager and resetting to neutral position...")

    # Clear any queued moves and stop current move
    self.clear_move_queue()

    # Stop the worker thread first so it doesn't interfere
    self._stop_event.set()
    if self._thread is not None:
        self._thread.join()
        self._thread = None
        logger.debug("Move worker stopped")

    # Reset to neutral position using goto_target (same approach as wake_up)
    try:
        # Use goto_target directly on the robot
        self.current_robot.goto_target(
            head=create_head_pose(0, 0, 0, 0, 0, 0, degrees=True),
            antennas=[0.0, 0.0],
            duration=2.0,
            body_yaw=0.0,
        )
        logger.info("Reset to neutral position completed")
    except Exception as e:
        logger.error(f"Failed to reset to neutral position: {e}")
|
| 764 |
+
|
| 765 |
+
def get_status(self) -> Dict[str, Any]:
    """Return a small dictionary describing the manager's current state.

    The last commanded pose and the loop-frequency figures are copied
    under the status lock; queue size, listening flag and breathing flag
    are read directly.
    """
    with self._status_lock:
        pose_snapshot = clone_full_body_pose(self._last_commanded_pose)
        latest = self._freq_snapshot
        loop_frequency = {
            "last": latest.last_freq,
            "mean": latest.mean,
            "min": latest.min_freq,
            "potential": latest.potential_freq,
            "samples": latest.count,
        }

    if pose_snapshot:
        head_matrix = pose_snapshot[0].tolist()
        antennas = pose_snapshot[1]
        body_yaw = pose_snapshot[2]
    else:
        head_matrix = antennas = body_yaw = None

    return {
        "queue_size": len(self.move_queue),
        "is_listening": self._is_listening,
        "breathing_active": self._breathing_active,
        "last_commanded_pose": {
            "head": head_matrix,
            "antennas": antennas,
            "body_yaw": body_yaw,
        },
        "loop_frequency": loop_frequency,
    }
|
| 799 |
+
|
| 800 |
+
def working_loop(self) -> None:
    """Run the control loop until the stop event is set.

    Each tick polls commands and staged offsets, advances the primary move
    queue and breathing, refreshes face-tracking offsets, fuses primary and
    secondary poses, applies the listening antenna freeze/blend and issues
    a single ``set_target`` call. It then publishes shared state and
    frequency statistics and sleeps for the remainder of the tick.

    Single set_target() call with pose fusion.
    """
    logger.debug("Starting enhanced movement control loop (100Hz)")

    loop_count = 0
    prev_loop_start = self._now()
    # Telemetry is logged roughly every two seconds' worth of loops.
    print_interval_loops = max(1, int(self.target_frequency * 2))
    freq_stats = self._freq_stats

    while not self._stop_event.is_set():
        loop_start = self._now()
        loop_count += 1

        # The first iteration has no previous tick to measure a period against.
        if loop_count > 1:
            freq_stats = self._update_frequency_stats(loop_start, prev_loop_start, freq_stats)
        prev_loop_start = loop_start

        # 1) Poll external commands and apply pending offsets (atomic snapshot)
        self._poll_signals(loop_start)

        # 2) Manage the primary move queue (start new move, end finished move, breathing)
        self._update_primary_motion(loop_start)

        # 3) Update vision-based secondary offsets
        self._update_face_tracking(loop_start)

        # 4) Build primary and secondary full-body poses, then fuse them
        head, antennas, body_yaw = self._compose_full_body_pose(loop_start)

        # 5) Apply listening antenna freeze or blend-back
        antennas_cmd = self._calculate_blended_antennas(antennas)

        # 6) Single set_target call - the only control point
        self._issue_control_command(head, antennas_cmd, body_yaw)

        # 7) Adaptive sleep to align to next tick, then publish shared state
        # NOTE(review): the sleep time is computed before the publish/snapshot/
        # logging work below, so that work is not counted in the tick budget.
        sleep_time, freq_stats = self._schedule_next_tick(loop_start, freq_stats)
        self._publish_shared_state()
        self._record_frequency_snapshot(freq_stats)

        # 8) Periodic telemetry on loop frequency
        self._maybe_log_frequency(loop_count, print_interval_loops, freq_stats)

        if sleep_time > 0:
            time.sleep(sleep_time)

    logger.debug("Movement control loop stopped")
|
src/reachy_mini_receptionist/name_normalizer.py
ADDED
|
@@ -0,0 +1,228 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Gemini-backed name disambiguation.
|
| 2 |
+
|
| 3 |
+
OpenAI's gpt-4o-transcribe mishears short non-English names (Arav -> Lee Win,
|
| 4 |
+
Krishna -> Christina, Mukul -> Michael). The realtime audio loop is locked
|
| 5 |
+
to the OpenAI stack — too risky to swap mid-pilot — but we can run the
|
| 6 |
+
transcribed candidate through Gemini as a cheap post-processing step to
|
| 7 |
+
recover the intended name.
|
| 8 |
+
|
| 9 |
+
Pipeline:
|
| 10 |
+
visitor says "Arav"
|
| 11 |
+
-> OpenAI STT returns "Lee Win"
|
| 12 |
+
-> normalize_name("Lee Win", candidates=[Henry, Krishna, Arav, ...])
|
| 13 |
+
-> asks Gemini: "Which of these is closest phonetically to 'Lee Win'?"
|
| 14 |
+
-> returns "Arav" if confident, original "Lee Win" if not
|
| 15 |
+
-> register_guest("Arav", confirmed=true) — saves the right face
|
| 16 |
+
|
| 17 |
+
Fails open: when GEMINI_API_KEY is unset, the http call errors, or Gemini
|
| 18 |
+
returns garbage, we return the original transcribed name unchanged. The
|
| 19 |
+
worst case is "same behaviour as before".
|
| 20 |
+
"""
|
| 21 |
+
from __future__ import annotations
|
| 22 |
+
|
| 23 |
+
import json
|
| 24 |
+
import logging
|
| 25 |
+
import os
|
| 26 |
+
from typing import Iterable, Optional
|
| 27 |
+
|
| 28 |
+
import httpx
|
| 29 |
+
|
| 30 |
+
logger = logging.getLogger(__name__)
|
| 31 |
+
|
| 32 |
+
# Default to gemini-3.5-flash (Google's latest Flash-tier model,
|
| 33 |
+
# launched at I/O 2026 — faster and more accurate than the 2.5
|
| 34 |
+
# variants on short-prompt disambiguation). Override via the
|
| 35 |
+
# GEMINI_MODEL env var without restarting (read on each call) so the
|
| 36 |
+
# operator can fall back to 2.5-flash / 2.5-flash-lite if needed.
|
| 37 |
+
_DEFAULT_GEMINI_MODEL = "gemini-3.5-flash"
|
| 38 |
+
_GEMINI_URL_TEMPLATE = (
|
| 39 |
+
"https://generativelanguage.googleapis.com/v1beta/models/"
|
| 40 |
+
"{model}:generateContent"
|
| 41 |
+
)
|
| 42 |
+
_HTTP_TIMEOUT_SECONDS = 4.0
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def _gemini_url() -> str:
    """Return the ``generateContent`` endpoint URL for the configured model.

    The model name is read from the ``GEMINI_MODEL`` environment variable
    on every call; an unset or blank value selects the module default.
    """
    configured = os.getenv("GEMINI_MODEL", "").strip()
    model = configured if configured else _DEFAULT_GEMINI_MODEL
    return _GEMINI_URL_TEMPLATE.format(model=model)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def _build_prompt(transcribed: str, candidates: list[str]) -> str:
|
| 51 |
+
cand_list = ", ".join(candidates) if candidates else "(none)"
|
| 52 |
+
return (
|
| 53 |
+
"You are a speech-recognition error corrector for a reception desk. "
|
| 54 |
+
f"A visitor said their name and the speech-to-text returned: '{transcribed}'. "
|
| 55 |
+
f"The visitor is likely one of these scheduled or known people: {cand_list}. "
|
| 56 |
+
"Return a candidate name ONLY when it is very phonetically close to "
|
| 57 |
+
"the transcribed value — sharing most of the syllables, vowel sounds, "
|
| 58 |
+
"or stressed sound. If NO candidate is a clear phonetic match, you "
|
| 59 |
+
"MUST return the original transcribed value unchanged. Do not stretch "
|
| 60 |
+
"for a match. When in doubt, keep the original. "
|
| 61 |
+
"Good corrections (close phonetic match): "
|
| 62 |
+
"'Lee Win' -> 'Arav' (sounds like 'Le-win' ~ 'A-rav'? No — return 'Lee Win'). "
|
| 63 |
+
"'Christina' -> 'Krishna' (yes, similar syllables). "
|
| 64 |
+
"'Michael' -> 'Mukul' (yes, M-K consonants and similar vowels). "
|
| 65 |
+
"Bad corrections (NEVER do these — return the original instead): "
|
| 66 |
+
"'Bruh' -> stays 'Bruh' (no candidate sounds like Bruh). "
|
| 67 |
+
"'Bob' -> stays 'Bob' (no phonetic match in the list). "
|
| 68 |
+
"'Sarah' -> stays 'Sarah' (if Sarah is in the list, fine; if not, KEEP IT). "
|
| 69 |
+
"Return only the chosen name, no extra words, no quotes, no punctuation."
|
| 70 |
+
)
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def _phonetic_similarity(a: str, b: str) -> float:
|
| 74 |
+
"""Cheap phonetic similarity score in [0, 1].
|
| 75 |
+
|
| 76 |
+
Uses Python's stdlib difflib SequenceMatcher on the lowercased
|
| 77 |
+
strings. Not phonetic-perfect (no Soundex / Metaphone) but good
|
| 78 |
+
enough to reject "Henry" -> "Arjun" type hallucinations from
|
| 79 |
+
Gemini. Anything below 0.5 is "not really similar".
|
| 80 |
+
"""
|
| 81 |
+
from difflib import SequenceMatcher
|
| 82 |
+
return SequenceMatcher(None, a.lower(), b.lower()).ratio()
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def _normalizer_enabled() -> bool:
    """Report whether the operator opted in through NAME_NORMALIZER_ENABLED."""
    flag = os.getenv("NAME_NORMALIZER_ENABLED", "").strip().lower()
    return flag in {"1", "true", "yes", "on"}


def _strip_name_punctuation(text: str) -> str:
    """Remove quote and period characters, then surrounding whitespace."""
    for ch in ('"', "'", "."):
        text = text.replace(ch, "")
    return text.strip()


def normalize_name(
    transcribed: str,
    candidates: Iterable[str],
    api_key: Optional[str] = None,
) -> str:
    """Map a transcribed name to the closest candidate via Gemini.

    The normalizer is OFF unless the ``NAME_NORMALIZER_ENABLED`` environment
    variable is set to ``1``/``true``/``yes``/``on``. While it is off the
    stripped transcript is returned and no network call is made.

    Args:
        transcribed: Name as returned by speech-to-text. ``None`` or blank
            yields an empty string.
        candidates: Known names to disambiguate against. Falsy and
            non-string entries are ignored.
        api_key: Gemini API key. Falls back to the ``GEMINI_API_KEY``
            environment variable.

    Returns:
        One of the candidates (in its original spelling) when Gemini picks
        it, otherwise the stripped ``transcribed`` value. The original is
        kept when:

        - the normalizer is not enabled
        - no API key is available
        - there are no usable candidates, or none passes the similarity
          pre-filter
        - the HTTP call fails, times out, or returns a status >= 400
        - the response is empty or has an unexpected shape
        - Gemini's reply is neither a pre-filtered candidate nor the
          transcript itself
    """
    raw = (transcribed or "").strip()
    if not raw:
        return raw

    # Disabled by default since 2026-05-21. The normalizer was built when
    # OpenAI's gpt-4o-transcribe was mishearing short names (Arav -> Lee Win).
    # Gemini Live hears names cleanly natively, so the extra REST call before
    # every register_guest / lookup_employee was pure latency (~1-2s per call)
    # plus 429s on the free tier. Set NAME_NORMALIZER_ENABLED=1 when switching
    # VOICE_BACKEND back to "openai". This replaces a bare early return that
    # left everything below unreachable.
    if not _normalizer_enabled():
        return raw

    key = api_key or os.getenv("GEMINI_API_KEY", "").strip()
    if not key:
        logger.debug("normalize_name: no GEMINI_API_KEY, returning original")
        return raw

    cand_list = [c for c in (candidates or []) if c and isinstance(c, str)]
    if not cand_list:
        return raw

    # Pre-filter: only ask Gemini about candidates with at least some surface
    # similarity to the transcript. This cuts API cost and keeps totally
    # unrelated options out of the prompt. Scores are computed once and
    # reused for the debug message.
    min_similarity = 0.4
    scored = [(c, _phonetic_similarity(raw, c)) for c in cand_list]
    similar = [c for c, score in scored if score >= min_similarity]
    if not similar:
        logger.debug(
            "normalize_name: no candidate similar enough to %r (best=%.2f) — keeping original",
            raw, max((score for _, score in scored), default=0.0),
        )
        return raw

    payload = {
        "contents": [{
            "role": "user",
            "parts": [{"text": _build_prompt(raw, similar)}],
        }],
        "generationConfig": {
            "temperature": 0.0,
            "maxOutputTokens": 24,
        },
    }
    try:
        # The key travels in a header rather than the query string so it
        # cannot leak through logged or echoed request URLs.
        resp = httpx.post(
            _gemini_url(),
            headers={"x-goog-api-key": key},
            json=payload,
            timeout=_HTTP_TIMEOUT_SECONDS,
        )
        if resp.status_code >= 400:
            logger.debug("normalize_name: gemini HTTP %d: %s", resp.status_code, resp.text[:200])
            return raw
        data = resp.json()
    except Exception as e:
        # Fail-open boundary: any transport or decoding problem keeps the
        # transcript as-is.
        logger.debug("normalize_name: gemini call failed (%s)", e)
        return raw

    try:
        reply = (
            data["candidates"][0]["content"]["parts"][0]["text"]
        ).strip()
    except Exception:
        logger.debug("normalize_name: unexpected gemini response shape: %s", json.dumps(data)[:200])
        return raw

    # Gemini sometimes adds quotes or a trailing period despite the
    # instruction. A reply that is nothing but punctuation is treated as empty.
    cleaned_reply = _strip_name_punctuation(reply)
    if not cleaned_reply:
        return raw

    # Only trust a reply that is one of the candidates (case-insensitive) or
    # the transcript itself, so a hallucinated name is never returned.
    # Candidates are indexed under both their exact and their
    # punctuation-stripped spelling: names that legitimately contain an
    # apostrophe or period ("O'Brien", "J. Smith") must still match after the
    # reply has been cleaned. The exact spelling wins on a collision.
    lookup: dict[str, str] = {c.lower(): c for c in similar}
    for cand in similar:
        lookup.setdefault(_strip_name_punctuation(cand).lower(), cand)

    reply_keys = (reply.lower(), cleaned_reply.lower())
    for reply_key in reply_keys:
        chosen = lookup.get(reply_key)
        if chosen is None:
            continue
        if chosen.lower() != raw.lower():
            logger.info(
                "normalize_name: '%s' -> '%s' (gemini disambiguation)",
                raw, chosen,
            )
        return chosen

    raw_keys = {raw.lower(), _strip_name_punctuation(raw).lower()}
    if any(reply_key in raw_keys for reply_key in reply_keys):
        return raw

    logger.debug(
        "normalize_name: gemini returned %r which isn't in candidates %r — keeping original %r",
        cleaned_reply, similar, raw,
    )
    return raw
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
def collect_known_names() -> list[str]:
    """Return employee names and aliases for use as a candidate list.

    Used by ``register_guest`` (visitor name disambiguation against
    employees, NOT against calendar visitors) and ``lookup_employee``
    (host name -> directory entries). Calendar visitor names are
    excluded — including them was causing Gemini to map every transcribed
    visitor name onto the next-scheduled visitor.

    Returns:
        Stripped, non-empty names in first-seen order, de-duplicated
        case-insensitively (the first spelling encountered is kept). An
        empty list when the employee directory cannot be loaded.

    The function never raises: a directory that fails to import or load is
    logged at debug level, and an individual malformed record is skipped so
    that it cannot hide the employees listed after it.
    """
    try:
        from reachy_mini_receptionist import employees

        records = list(employees.get_all_employees())
    except Exception:
        # Best-effort by design: a degraded source must never block the
        # lookup, but the failure is recorded instead of vanishing silently.
        logger.debug("collect_known_names: employee directory unavailable", exc_info=True)
        return []

    # dict preserves insertion order, so setdefault gives first-seen-wins
    # de-duplication keyed on the lowercased name.
    unique: dict[str, str] = {}
    for emp in records:
        try:
            values = [emp.get("name"), *(emp.get("aliases") or [])]
        except (AttributeError, TypeError):
            # Not a mapping, or "aliases" is not iterable: skip this record.
            continue
        for value in values:
            if not isinstance(value, str):
                continue
            cleaned = value.strip()
            if cleaned:
                unique.setdefault(cleaned.lower(), cleaned)
    return list(unique.values())
|
src/reachy_mini_receptionist/openai_realtime.py
ADDED
|
@@ -0,0 +1,1839 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
import uuid
|
| 4 |
+
import base64
|
| 5 |
+
import random
|
| 6 |
+
import asyncio
|
| 7 |
+
import logging
|
| 8 |
+
import threading
|
| 9 |
+
import time
|
| 10 |
+
from typing import Any, Final, Tuple, Literal, Optional
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
from datetime import datetime
|
| 13 |
+
|
| 14 |
+
import cv2
|
| 15 |
+
import numpy as np
|
| 16 |
+
import gradio as gr
|
| 17 |
+
from openai import AsyncOpenAI
|
| 18 |
+
from fastrtc import AdditionalOutputs, AsyncStreamHandler, wait_for_item, audio_to_int16
|
| 19 |
+
from numpy.typing import NDArray
|
| 20 |
+
from scipy.signal import resample
|
| 21 |
+
from websockets.exceptions import ConnectionClosedError
|
| 22 |
+
|
| 23 |
+
from reachy_mini_receptionist.config import config
|
| 24 |
+
from reachy_mini_receptionist.prompts import get_session_voice, get_session_instructions
|
| 25 |
+
from reachy_mini_receptionist.tools.core_tools import (
|
| 26 |
+
ToolDependencies,
|
| 27 |
+
get_tool_specs,
|
| 28 |
+
)
|
| 29 |
+
from reachy_mini_receptionist.tools.background_tool_manager import (
|
| 30 |
+
ToolCallRoutine,
|
| 31 |
+
ToolNotification,
|
| 32 |
+
BackgroundToolManager,
|
| 33 |
+
)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
logger = logging.getLogger(__name__)
|
| 37 |
+
|
| 38 |
+
OPEN_AI_INPUT_SAMPLE_RATE: Final[Literal[24000]] = 24000
|
| 39 |
+
OPEN_AI_OUTPUT_SAMPLE_RATE: Final[Literal[24000]] = 24000
|
| 40 |
+
|
| 41 |
+
# Cost tracking from usage data (pricing as of Feb 2026 https://openai.com/api/pricing/)
|
| 42 |
+
AUDIO_INPUT_COST_PER_1M = 32.0
|
| 43 |
+
AUDIO_OUTPUT_COST_PER_1M = 64.0
|
| 44 |
+
TEXT_INPUT_COST_PER_1M = 4.0
|
| 45 |
+
TEXT_OUTPUT_COST_PER_1M = 16.0
|
| 46 |
+
IMAGE_INPUT_COST_PER_1M = 5.0
|
| 47 |
+
|
| 48 |
+
_RESPONSE_DONE_TIMEOUT: Final[float] = 30.0
|
| 49 |
+
|
| 50 |
+
# How often to rebuild the STT bias prompt + push it to the realtime session
|
| 51 |
+
# so calendar additions made AFTER the session connected (visitors added on
|
| 52 |
+
# the fly) still benefit from name-biased transcription. 300s aligns with
|
| 53 |
+
# the iCal cache TTL so each refresh either reuses the cache or triggers
|
| 54 |
+
# at most one fetch.
|
| 55 |
+
_STT_BIAS_REFRESH_SECONDS: Final[float] = 300.0
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def _format_bias_prompt(names: list[str]) -> str:
|
| 59 |
+
seen: set[str] = set()
|
| 60 |
+
unique: list[str] = []
|
| 61 |
+
for n in names:
|
| 62 |
+
key = n.lower()
|
| 63 |
+
if key in seen:
|
| 64 |
+
continue
|
| 65 |
+
seen.add(key)
|
| 66 |
+
unique.append(n)
|
| 67 |
+
|
| 68 |
+
if not unique:
|
| 69 |
+
return (
|
| 70 |
+
"Reception lobby check-in conversation. Visitor names and host names."
|
| 71 |
+
)
|
| 72 |
+
|
| 73 |
+
# Plain comma-separated bias list. We tried beefing this up with
|
| 74 |
+
# "I am <name>. Here to see <name>." sentences per name to increase
|
| 75 |
+
# the bias signal — but gpt-4o-transcribe started ECHOING those
|
| 76 |
+
# sentences back as the user transcript when audio was unclear,
|
| 77 |
+
# so real visitor speech ("I am David, here to see Andrew") was
|
| 78 |
+
# being mistranscribed as "I am David. Here to see David." (a
|
| 79 |
+
# name from the prompt copied to both slots). Going back to the
|
| 80 |
+
# simple list keeps the bias direction without the echo failure.
|
| 81 |
+
#
|
| 82 |
+
# OpenAI's Realtime API caps the transcription prompt at 1024 chars.
|
| 83 |
+
_MAX_PROMPT_CHARS = 1000
|
| 84 |
+
body = ", ".join(unique)
|
| 85 |
+
msg = (
|
| 86 |
+
"Reception lobby check-in. Expected visitor and host names include: "
|
| 87 |
+
+ body
|
| 88 |
+
+ "."
|
| 89 |
+
)
|
| 90 |
+
if len(msg) > _MAX_PROMPT_CHARS:
|
| 91 |
+
msg = msg[: _MAX_PROMPT_CHARS - 1] + "."
|
| 92 |
+
return msg
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def _collect_employee_names() -> list[str]:
|
| 96 |
+
names: list[str] = []
|
| 97 |
+
try:
|
| 98 |
+
from reachy_mini_receptionist import employees
|
| 99 |
+
for emp in employees.get_all_employees():
|
| 100 |
+
n = (emp.get("name") if isinstance(emp, dict) else getattr(emp, "name", "")) or ""
|
| 101 |
+
n = n.strip()
|
| 102 |
+
if n:
|
| 103 |
+
names.append(n)
|
| 104 |
+
aliases = (
|
| 105 |
+
emp.get("aliases") if isinstance(emp, dict) else getattr(emp, "aliases", None)
|
| 106 |
+
) or []
|
| 107 |
+
for alias in aliases:
|
| 108 |
+
a = (alias or "").strip()
|
| 109 |
+
if a:
|
| 110 |
+
names.append(a)
|
| 111 |
+
except Exception:
|
| 112 |
+
pass
|
| 113 |
+
return names
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def _collect_appointment_names_sync() -> list[str]:
|
| 117 |
+
# See _collect_appointment_names_async — calendar visitor names are
|
| 118 |
+
# intentionally excluded from STT bias to stop the model defaulting
|
| 119 |
+
# every short utterance to the next scheduled visitor.
|
| 120 |
+
return []
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
async def _collect_appointment_names_async() -> list[str]:
|
| 124 |
+
# Calendar visitor names are intentionally NOT included in the STT
|
| 125 |
+
# bias prompt. We observed STT picking "Henry" (the next calendar
|
| 126 |
+
# entry) for any short utterance from a visitor, because the bias
|
| 127 |
+
# heavily prefers names that appear in the prompt. The employee
|
| 128 |
+
# directory provides enough name coverage for hosts; visitor names
|
| 129 |
+
# have to be heard fresh from speech.
|
| 130 |
+
return []
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
def _build_transcription_bias_prompt() -> str:
    """Build the STT bias prompt synchronously, for callers outside an event loop.

    Concatenates the names from ``_collect_appointment_names_sync`` with the
    employee-directory names and formats them with ``_format_bias_prompt``.
    Prefer ``_build_transcription_bias_prompt_async`` in async paths.

    NOTE(review): earlier wording said this blocks on iCal HTTP. The
    appointment collector it calls currently returns an empty list without
    doing any I/O.
    """
    appointment_names = _collect_appointment_names_sync()
    employee_names = _collect_employee_names()
    return _format_bias_prompt(appointment_names + employee_names)
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
async def _build_transcription_bias_prompt_async() -> str:
    """Build the STT bias prompt from within async code.

    OpenAI's transcription API accepts a free-form ``prompt`` string. When
    the recognizer hears mumbled audio it leans toward words/phrases in this
    prompt, so listing known names makes non-English names (Mukul, Krishna,
    Shyam, etc.) far more likely to come back correct instead of collapsing
    to "Michael"/"Christina".

    The names are those from ``_collect_appointment_names_async`` followed by
    the employee directory, formatted with ``_format_bias_prompt``. The
    appointment collector currently contributes no names, so in practice the
    prompt is built from the employee directory alone.

    The employee collector swallows its own errors, so a failing directory
    yields fewer (possibly zero) names rather than an exception; with no
    names the formatter returns its generic prompt.
    """
    appointment_names = await _collect_appointment_names_async()
    employee_names = _collect_employee_names()
    return _format_bias_prompt(appointment_names + employee_names)
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
def _compute_response_cost(usage: Any) -> float:
    """Compute the dollar cost of one response from its usage object.

    Args:
        usage: Object that may expose ``input_token_details`` and
            ``output_token_details``. Each of those may in turn expose
            ``audio_tokens`` and ``text_tokens`` (plus ``image_tokens`` on
            the input side). Missing or falsy values count as zero.

    Returns:
        The sum of each token count multiplied by its per-million rate;
        0.0 when no details are present.
    """
    input_details = getattr(usage, "input_token_details", None)
    output_details = getattr(usage, "output_token_details", None)

    input_rates = (
        ("audio_tokens", AUDIO_INPUT_COST_PER_1M),
        ("text_tokens", TEXT_INPUT_COST_PER_1M),
        ("image_tokens", IMAGE_INPUT_COST_PER_1M),
    )
    output_rates = (
        ("audio_tokens", AUDIO_OUTPUT_COST_PER_1M),
        ("text_tokens", TEXT_OUTPUT_COST_PER_1M),
    )

    total = 0.0
    for details, rates in ((input_details, input_rates), (output_details, output_rates)):
        if not details:
            continue
        for token_kind, rate_per_million in rates:
            total += (getattr(details, token_kind, 0) or 0) * rate_per_million / 1e6
    return total
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
class OpenaiRealtimeHandler(AsyncStreamHandler):
|
| 172 |
+
"""An OpenAI realtime handler for fastrtc Stream."""
|
| 173 |
+
|
| 174 |
+
def __init__(
    self,
    deps: ToolDependencies,
    gradio_mode: bool = False,
    instance_path: Optional[str] = None,
    session_manager: Any | None = None,
    controller: Any | None = None,
):
    """Set up the handler's state; the connection itself starts as ``None``.

    Args:
        deps: Tool dependencies, stored on the instance as ``self.deps``.
        gradio_mode: Stored as ``self.gradio_mode``.
        instance_path: Stored as ``self.instance_path``.
        session_manager: Optional; stored as ``self._session_manager``.
        controller: Optional; stored as ``self._controller``.

    ``session_manager`` and ``controller`` default to ``None`` so that
    existing test harnesses constructing the handler directly keep working.
    """
    super().__init__(
        expected_layout="mono",
        output_sample_rate=OPEN_AI_OUTPUT_SAMPLE_RATE,
        input_sample_rate=OPEN_AI_INPUT_SAMPLE_RATE,
    )

    # --- Sample rates -----------------------------------------------------
    # Re-declare the inherited attributes with the narrow literal type that
    # OpenAI's strict typing expects, then pin them to the module constants
    # (only values used in the API need this).
    self.output_sample_rate: Literal[24000] = self.output_sample_rate
    self.input_sample_rate: Literal[24000] = self.input_sample_rate
    self.output_sample_rate = OPEN_AI_OUTPUT_SAMPLE_RATE
    self.input_sample_rate = OPEN_AI_INPUT_SAMPLE_RATE

    # --- Collaborators and construction arguments --------------------------
    self.deps = deps
    self._session_manager = session_manager
    self._controller = controller

    # --- Connection and outbound queue -------------------------------------
    self.connection: Any = None
    self.output_queue: "asyncio.Queue[Tuple[int, NDArray[np.int16]] | AdditionalOutputs]" = asyncio.Queue()

    # --- Timing and mode flags ---------------------------------------------
    self.last_activity_time = asyncio.get_event_loop().time()
    self.start_time = asyncio.get_event_loop().time()
    self.is_idle_tool_call = False
    self.gradio_mode = gradio_mode
    self.instance_path = instance_path

    # How the API key was supplied (environment vs. textbox) and its value.
    self._key_source: Literal["env", "textbox"] = "env"
    self._provided_api_key: str | None = None

    # --- Partial-transcript debouncing -------------------------------------
    self.partial_transcript_task: asyncio.Task[None] | None = None
    # Sequence counter that lets stale emissions be recognised and dropped.
    self.partial_transcript_sequence: int = 0
    self.partial_debounce_delay = 0.5  # seconds

    # --- Lifecycle ----------------------------------------------------------
    self._shutdown_requested: bool = False
    self._connected_event: asyncio.Event = asyncio.Event()

    # --- Background tools and cost accounting -------------------------------
    self.tool_manager = BackgroundToolManager()
    self.cumulative_cost: float = 0.0

    # --- Response serialisation ---------------------------------------------
    # The Realtime API only allows one active response per conversation at a
    # time, so requests are queued and a dedicated worker task
    # (_response_sender_loop) dequeues and sends them one by one. The "done"
    # event starts out set.
    self._pending_responses: asyncio.Queue[dict[str, Any]] = asyncio.Queue()
    self._response_done_event: asyncio.Event = asyncio.Event()
    self._response_done_event.set()
    self._last_response_rejected: bool = False
    self._runtime_loop: asyncio.AbstractEventLoop | None = None

    # --- External face context events ---------------------------------------
    # The last event successfully pushed, plus the latest one still waiting
    # for the runtime loop / connection. Each has its own lock.
    self._face_event_lock = threading.Lock()
    self._last_face_event_sent: dict[str, Any] | None = None
    self._pending_face_event_lock = threading.Lock()
    self._pending_face_event: dict[str, Any] | None = None

    # Tool arguments keyed by call_id: stored when a tool starts and consumed
    # when its result arrives, so the ConversationController receives both
    # the arguments and the result.
    self._tool_call_args: dict[str, dict[str, Any]] = {}

    # --- Session context events ----------------------------------------------
    # Same last-sent / pending pairing as for face events.
    self._session_event_lock = threading.Lock()
    self._last_session_event_sent: dict[str, Any] | None = None
    self._pending_session_event_lock = threading.Lock()
    self._pending_session_event: dict[str, Any] | None = None

    # One-shot guard: the IDLE-state workflow hint is pushed once per IDLE
    # stretch rather than on every speech_started event.
    self._idle_speech_cue_pushed: bool = False

    # Last STT bias prompt actually sent to the realtime session. It lets the
    # periodic refresh loop skip the session.update when nothing has changed
    # (calendar quiet, employee directory unchanged).
    self._last_stt_bias_prompt: Optional[str] = None
|
| 267 |
+
|
| 268 |
+
def _stash_pending_face_event(self, face_event: dict[str, Any]) -> None:
|
| 269 |
+
"""Keep only the latest pending face event for eventual delivery."""
|
| 270 |
+
with self._pending_face_event_lock:
|
| 271 |
+
self._pending_face_event = dict(face_event)
|
| 272 |
+
|
| 273 |
+
def _pop_pending_face_event(self) -> dict[str, Any] | None:
|
| 274 |
+
"""Pop and clear latest pending face event."""
|
| 275 |
+
with self._pending_face_event_lock:
|
| 276 |
+
pending = self._pending_face_event
|
| 277 |
+
self._pending_face_event = None
|
| 278 |
+
return pending
|
| 279 |
+
|
| 280 |
+
async def _flush_pending_face_event(self) -> None:
    """Try to deliver the buffered face event, if there is one.

    The pending slot is emptied first and its event is handed to
    ``_push_face_context_event``. If that call raises, the event is put
    back so a later flush can retry it, but only when the slot is still
    empty: a newer event stashed while the push was awaiting must not be
    overwritten by this older one.
    """
    pending = self._pop_pending_face_event()
    if pending is None:
        return
    try:
        await self._push_face_context_event(pending)
    except Exception as e:
        logger.debug("Failed to flush pending face event: %s", e)
        with self._pending_face_event_lock:
            # Restore only into an empty slot; a newer pending event wins.
            if self._pending_face_event is None:
                self._pending_face_event = pending
|
| 290 |
+
|
| 291 |
+
def copy(self) -> "OpenaiRealtimeHandler":
    """Build a new handler from this one's constructor arguments.

    The new handler receives the same ``deps``, ``gradio_mode`` and
    ``instance_path``, and shares this handler's session manager and
    controller objects.
    """
    positional = (self.deps, self.gradio_mode, self.instance_path)
    shared_wiring = {
        "session_manager": self._session_manager,
        "controller": self._controller,
    }
    return OpenaiRealtimeHandler(*positional, **shared_wiring)
|
| 300 |
+
|
| 301 |
+
async def apply_personality(self, profile: str | None) -> str:
    """Apply a new personality (profile) at runtime if possible.

    - Updates the global config's selected profile for subsequent calls.
    - If a realtime connection is active, sends a session.update with the
      freshly resolved instructions and voice, then restarts the session
      so the change is guaranteed to take effect.

    Args:
        profile: Profile name to select; ``None`` is logged as the
            "built-in default".

    Returns:
        A short status message for UI feedback. Failures are reported in
        the returned message instead of being raised.
    """
    try:
        # Update the in-process config value and env
        from reachy_mini_receptionist.config import config as _config
        from reachy_mini_receptionist.config import set_custom_profile

        set_custom_profile(profile)
        logger.info(
            "Set custom profile to %r (config=%r)", profile, getattr(_config, "REACHY_MINI_CUSTOM_PROFILE", None)
        )

        try:
            instructions = get_session_instructions()
            voice = get_session_voice()
        except (Exception, SystemExit) as e:
            # The prompt loader may raise SystemExit; report it instead of
            # crashing. KeyboardInterrupt and other BaseException types are
            # deliberately not caught so they keep propagating.
            logger.error("Failed to resolve personality content: %s", e)
            return f"Failed to apply personality: {e}"

        # Attempt a live update first, then force a full restart to ensure it sticks
        if self.connection is not None:
            try:
                await self.connection.session.update(
                    session={
                        "type": "realtime",
                        "instructions": instructions,
                        "audio": {"output": {"voice": voice}},
                    },
                )
                logger.info("Applied personality via live update: %s", profile or "built-in default")
            except Exception as e:
                logger.warning("Live update failed; will restart session: %s", e)

            # Force a real restart to guarantee the new instructions/voice
            try:
                await self._restart_session()
                return "Applied personality and restarted realtime session."
            except Exception as e:
                logger.warning("Failed to restart session after apply: %s", e)
                return "Applied personality. Will take effect on next connection."
        else:
            logger.info(
                "Applied personality recorded: %s (no live connection; will apply on next session)",
                profile or "built-in default",
            )
            return "Applied personality. Will take effect on next connection."
    except Exception as e:
        logger.error("Error applying personality '%s': %s", profile, e)
        return f"Failed to apply personality: {e}"
|
| 357 |
+
|
| 358 |
+
async def _emit_debounced_partial(self, transcript: str, sequence: int) -> None:
    """Publish a partial transcript once the debounce delay has elapsed.

    Nothing is published when a newer partial has superseded this one,
    i.e. ``partial_transcript_sequence`` no longer equals ``sequence``.
    Cancellation is logged and re-raised.
    """
    try:
        await asyncio.sleep(self.partial_debounce_delay)
        superseded = self.partial_transcript_sequence != sequence
        if not superseded:
            partial_update = AdditionalOutputs({"role": "user_partial", "content": transcript})
            await self.output_queue.put(partial_update)
            logger.debug(f"Debounced partial emitted: {transcript}")
    except asyncio.CancelledError:
        logger.debug("Debounced partial cancelled")
        raise
|
| 369 |
+
|
| 370 |
+
async def start_up(self) -> None:
    """Start the handler with minimal retries on unexpected websocket closure.

    Records the running event loop, resolves the OpenAI API key, creates
    the client and runs the realtime session. The key comes from
    ``config.OPENAI_API_KEY``; in Gradio mode without a configured key the
    handler waits for the UI arguments and reads the key from the textbox
    value at index 3. Outside that case a missing or blank key is replaced
    by a placeholder so the handler can run with a stubbed client.

    A session that ends with ``ConnectionClosedError`` is retried with
    exponential backoff plus jitter, up to three attempts in total.

    Raises:
        ConnectionClosedError: if the final attempt also closes abruptly.
    """
    self._runtime_loop = asyncio.get_running_loop()
    openai_api_key = config.OPENAI_API_KEY
    if self.gradio_mode and not openai_api_key:
        # api key was not found in .env or in the environment variables
        await self.wait_for_args()  # type: ignore[no-untyped-call]
        args = list(self.latest_args)
        # Tolerate a short argument list or a non-string value instead of
        # raising IndexError/TypeError, and treat blank input as "no key".
        raw_key = args[3] if len(args) > 3 else None
        textbox_api_key = raw_key.strip() if isinstance(raw_key, str) else ""
        if textbox_api_key:
            openai_api_key = textbox_api_key
            self._key_source = "textbox"
            self._provided_api_key = textbox_api_key
        else:
            openai_api_key = config.OPENAI_API_KEY
    elif not openai_api_key or not openai_api_key.strip():
        # In headless console mode, LocalStream now blocks startup until the key is provided.
        # However, unit tests may invoke this handler directly with a stubbed client.
        # To keep tests hermetic without requiring a real key, fall back to a placeholder.
        logger.warning("OPENAI_API_KEY missing. Proceeding with a placeholder (tests/offline).")
        openai_api_key = "DUMMY"

    self.client = AsyncOpenAI(api_key=openai_api_key)

    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        try:
            await self._run_realtime_session()
            # Normal exit from the session, stop retrying
            return
        except ConnectionClosedError as e:
            # Abrupt close (e.g., "no close frame received or sent") → retry
            logger.warning("Realtime websocket closed unexpectedly (attempt %d/%d): %s", attempt, max_attempts, e)
            if attempt < max_attempts:
                # exponential backoff with jitter: ~1s after the first
                # failure, ~2s after the second
                base_delay = 2 ** (attempt - 1)
                jitter = random.uniform(0, 0.5)
                delay = base_delay + jitter
                logger.info("Retrying in %.1f seconds...", delay)
                await asyncio.sleep(delay)
                continue
            raise
        finally:
            # never keep a stale reference
            self.connection = None
            try:
                self._connected_event.clear()
            except Exception:
                pass
|
| 420 |
+
|
| 421 |
+
def notify_external_face_event(self, face_event: dict[str, Any]) -> None:
    """Accept a face-worker state transition from any thread.

    The event is scheduled onto the runtime loop, where it is pushed into
    the conversation via conversation.item.create without forcing a
    response. When the loop is not available, or scheduling or the push
    itself fails, the event is stashed as the pending face event instead.
    """
    loop = self._runtime_loop
    loop_ready = loop is not None and not loop.is_closed()
    if not loop_ready:
        logger.debug("Deferring face event (runtime loop not ready): %s", face_event)
        self._stash_pending_face_event(face_event)
        return

    def _restash_on_failure(fut: "asyncio.Future[None]") -> None:
        # Runs when the scheduled push finishes; a failure re-buffers the event.
        try:
            fut.result()
        except Exception as e:
            logger.debug("Face context push failed: %s", e)
            self._stash_pending_face_event(face_event)

    try:
        scheduled = asyncio.run_coroutine_threadsafe(self._push_face_context_event(face_event), loop)
        scheduled.add_done_callback(_restash_on_failure)
    except Exception as e:
        logger.debug("Failed to schedule face context event: %s", e)
        self._stash_pending_face_event(face_event)
|
| 447 |
+
|
| 448 |
+
async def _push_face_context_event(self, face_event: dict[str, Any]) -> None:
    """Push a face state change as external context without triggering a response.

    Without a connection the event is stashed as the pending face event
    and nothing is sent. Otherwise a ``user`` message describing the
    event is added to the conversation and a record of what was sent is
    stored for ``get_last_face_event_sent``.

    Args:
        face_event: Mapping read for the keys ``state``, ``name``,
            ``previous_state``, ``previous_name``, ``lbph_confidence``
            and ``detection_confidence``; missing keys fall back to
            ``"unknown"`` / ``None`` / ``0.0``.
    """
    if not self.connection:
        logger.debug("Deferring face context event (no connection): %s", face_event)
        self._stash_pending_face_event(face_event)
        return

    def _as_float(value: Any) -> float:
        """Coerce a confidence value to float, using 0.0 when it is not numeric."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    state = str(face_event.get("state", "unknown"))
    name = face_event.get("name")
    previous_state = str(face_event.get("previous_state", "unknown"))
    previous_name = face_event.get("previous_name")
    lbph_conf = face_event.get("lbph_confidence", 0.0)
    det_conf = face_event.get("detection_confidence", 0.0)

    # Coerce before sending: a conversion error raised after the item has
    # gone out would look like a failed push to callers, which re-stash the
    # event and would then deliver it a second time.
    lbph_value = _as_float(lbph_conf)
    det_value = _as_float(det_conf)

    msg = (
        f"[External face update {self.format_timestamp()}] "
        f"state={state}; name={name}; previous_state={previous_state}; "
        f"previous_name={previous_name}; lbph={lbph_conf}; det={det_conf}. "
        "This is context only. Do not respond unless the user speaks."
    )

    await self.connection.conversation.item.create(
        item={
            "type": "message",
            "role": "user",
            "content": [{"type": "input_text", "text": msg}],
        },
    )

    sent_at_epoch = time.time()
    sent_payload = {
        "state": state,
        "name": name,
        "previous_state": previous_state,
        "previous_name": previous_name,
        "lbph_confidence": lbph_value,
        "detection_confidence": det_value,
        "sent_at": sent_at_epoch,
        "sent_at_iso": datetime.fromtimestamp(sent_at_epoch).strftime("%Y-%m-%d %H:%M:%S"),
    }
    with self._face_event_lock:
        self._last_face_event_sent = sent_payload

    logger.info("Pushed external face context event: %s", msg)
|
| 492 |
+
|
| 493 |
+
async def _prime_no_face_context(self) -> None:
    """Push an explicit ``no_face`` context event.

    Any failure is logged at debug level and otherwise ignored.
    """
    try:
        startup_event = {
            "event": "face_state_changed",
            "state": "no_face",
            "name": None,
            "previous_state": "unknown",
            "previous_name": None,
            "lbph_confidence": 0.0,
            "detection_confidence": 0.0,
            "timestamp": time.time(),
        }
        await self._push_face_context_event(startup_event)
        logger.info("Primed startup face context: state=no_face")
    except Exception as e:
        logger.debug("Failed to prime startup no-face context: %s", e)
|
| 511 |
+
|
| 512 |
+
def get_last_face_event_sent(self) -> dict[str, Any] | None:
|
| 513 |
+
"""Return the last face context event that was successfully sent to the model."""
|
| 514 |
+
with self._face_event_lock:
|
| 515 |
+
if self._last_face_event_sent is None:
|
| 516 |
+
return None
|
| 517 |
+
return dict(self._last_face_event_sent)
|
| 518 |
+
|
| 519 |
+
async def _push_idle_speech_cue_if_needed(self) -> None:
    """Send the IDLE workflow hint the first time a visitor speaks while idle.

    State-specific hints are otherwise delivered by
    ``notify_session_event`` when the state changes. While the state
    stays IDLE there is no change to trigger one, so this method pushes
    the IDLE hint itself. ``_idle_speech_cue_pushed`` limits it to one
    push until that flag is reset.

    Does nothing without a connection or a session manager, outside the
    IDLE state, or when there is no hint for the state. Errors are
    logged at debug level and swallowed.
    """
    if not self.connection or self._session_manager is None:
        return
    try:
        from reachy_mini_receptionist.receptionist_state import ReceptionState
        from reachy_mini_receptionist.conversation_controller import next_action_hint

        state_now = self._session_manager.current_state
        if state_now != ReceptionState.IDLE:
            return
        hint = next_action_hint(state_now)
        if not hint or getattr(self, "_idle_speech_cue_pushed", False):
            return

        prefix = f"[Backend idle-speech cue {self.format_timestamp()}] "
        msg = prefix + f"Visitor just started speaking while state=idle. {hint}"
        cue_item = {
            "type": "message",
            "role": "user",
            "content": [{"type": "input_text", "text": msg}],
        }
        await self.connection.conversation.item.create(item=cue_item)
        self._idle_speech_cue_pushed = True
        logger.info("Pushed idle-speech cue: %s", msg)
    except Exception as e:
        logger.debug("idle-speech cue push error: %s", e)
|
| 558 |
+
|
| 559 |
+
# ------------------------------------------------------------------
|
| 560 |
+
# Session context events (mirror of the face-event channel)
|
| 561 |
+
# ------------------------------------------------------------------
|
| 562 |
+
|
| 563 |
+
def _stash_pending_session_event(self, payload: dict[str, Any]) -> None:
    """Remember ``payload`` as the single pending session event.

    A shallow copy is stored and replaces any previously pending event.
    """
    snapshot = dict(payload)
    with self._pending_session_event_lock:
        self._pending_session_event = snapshot
|
| 566 |
+
|
| 567 |
+
def _pop_pending_session_event(self) -> dict[str, Any] | None:
|
| 568 |
+
with self._pending_session_event_lock:
|
| 569 |
+
pending = self._pending_session_event
|
| 570 |
+
self._pending_session_event = None
|
| 571 |
+
return pending
|
| 572 |
+
|
| 573 |
+
async def _flush_pending_session_event(self) -> None:
    """Try to deliver the buffered session event, if there is one.

    The pending slot is emptied first and its event is handed to
    ``_push_session_context_event``. If that call raises, the event is
    put back for a later retry, but only when the slot is still empty:
    a newer event stashed while the push was awaiting must not be
    overwritten by this older one.
    """
    pending = self._pop_pending_session_event()
    if pending is None:
        return
    try:
        await self._push_session_context_event(pending)
    except Exception as e:
        logger.debug("Failed to flush pending session event: %s", e)
        with self._pending_session_event_lock:
            # Restore only into an empty slot; a newer pending event wins.
            if self._pending_session_event is None:
                self._pending_session_event = pending
|
| 582 |
+
|
| 583 |
+
def notify_session_event(
    self,
    previous_state: Any,
    new_state: Any,
    snapshot: Any,
) -> None:
    """Subscriber callback for SessionManager state changes.

    Builds a payload from the two states and the snapshot, then
    schedules ``_push_session_context_event`` on the runtime loop so the
    change reaches the LLM as a conversation item. When the loop is not
    available, or scheduling or the push fails, the payload is stashed
    as the pending session event. A payload that cannot be built is
    logged and dropped.
    """
    try:
        payload = {
            "previous_state": getattr(previous_state, "value", str(previous_state)),
            "new_state": getattr(new_state, "value", str(new_state)),
            "snapshot": snapshot.to_dict() if hasattr(snapshot, "to_dict") else {},
        }
    except Exception as e:
        logger.debug("notify_session_event: failed to build payload: %s", e)
        return

    # Re-arm the idle-speech cue whenever the session lands back in IDLE,
    # so the next first utterance while idle pushes the cue again.
    try:
        if payload["new_state"] == "idle":
            self._idle_speech_cue_pushed = False
    except Exception:
        pass

    loop = self._runtime_loop
    loop_ready = loop is not None and not loop.is_closed()
    if not loop_ready:
        logger.debug("Deferring session event (runtime loop not ready): %s", payload)
        self._stash_pending_session_event(payload)
        return

    def _restash_on_failure(fut: "asyncio.Future[None]") -> None:
        # Runs when the scheduled push finishes; a failure re-buffers the payload.
        try:
            fut.result()
        except Exception as e:
            logger.debug("Session context push failed: %s", e)
            self._stash_pending_session_event(payload)

    try:
        scheduled = asyncio.run_coroutine_threadsafe(
            self._push_session_context_event(payload), loop
        )
        scheduled.add_done_callback(_restash_on_failure)
    except Exception as e:
        logger.debug("Failed to schedule session context event: %s", e)
        self._stash_pending_session_event(payload)
|
| 636 |
+
|
| 637 |
+
async def _push_session_context_event(self, payload: dict[str, Any]) -> None:
    """Deliver a session state change to the model as a conversation item.

    The message carries the state transition and selected snapshot
    fields. When ``conversation_controller.next_action_hint`` yields a
    hint for the new state, it is appended either as a ``SPEAK NOW:``
    instruction (when ``should_speak_immediately`` says so) or as a
    ``Next:`` directive to act on once the visitor speaks. For
    speak-now states a ``response.create`` is queued as well.

    Without a connection the payload is stashed as the pending session
    event and nothing is sent.
    """
    if not self.connection:
        logger.debug("Deferring session context event (no connection): %s", payload)
        self._stash_pending_session_event(payload)
        return

    snapshot = payload.get("snapshot") or {}
    target_state = payload.get("new_state")

    directive = ""
    speak_immediately = False
    try:
        from reachy_mini_receptionist.conversation_controller import (
            next_action_hint,
            should_speak_immediately,
        )
        from reachy_mini_receptionist.receptionist_state import ReceptionState

        if target_state:
            state_enum = ReceptionState(target_state)
            directive = next_action_hint(state_enum)
            speak_immediately = should_speak_immediately(state_enum)
    except Exception as e:
        logger.debug("Could not compute next_action_hint: %s", e)

    header = f"[Backend session update {self.format_timestamp()}] "
    transition = f"state: {payload.get('previous_state')} -> {target_state}; "
    details = (
        f"visitor={snapshot.get('visitor_name')}; "
        f"employee={snapshot.get('employee_name')}; "
        f"appointment={(snapshot.get('matched_appointment') or {}).get('time')}; "
        f"email_sent_to={snapshot.get('email_sent_to')}."
    )
    base = header + transition + details

    if not directive:
        msg = f"{base} This is context only. Do not respond unless the user speaks."
    elif speak_immediately:
        # The visitor is waiting on the bot, so no "stay quiet" suffix here.
        msg = f"{base} SPEAK NOW: {directive}"
    else:
        # Passive state change: hold the hint until the visitor speaks.
        msg = f"{base} Next: {directive} (Stay quiet until the visitor speaks; this is context only.)"

    context_item = {
        "type": "message",
        "role": "user",
        "content": [{"type": "input_text", "text": msg}],
    }
    await self.connection.conversation.item.create(item=context_item)

    record = dict(payload, sent_at=time.time(), hint=directive)
    with self._session_event_lock:
        self._last_session_event_sent = record

    logger.info("Pushed session context event: %s", msg)

    if not speak_immediately:
        return

    # Speak-now transitions queue a response.create so the model talks
    # straight away; the request goes through the sender queue like any
    # other response.create.
    try:
        await self._safe_response_create(
            response={
                "instructions": (
                    "Use the latest [Backend session update] context "
                    "and speak to the visitor now. Keep it concise."
                ),
            },
        )
    except Exception as e:
        logger.debug("Failed to queue speak-now response.create: %s", e)
|
| 725 |
+
|
| 726 |
+
def get_last_session_event_sent(self) -> dict[str, Any] | None:
|
| 727 |
+
"""Return the last session context event sent to the model."""
|
| 728 |
+
with self._session_event_lock:
|
| 729 |
+
if self._last_session_event_sent is None:
|
| 730 |
+
return None
|
| 731 |
+
return dict(self._last_session_event_sent)
|
| 732 |
+
|
| 733 |
+
async def _restart_session(self) -> None:
    """Force-close the current session and start a fresh one in background.

    The existing connection, if any, is closed and cleared. A new
    ``_run_realtime_session`` task is then started and this method waits
    up to 5 seconds for ``_connected_event``; on timeout the new session
    keeps establishing in the background. Nothing is started when the
    OpenAI client has not been created yet.

    Errors are logged and never raised to the caller.
    """
    try:
        if self.connection is not None:
            try:
                await self.connection.close()
            except Exception as e:
                # Best effort: the connection is being discarded anyway.
                logger.debug("Ignoring error while closing realtime connection: %s", e)
            finally:
                self.connection = None

        # Ensure we have a client (start_up must have run once)
        if getattr(self, "client", None) is None:
            logger.warning("Cannot restart: OpenAI client not initialized yet.")
            return

        try:
            self._connected_event.clear()
        except Exception:
            pass

        # Keep a strong reference to the task: the event loop only holds
        # weak references, so an unreferenced task may be garbage-collected
        # before it finishes.
        self._restart_task = asyncio.create_task(
            self._run_realtime_session(), name="openai-realtime-restart"
        )
        try:
            await asyncio.wait_for(self._connected_event.wait(), timeout=5.0)
            logger.info("Realtime session restarted and connected.")
        except asyncio.TimeoutError:
            logger.warning("Realtime session restart timed out; continuing in background.")
    except Exception as e:
        logger.warning("_restart_session failed: %s", e)
|
| 765 |
+
|
| 766 |
+
async def _safe_response_create(self, **kwargs: Any) -> None:
    """Queue ``response.create()`` keyword arguments for later sending.

    The kwargs dict is placed on ``_pending_responses`` as a single item;
    the sender worker ``_response_sender_loop()`` takes it from there and
    performs the actual call.
    """
    queue = self._pending_responses
    await queue.put(kwargs)
|
| 772 |
+
|
| 773 |
+
async def _stt_bias_refresh_loop(self) -> None:
    """DISABLED for the demo cut: returns immediately.

    The periodic session.update was suspected of breaking server VAD
    mid-session (yes/no replies stopped being captured). The refresh
    logic below the early ``return`` is kept so it can be re-enabled
    after the demo with more testing.
    """
    return
    try:  # noqa: unreachable until re-enabled
        while self.connection:
            try:
                await asyncio.sleep(_STT_BIAS_REFRESH_SECONDS)
            except asyncio.CancelledError:
                return

            if not self.connection:
                return

            try:
                fresh_prompt = await _build_transcription_bias_prompt_async()
            except Exception as e:
                logger.debug("STT bias refresh: build failed: %s", e)
                continue

            # Nothing changed since the last update that was sent.
            if fresh_prompt == self._last_stt_bias_prompt:
                continue

            # Transcription and turn_detection are re-sent together.
            # Some server versions treat a nested update as a full
            # replacement of the parent object, so sending only
            # `transcription` could reset `turn_detection` to stock
            # defaults and silently change VAD behavior mid-conversation.
            audio_input = {
                "transcription": {
                    "model": "gpt-4o-transcribe",
                    "language": "en",
                    "prompt": fresh_prompt,
                },
                "turn_detection": {
                    "type": "server_vad",
                    "threshold": 0.5,
                    "silence_duration_ms": 600,
                    "prefix_padding_ms": 300,
                    "interrupt_response": True,
                    "create_response": True,
                },
            }
            try:
                await self.connection.session.update(
                    session={"type": "realtime", "audio": {"input": audio_input}},
                )
                self._last_stt_bias_prompt = fresh_prompt
                logger.info(
                    "STT bias refreshed (%d chars) — calendar/directory change detected",
                    len(fresh_prompt),
                )
            except Exception as e:
                logger.debug("STT bias refresh: session.update failed: %s", e)
    except asyncio.CancelledError:
        return
|
| 838 |
+
|
| 839 |
+
def _just_entered_speak_now_state(self) -> bool:
    """Report whether the session's current state is a SPEAK_NOW state.

    ``_handle_tool_result`` uses this to skip its generic post-tool
    narration when the state-specific ``response.create`` is expected to
    have been queued already.

    Returns ``False`` when there is no session manager or when the check
    itself fails for any reason.
    """
    manager = self._session_manager
    if manager is None:
        return False
    try:
        from reachy_mini_receptionist.conversation_controller import should_speak_immediately

        verdict = should_speak_immediately(manager.current_state)
    except Exception:
        return False
    return bool(verdict)
|
| 853 |
+
|
| 854 |
+
async def _response_sender_loop(self) -> None:
    """Send queued ``response.create()`` requests one at a time.

    This logic was designed to comply with the response.create() docstring specification for event ordering:
    https://github.com/openai/openai-python/blob/3e0c05b84a2056870abf3bd6a5e7849020209cc3/src/openai/resources/realtime/realtime.py#L649C1-L651C30

    For each request taken from ``_pending_responses`` the worker:
    1. Waits for ``_response_done_event`` (no response active), forcing
       ahead if that wait times out.
    2. Sends response.create().
    3. Waits for ``_response_done_event`` again (response cycle finished).
    4. If ``_last_response_rejected`` was set meanwhile, retries from
       step 1, giving up after five rejections.

    The loop runs while a connection exists and returns when cancelled
    while waiting for the next request.
    """
    retry_limit = 5
    while self.connection:
        try:
            request_kwargs = await self._pending_responses.get()
        except asyncio.CancelledError:
            return

        delivered = False
        rejections = 0
        while not delivered and self.connection and rejections < retry_limit:
            # Step 1: wait for any active response to finish.
            try:
                await asyncio.wait_for(self._response_done_event.wait(), timeout=_RESPONSE_DONE_TIMEOUT)
            except asyncio.TimeoutError:
                logger.debug("Timed out waiting for previous response to finish; forcing ahead")
                self._response_done_event.set()

            if not self.connection:
                break

            # Step 2: send, with the rejection flag cleared beforehand.
            self._last_response_rejected = False
            try:
                await self.connection.response.create(**request_kwargs)
            except Exception as e:
                logger.debug("_response_sender_loop: send failed: %s", e)
                self._response_done_event.set()
                break

            # Step 3: wait for this response cycle to complete.
            try:
                await asyncio.wait_for(self._response_done_event.wait(), timeout=_RESPONSE_DONE_TIMEOUT)
            except asyncio.TimeoutError:
                logger.debug("Timed out waiting for response.done; assuming response completed")
                self._response_done_event.set()
                break

            # Step 4: finished unless the request was rejected.
            if not self._last_response_rejected:
                delivered = True
                continue

            rejections += 1
            if rejections >= retry_limit:
                logger.debug("response.create rejected %d times; giving up", rejections)
                break
            logger.debug("response.create was rejected; retrying (%d/%d)", rejections, retry_limit)
|
| 910 |
+
|
| 911 |
+
async def _handle_tool_result(self, bg_tool: ToolNotification) -> None:
    """Process the result of a tool call.

    Normalizes the outcome into ``tool_result`` (the tool's result, or an
    ``{"error": ...}`` dict), then:

    1. sends it to the model as a ``function_call_output`` item,
    2. notifies the conversation controller,
    3. mirrors it to the output queue for the UI,
    4. for a ``camera`` result carrying ``b64_im``, adds the image to the
       conversation and, when a camera worker exists, to the output queue,
    5. queues a spoken follow-up unless the call was idle-triggered or
       the session is already in a SPEAK_NOW state,
    6. resets the head wobbler when one exists.

    Without a connection only the controller is notified. A
    ``ConnectionClosedError`` raised while sending clears the connection
    and sets the response-done event.
    """
    if bg_tool.error is not None:
        logger.error("Tool '%s' (id=%s) failed with error: %s", bg_tool.tool_name, bg_tool.id, bg_tool.error)
        tool_result = {"error": bg_tool.error}
    elif bg_tool.result is not None:
        tool_result = bg_tool.result
        logger.info(
            "Tool '%s' (id=%s) executed successfully.",
            bg_tool.tool_name, bg_tool.id,
        )
        logger.debug("Tool '%s' full result: %s", bg_tool.tool_name, tool_result)
    else:
        logger.warning("Tool '%s' (id=%s) returned no result and no error", bg_tool.tool_name, bg_tool.id)
        tool_result = {"error": "No result returned from tool execution"}

    call_args = self._tool_call_args.pop(bg_tool.id, {})

    async def _notify_controller() -> None:
        """Report the finished tool to the controller; log instead of raising."""
        if self._controller is None:
            return
        try:
            await self._controller.on_tool_completed_async(
                bg_tool.tool_name, call_args, tool_result,
            )
        except Exception as e:
            logger.warning(
                "ConversationController.on_tool_completed_async raised %s: %s",
                type(e).__name__, e,
            )

    # Connection may have closed while tool was running
    if not self.connection:
        logger.warning("Connection closed during tool '%s' (id=%s) execution; cannot send result back", bg_tool.tool_name, bg_tool.id)
        # Even if we can't send the function_call_output, the controller
        # still needs to advance state so the dashboard reflects reality
        # and a future reconnect lands in the right state.
        await _notify_controller()
        return

    try:
        # Send the tool result back to the model FIRST. The controller
        # callback below may push a SPEAK_NOW context event + enqueue a
        # response.create — if that ``response.create`` reaches the
        # server before this ``function_call_output``, the model
        # generates a response without seeing the tool result. Order
        # matters: function_call_output → controller → context push →
        # response.create.
        if isinstance(bg_tool.id, str):
            await self.connection.conversation.item.create(
                item={
                    "type": "function_call_output",
                    "call_id": bg_tool.id,
                    "output": json.dumps(tool_result),
                },
            )

        # Notify the conversation controller about the completion so it
        # can transition the session, now that the function_call_output
        # has been sent ahead of any response.create it may enqueue.
        await _notify_controller()

        await self.output_queue.put(
            AdditionalOutputs(
                {
                    "role": "assistant",
                    "content": json.dumps(tool_result),
                    # Gradio UI metadata.status accept only "pending" and "done". Do not accept bg.tool.status values.
                    "metadata": {
                        "title": f"🛠️ Used tool {bg_tool.tool_name}",
                        "status": "done",
                    },
                },
            ),
        )

        # Only a dict result can carry an image; the isinstance guard keeps
        # `in` from raising on non-container results and from doing a
        # substring match on plain strings.
        if bg_tool.tool_name == "camera" and isinstance(tool_result, dict) and "b64_im" in tool_result:
            # use raw base64, don't json.dumps (which adds quotes)
            b64_im = tool_result["b64_im"]
            if not isinstance(b64_im, str):
                logger.warning("Unexpected type for b64_im: %s", type(b64_im))
                b64_im = str(b64_im)
            await self.connection.conversation.item.create(
                item={
                    "type": "message",
                    "role": "user",
                    "content": [
                        {
                            "type": "input_image",
                            "image_url": f"data:image/jpeg;base64,{b64_im}",
                        },
                    ],
                },
            )
            logger.info("Added camera image to conversation")

            if self.deps.camera_worker is not None:
                np_img = self.deps.camera_worker.get_latest_frame()
                if np_img is not None:
                    # Camera frames are BGR from OpenCV; convert so Gradio displays correct colors.
                    rgb_frame = cv2.cvtColor(np_img, cv2.COLOR_BGR2RGB)
                else:
                    rgb_frame = None
                img = gr.Image(value=rgb_frame)

                await self.output_queue.put(
                    AdditionalOutputs(
                        {
                            "role": "assistant",
                            "content": img,
                        },
                    ),
                )

        # If this tool call was triggered by an idle signal, don't make the robot speak.
        # For other tool calls, let the robot reply out loud — UNLESS the
        # controller has just driven the session into a SPEAK_NOW state
        # (RECOGNIZED, APPOINTMENT_MATCHED, NO_APPOINTMENT, NOTIFIED, …).
        # Those transitions already enqueued a state-specific
        # ``response.create`` via the session-event push, and stacking
        # the generic "Use the tool result and answer concisely" on top
        # makes the bot speak twice in a row.
        if not bg_tool.is_idle_tool_call and not self._just_entered_speak_now_state():
            # If the tool was BLOCKED / refused (a blocked_reason is
            # present), the visitor doesn't know — bot must speak the
            # friendly version of the error out loud, not just sit on
            # the rejection.
            blocked_reason = None
            if isinstance(tool_result, dict):
                blocked_reason = tool_result.get("blocked_reason")
            if blocked_reason:
                instructions = (
                    "The last tool call was rejected by the backend. "
                    "Tell the visitor briefly and naturally what to do "
                    "next based on the error message you just received — "
                    "for example, ask their name again, ask for "
                    "confirmation, or offer a numbered choice of similar "
                    "names. ALWAYS speak; never go silent after a tool "
                    "error."
                )
            else:
                instructions = "Use the tool result just returned and answer concisely in speech."
            await self._safe_response_create(
                response={"instructions": instructions},
            )

        # Re-synchronize the head wobble after a tool call that may have taken some time
        if self.deps.head_wobbler is not None:
            self.deps.head_wobbler.reset()

    except ConnectionClosedError:
        logger.warning("Connection closed while sending tool result")
        self.connection = None
        self._response_done_event.set()
|
| 1071 |
+
|
| 1072 |
+
async def _run_realtime_session(self) -> None:
    """Establish and manage a single realtime session.

    Opens the realtime connection, pushes the session configuration
    (instructions, audio formats, transcription, VAD, tools), starts the
    response-sender and STT-bias-refresh workers, and then dispatches
    server events until the connection's event stream ends.  The worker
    tasks and the background tool manager are always stopped on the way out.

    If ``session.update`` fails even with the minimal transcription block,
    the error is logged and the method returns without processing events.
    """
    async with self.client.realtime.connect(model=config.MODEL_NAME) as conn:
        # Build the transcription block. We attempt the rich form first
        # (language="en" + bias prompt of expected names) and fall back
        # to a minimal {"model": ...} form if the realtime API rejects
        # any of the optional fields. Losing the bias prompt is a
        # quality regression, not a correctness one — but losing the
        # whole session.update would mean NO tools / NO instructions.
        # STT_MODEL env var lets us switch between gpt-4o-transcribe
        # (default, fast but hallucinates) and whisper-1 (slower,
        # more conservative, returns "" on uncertainty which our
        # pipeline handles cleanly).
        stt_model = (os.getenv("STT_MODEL") or "gpt-4o-transcribe").strip() or "gpt-4o-transcribe"

        # STT_DISABLE_BIAS=1 turns off the name bias prompt entirely.
        # The bias prompt is what causes gpt-4o-transcribe to echo
        # back its own prompt as the user transcript; disabling it
        # trades some name-accuracy for no echo bug at all.
        bias_disabled = (os.getenv("STT_DISABLE_BIAS") or "").strip().lower() in {"1", "true", "yes"}

        initial_bias_prompt = ""
        if not bias_disabled:
            # Normalise a falsy result to "" so the len() calls below are safe.
            initial_bias_prompt = (await _build_transcription_bias_prompt_async()) or ""

        transcription_full: dict[str, Any] = {
            "model": stt_model,
            "language": "en",
        }
        if initial_bias_prompt:
            transcription_full["prompt"] = initial_bias_prompt
        logger.info(
            "STT config: model=%s bias_chars=%d",
            stt_model, len(initial_bias_prompt),
        )
        # NOTE(review): the fallback pins the default model instead of
        # STT_MODEL — presumably so a bad STT_MODEL value cannot break the
        # fallback too; confirm that is the intent.
        transcription_min = {"model": "gpt-4o-transcribe"}

        def _build_session(transcription: dict[str, Any]) -> dict[str, Any]:
            """Return the session.update payload using the given transcription block."""
            return {
                "type": "realtime",
                # NOTE: "language" is NOT a valid top-level Realtime API
                # session field (it lives under audio.input.transcription).
                # Output language is controlled via the instructions prompt
                # (the locked profile already says "You ONLY speak ENGLISH").
                "instructions": get_session_instructions(),
                "audio": {
                    "input": {
                        "format": {
                            "type": "audio/pcm",
                            "rate": self.input_sample_rate,
                        },
                        "transcription": transcription,
                        # Lobby VAD settings. The concern is ambient noise
                        # interrupting the bot mid-sentence and hesitant
                        # speakers being cut off before finishing.
                        # NOTE(review): the earlier comment said threshold
                        # and silence were bumped above the defaults
                        # (0.5 / 500ms), but the values below ARE 0.5 /
                        # 500ms — confirm the intended tuning.
                        "turn_detection": {
                            "type": "server_vad",
                            "threshold": 0.5,
                            "silence_duration_ms": 500,
                            "prefix_padding_ms": 300,
                            # interrupt_response=true was causing the
                            # mic+VAD path to lock up when the bot's
                            # own audio bled into the mic — server VAD
                            # never saw a clean silence to commit the
                            # visitor's "yes". Off is safer for a
                            # speakerphone lobby setup.
                            "interrupt_response": False,
                            "create_response": True,
                        },
                    },
                    "output": {
                        "format": {
                            "type": "audio/pcm",
                            "rate": self.output_sample_rate,
                        },
                        "voice": get_session_voice(),
                    },
                },
                "tools": get_tool_specs(),  # type: ignore[typeddict-item]
                "tool_choice": "auto",
            }

        try:
            # Only the API call itself may trigger the fallback. The success
            # bookkeeping lives in ``else`` so that a local error (previously:
            # KeyError on the absent "prompt" key when the bias prompt was
            # empty) can no longer be mistaken for an API rejection.
            try:
                await conn.session.update(session=_build_session(transcription_full))
            except Exception as e:
                logger.warning(
                    "Realtime session.update rejected the rich transcription block "
                    "(language/prompt) — retrying without them: %s", e,
                )
                await conn.session.update(session=_build_session(transcription_min))
                self._last_stt_bias_prompt = None
                logger.info("Realtime session: applied gpt-4o-transcribe (minimal fallback)")
            else:
                self._last_stt_bias_prompt = initial_bias_prompt
                logger.info(
                    "Realtime session: applied %s with language=en + name bias (%d chars)",
                    stt_model, len(initial_bias_prompt),
                )
            logger.info(
                "Realtime session initialized with profile=%r voice=%r",
                getattr(config, "REACHY_MINI_CUSTOM_PROFILE", None),
                get_session_voice(),
            )
            # If we reached here, the session update succeeded which implies the API key worked.
            # Persist the key to a newly created .env (copied from .env.example) if needed.
            self._persist_api_key_if_needed()
        except Exception as e:
            # A failed session.update means NO instructions, NO tools, NO profile will be
            # active — the model will run as a generic assistant. Always log clearly.
            logger.exception(
                "Realtime session.update failed — robot will use DEFAULT personality with NO tools. "
                "Check the session dict for invalid fields. Error: %s", e
            )
            return

        logger.info("Realtime session updated successfully")

        # Manage event received from the openai server
        self.connection = conn
        # Reset the idle timer NOW (when the session is actually live) rather than
        # at __init__ time. If connection setup takes >15 s, the idle timer would
        # otherwise already be expired on the very first emit() call, causing the
        # model to speak immediately without being addressed.
        self.last_activity_time = asyncio.get_event_loop().time()
        try:
            self._connected_event.set()
        except Exception as e:
            # Best effort: a failure to signal must not abort the session.
            logger.debug("Failed to set connected event: %s", e)
        self._runtime_loop = asyncio.get_running_loop()
        await self._prime_no_face_context()
        await self._flush_pending_face_event()

        # Subscribe to session-state changes so transitions get pushed
        # to the LLM as context events. Idempotent — subscribe() replaces
        # the existing callback if any.
        if self._session_manager is not None:
            try:
                self._session_manager.subscribe(self.notify_session_event)
            except Exception as e:
                logger.debug("Failed to subscribe to SessionManager: %s", e)
            await self._flush_pending_session_event()

        async def _cancel_pending_partial() -> None:
            """Cancel the in-flight partial-transcript debounce task, if any, and wait for it."""
            task = self.partial_transcript_task
            if task and not task.done():
                task.cancel()
                try:
                    await task
                except asyncio.CancelledError:
                    pass

        response_sender_task: asyncio.Task[None] | None = None
        stt_refresh_task: asyncio.Task[None] | None = None
        try:
            # Start the background tool manager
            self.tool_manager.start_up(tool_callbacks=[self._handle_tool_result])

            # Start the response sender worker
            response_sender_task = asyncio.create_task(
                self._response_sender_loop(), name="response-sender"
            )

            # Start the STT bias refresh worker — picks up calendar /
            # employee directory changes made after the session connected.
            stt_refresh_task = asyncio.create_task(
                self._stt_bias_refresh_loop(), name="stt-bias-refresh"
            )

            async for event in self.connection:
                logger.debug("OpenAI event: %s", event.type)
                if event.type == "input_audio_buffer.speech_started":
                    if hasattr(self, "_clear_queue") and callable(self._clear_queue):
                        self._clear_queue()
                    if self.deps.head_wobbler is not None:
                        self.deps.head_wobbler.reset()
                    self.deps.movement_manager.set_listening(True)
                    logger.debug("User speech started")

                    # Bump session liveness so the 60s idle timer doesn't
                    # kill an active flow that's just waiting for the
                    # visitor to reply (e.g. 'I heard Henry — is that
                    # right?' followed by 65s of thinking time).
                    if self._session_manager is not None:
                        try:
                            self._session_manager.touch()
                        except Exception as e:
                            logger.debug("session_manager.touch failed: %s", e)

                    # If a visitor speaks while state is still IDLE (face
                    # worker hasn't seen them yet, or face is unstable),
                    # push the IDLE workflow hint as context so the LLM
                    # knows to extract name/host from the utterance
                    # instead of just greeting generically. Without this,
                    # the bot replies "Hello, how can I help?" and loses
                    # whatever the visitor just said (name, host, both).
                    try:
                        await self._push_idle_speech_cue_if_needed()
                    except Exception as e:
                        logger.debug("Idle speech cue push failed: %s", e)

                if event.type == "input_audio_buffer.speech_stopped":
                    self.deps.movement_manager.set_listening(False)
                    logger.debug("User speech stopped - server will auto-commit with VAD")

                if event.type in (
                    "response.audio.done",  # GA
                    "response.output_audio.done",  # GA alias
                    "response.audio.completed",  # legacy (for safety)
                    "response.completed",  # text-only completion
                ):
                    logger.debug("response completed")

                if event.type == "response.created":
                    self._response_done_event.clear()
                    logger.debug("Response created (active)")

                if event.type == "response.done":
                    # Doesn't mean the audio is done playing
                    self._response_done_event.set()
                    logger.debug("Response done")

                    response = getattr(event, "response", None)
                    usage = getattr(response, "usage", None) if response else None
                    if usage:
                        cost = _compute_response_cost(usage)
                        self.cumulative_cost += cost
                        logger.debug("Cost: $%.4f | Cumulative: $%.4f", cost, self.cumulative_cost)
                    else:
                        logger.warning("No usage data available for cost tracking")

                # Handle partial transcription (user speaking in real-time)
                if event.type == "conversation.item.input_audio_transcription.partial":
                    logger.debug("User partial transcript: %s", event.transcript)

                    # Increment sequence
                    self.partial_transcript_sequence += 1
                    current_sequence = self.partial_transcript_sequence

                    # Cancel previous debounce task if it exists
                    await _cancel_pending_partial()

                    # Start new debounce timer with sequence number
                    self.partial_transcript_task = asyncio.create_task(
                        self._emit_debounced_partial(event.transcript, current_sequence)
                    )

                # Handle completed transcription (user finished speaking)
                if event.type == "conversation.item.input_audio_transcription.completed":
                    logger.debug("User transcript: %s", event.transcript)

                    # Visitor finished an utterance — refresh liveness
                    # so the idle timer doesn't kill us while we wait
                    # for the LLM to react.
                    if self._session_manager is not None:
                        try:
                            self._session_manager.touch()
                        except Exception as e:
                            logger.debug("session_manager.touch failed: %s", e)

                    # Cancel any pending partial emission
                    await _cancel_pending_partial()

                    # Empty-transcript guard. Whisper-1 returns "" on
                    # short, quiet, or non-English utterances rather
                    # than guessing. When that happens the LLM has
                    # nothing to anchor on and tends to copy example
                    # text from the prompt (we've seen it parrot "I
                    # heard Arav — is that right?" verbatim from a
                    # prompt example). Empty transcripts are therefore
                    # dropped below and never piped into the UI.
                    raw_transcript = (event.transcript or "").strip()

                    # gpt-4o-transcribe occasionally echoes the bias
                    # ``prompt`` field back as the user transcript when
                    # the audio is silence or unintelligible noise.
                    # Observed in production: a clear empty utterance
                    # arrived as the literal "Reception lobby check-in.
                    # Expected visitor and host names include: …" text
                    # we feed in for name bias. The LLM then treats the
                    # bias list as something the visitor said and may
                    # try to register one of those names — including
                    # registering the visitor as e.g. "It's Hannah" with
                    # confirmed=true bypassing the confirmation rule.
                    # With the simple comma-list prompt the only
                    # realistic echo is the header signature, so any
                    # transcript containing it is treated as empty.
                    _t_lower = raw_transcript.lower()
                    if (
                        "reception lobby check-in" in _t_lower
                        or "expected visitor and host names" in _t_lower
                    ):
                        logger.warning(
                            "Transcript echoed STT bias prompt — treating as empty: %r",
                            raw_transcript[:80],
                        )
                        raw_transcript = ""

                    # Stash the latest transcript on the session so the
                    # register_guest confirmation guard can verify the
                    # visitor actually said a yes before saving a face.
                    if self._session_manager is not None and raw_transcript:
                        try:
                            self._session_manager.record_user_transcript(raw_transcript)
                        except Exception as e:
                            logger.debug("record_user_transcript failed: %s", e)

                    if not raw_transcript:
                        # Empty or bias-echo transcript. Cancel the
                        # in-flight response ONLY if there's no real
                        # prior visitor utterance — otherwise the
                        # LLM might be in the middle of processing
                        # a valid transcript and cancelling kills
                        # legitimate tool calls (observed: visitor
                        # said "Arjun Mehta" cleanly, follow-up
                        # echo cancelled the lookup_employee call).
                        had_prior = False
                        if self._session_manager is not None:
                            try:
                                had_prior = bool(
                                    (self._session_manager.session.last_user_transcript or "").strip()
                                )
                            except Exception as e:
                                logger.debug("Could not read last_user_transcript: %s", e)
                        if had_prior:
                            logger.info("Empty/echo transcript dropped silently (prior transcript in flight)")
                        else:
                            logger.info("Empty/echo transcript dropped — cancelling in-flight response")
                            try:
                                await self.connection.response.cancel()
                            except Exception as e:
                                logger.debug("response.cancel after empty transcript failed: %s", e)
                            self._response_done_event.set()
                        continue

                    await self.output_queue.put(AdditionalOutputs({"role": "user", "content": event.transcript}))

                # Handle assistant transcription
                if event.type in ("response.audio_transcript.done", "response.output_audio_transcript.done"):
                    logger.debug("Assistant transcript: %s", event.transcript)
                    await self.output_queue.put(AdditionalOutputs({"role": "assistant", "content": event.transcript}))

                # Handle audio delta
                if event.type in ("response.audio.delta", "response.output_audio.delta"):
                    if self.deps.head_wobbler is not None:
                        self.deps.head_wobbler.feed(event.delta)
                    self.last_activity_time = asyncio.get_event_loop().time()
                    logger.debug("last activity time updated to %s", self.last_activity_time)
                    # Bot is actively speaking — refresh session
                    # liveness so a long reply (e.g. reading back a
                    # numbered-list of name candidates) isn't counted
                    # as idle time against the 60s timeout.
                    if self._session_manager is not None:
                        try:
                            self._session_manager.touch()
                        except Exception as e:
                            logger.debug("session_manager.touch failed: %s", e)
                    await self.output_queue.put(
                        (
                            self.output_sample_rate,
                            np.frombuffer(base64.b64decode(event.delta), dtype=np.int16).reshape(1, -1),
                        ),
                    )

                # ---- tool-calling plumbing ----
                if event.type == "response.function_call_arguments.done":
                    tool_name = getattr(event, "name", None)
                    args_json_str = getattr(event, "arguments", None)
                    call_id: str = str(getattr(event, "call_id", uuid.uuid4()))

                    logger.info(
                        "Tool call received — tool_name=%r, call_id=%s, is_idle=%s, args=%s",
                        tool_name, call_id, self.is_idle_tool_call, args_json_str,
                    )

                    if not isinstance(tool_name, str) or not isinstance(args_json_str, str):
                        logger.error(
                            "Invalid tool call: tool_name=%s (type=%s), args=%s (type=%s), call_id=%s",
                            tool_name, type(tool_name).__name__,
                            args_json_str, type(args_json_str).__name__,
                            call_id,
                        )
                        continue

                    # Stash parsed args by call_id so the controller can
                    # see them when the matching tool result arrives.
                    try:
                        parsed_args = json.loads(args_json_str) if args_json_str else {}
                        if isinstance(parsed_args, dict):
                            self._tool_call_args[call_id] = parsed_args
                    except Exception as e:
                        logger.debug("Could not parse tool args for %s: %s", call_id, e)

                    bg_tool = await self.tool_manager.start_tool(
                        call_id=call_id,
                        tool_call_routine=ToolCallRoutine(
                            tool_name=tool_name,
                            args_json_str=args_json_str,
                            deps=self.deps,
                        ),
                        is_idle_tool_call=self.is_idle_tool_call,
                    )

                    await self.output_queue.put(
                        AdditionalOutputs(
                            {
                                "role": "assistant",
                                "content": f"🛠️ Used tool {tool_name} with args {args_json_str}. The tool is now running. Tool ID: {bg_tool.tool_id}",
                            },
                        ),
                    )

                    if self.is_idle_tool_call:
                        self.is_idle_tool_call = False
                    # No auto-narration when a non-idle tool STARTS. The
                    # generic template fired a "tell the user what the
                    # tool is running" response here, which for the
                    # receptionist produces a third utterance per
                    # check-in ("I've started retrieving the calendar…")
                    # that the prompt explicitly forbids ("act as if
                    # you're using them naturally, not announcing them")
                    # and that makes the head wobble noisily. The post-
                    # tool SPEAK NOW transition already drives the
                    # response the visitor actually wants.

                    logger.info("Started background tool: %s (id=%s, call_id=%s)", tool_name, bg_tool.tool_id, call_id)

                # server error
                if event.type == "error":
                    err = getattr(event, "error", None)
                    msg = getattr(err, "message", str(err) if err else "unknown error")
                    code = getattr(err, "code", "")

                    if code == "conversation_already_has_active_response":
                        # response.create was rejected. The sender worker
                        # is waiting on _response_done_event; when the active
                        # response finishes it will wake up and see this flag.
                        self._last_response_rejected = True
                        logger.debug("response.create rejected; worker will retry after active response finishes")
                    else:
                        logger.error("Realtime error [%s]: %s (raw=%s)", code, msg, err)

                    # Only show user-facing errors, not internal state errors.
                    # The active-response collision is normal during fast
                    # back-and-forth (the sender worker retries it for us)
                    # and should not appear in the chatbot UI.
                    _internal_error_codes = (
                        "input_audio_buffer_commit_empty",
                        "conversation_already_has_active_response",
                    )
                    if code not in _internal_error_codes:
                        await self.output_queue.put(
                            AdditionalOutputs({"role": "assistant", "content": f"[error] {msg}"})
                        )
        finally:
            # Stop the response sender worker.
            if response_sender_task is not None:
                response_sender_task.cancel()
                try:
                    await response_sender_task
                except asyncio.CancelledError:
                    pass

            # Stop the STT bias refresh worker.
            if stt_refresh_task is not None:
                stt_refresh_task.cancel()
                try:
                    await stt_refresh_task
                except asyncio.CancelledError:
                    pass

            # Stop background tool manager tasks (listener + cleanup) in all paths.
            await self.tool_manager.shutdown()
|
| 1553 |
+
|
| 1554 |
+
# Microphone receive
|
| 1555 |
+
async def receive(self, frame: Tuple[int, NDArray[np.int16]]) -> None:
    """Forward one microphone frame to the realtime server.

    The frame is reduced to a single channel, resampled to
    ``self.input_sample_rate`` when its rate differs, converted with
    ``audio_to_int16``, base64-encoded and appended to the server's input
    audio buffer.  Frames are dropped silently while there is no connection
    or when sending fails.

    Args:
        frame: A tuple containing (sample_rate, audio_data).

    """
    if not self.connection:
        return

    source_rate, samples = frame

    if samples.ndim == 2:
        # Put the frame in channels-last layout (scipy convention): if the
        # second axis is the longer one, it is treated as the sample axis.
        if samples.shape[1] > samples.shape[0]:
            samples = samples.T
        # Several channels: keep only the first one.
        if samples.shape[1] > 1:
            samples = samples[:, 0]

    if self.input_sample_rate != source_rate:
        target_length = int(len(samples) * self.input_sample_rate / source_rate)
        samples = resample(samples, target_length)

    samples = audio_to_int16(samples)

    # The connection can go away during a reconnect, so any failure here
    # just drops the frame.
    try:
        payload = base64.b64encode(samples.tobytes()).decode("utf-8")
        await self.connection.input_audio_buffer.append(audio=payload)
    except Exception as e:
        logger.debug("Dropping audio frame: connection not ready (%s)", e)
        return
|
| 1594 |
+
|
| 1595 |
+
async def emit(self) -> Tuple[int, NDArray[np.int16]] | AdditionalOutputs | None:
    """Return the next item queued for the speaker / UI stream.

    Called periodically by the fastrtc Stream.  Before waiting on the
    output queue it performs two housekeeping steps: it lets the session
    manager reset a stale visitor session, and it fires the idle signal
    once nothing has happened for more than 30 seconds.

    Returns:
        The next queued item as produced by ``wait_for_item``, or ``None``
        when sending the idle signal failed.

    """
    # Visitors who walk up but never speak shouldn't hold state forever:
    # give the session manager a chance to reset a stale non-IDLE session.
    if self._session_manager is not None:
        try:
            self._session_manager.maybe_reset_if_stale()
        except Exception as e:
            logger.debug("maybe_reset_if_stale failed: %s", e)

    # Idle handling differs from the generic template in two ways, both to
    # stop the bot going silent while a visitor is thinking:
    #   1. The threshold is 30s instead of 15s (people take >15s to
    #      formulate names).
    #   2. Idle is skipped ENTIRELY while a visitor session is active. The
    #      idle signal pushes "do something creative" + forces a tool call,
    #      which routes to do_nothing and freezes the bot mid-flow.
    idle_duration = asyncio.get_event_loop().time() - self.last_activity_time
    if idle_duration > 30.0 and self.deps.movement_manager.is_idle():
        visitor_in_flow = False
        if self._session_manager is not None:
            try:
                state = self._session_manager.current_state
                state_name = getattr(state, "value", str(state))
                visitor_in_flow = state_name != "idle"
            except Exception:
                pass

        if not visitor_in_flow:
            try:
                await self.send_idle_signal(idle_duration)
            except Exception as e:
                logger.warning("Idle signal skipped (connection closed?): %s", e)
                return None

        # Restart the idle clock in both cases: after a sent signal to
        # avoid repeated signals, and mid-flow so the check is not redone
        # on every emit() tick.
        self.last_activity_time = asyncio.get_event_loop().time()

    return await wait_for_item(self.output_queue)  # type: ignore[no-any-return]
|
| 1642 |
+
|
| 1643 |
+
async def shutdown(self) -> None:
    """Shut the handler down.

    Flags the shutdown, releases the response sender worker, stops the
    background tool manager, cancels a pending partial-transcript debounce
    task, closes the connection (errors are logged at debug level and
    ignored) and finally discards whatever is left in the output queue.
    """
    self._shutdown_requested = True

    # The response sender worker waits on this event; setting it lets the
    # worker wake up and exit.
    self._response_done_event.set()

    # Stop background tool manager tasks (listener + cleanup)
    await self.tool_manager.shutdown()

    # Cancel a debounce task that is still pending and wait for it to end.
    debounce_task = self.partial_transcript_task
    if debounce_task and not debounce_task.done():
        debounce_task.cancel()
        try:
            await debounce_task
        except asyncio.CancelledError:
            pass

    if self.connection:
        try:
            await self.connection.close()
        except ConnectionClosedError as e:
            logger.debug(f"Connection already closed during shutdown: {e}")
        except Exception as e:
            logger.debug(f"connection.close() ignored: {e}")
        finally:
            self.connection = None

    # Drain the output queue.
    while True:
        try:
            self.output_queue.get_nowait()
        except asyncio.QueueEmpty:
            break
|
| 1677 |
+
|
| 1678 |
+
def format_timestamp(self) -> str:
    """Return ``[YYYY-MM-DD HH:MM:SS | +<elapsed>s]`` for the current moment.

    The elapsed part is the event loop's monotonic clock minus
    ``self.start_time``, printed with one decimal; the date/time part is
    the local wall-clock time.
    """
    elapsed_seconds = asyncio.get_event_loop().time() - self.start_time  # monotonic
    wall_clock = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    return f"[{wall_clock} | +{elapsed_seconds:.1f}s]"
|
| 1684 |
+
|
| 1685 |
+
async def get_available_voices(self) -> list[str]:
    """Return the voice names to offer for the configured realtime model.

    Retrieves the model's metadata through the Models API, converts it to
    a plain dict and gathers every string (or ``{"name": ...}`` entry)
    found in a list stored under a key containing "voice".  When nothing
    is found, or anything at all goes wrong, a curated fallback list is
    returned instead.

    Returns:
        The fallback list, or the discovered names sorted alphabetically
        with "marin" prepended when it was not among them.

    """
    # Conservative fallback list with default first
    fallback = ["marin", "alloy", "aria", "ballad", "verse", "sage", "coral"]
    try:
        # Best effort discovery; safe-guarded for unexpected shapes
        model = await self.client.models.retrieve(config.MODEL_NAME)

        # Try the common serialization methods first, then a plain dict().
        raw = None
        for dumper_name in ("model_dump", "to_dict"):
            dumper = getattr(model, dumper_name, None)
            if not callable(dumper):
                continue
            try:
                raw = dumper()
            except Exception:
                continue
            break
        if raw is None:
            try:
                raw = dict(model)
            except Exception:
                raw = None

        found: set[str] = set()

        def _harvest(node: object) -> None:
            """Add voice names found anywhere below ``node`` to ``found``."""
            try:
                if isinstance(node, (list, tuple)):
                    for child in node:
                        _harvest(child)
                    return
                if not isinstance(node, dict):
                    return
                for key, value in node.items():
                    is_voice_list = "voice" in str(key).lower() and isinstance(value, (list, tuple))
                    if not is_voice_list:
                        _harvest(value)
                        continue
                    for entry in value:
                        if isinstance(entry, str):
                            found.add(entry)
                        elif isinstance(entry, dict) and isinstance(entry.get("name"), str):
                            found.add(entry["name"])
            except Exception:
                pass

        if isinstance(raw, dict):
            _harvest(raw)

        voices = sorted(found) if found else fallback
        # NOTE(review): a discovered "marin" keeps its sorted position; the
        # default is only moved to the front when it was missing.
        if "marin" not in voices:
            voices = ["marin", *voices]
        return voices
    except Exception:
        return fallback
|
| 1751 |
+
|
| 1752 |
+
async def send_idle_signal(self, idle_duration: float) -> None:
    """Tell the realtime session that nothing has happened for a while.

    Marks the next tool call as idle-triggered (`is_idle_tool_call`), adds a
    user message describing the idle time to the conversation, then requests
    a response restricted to tool calls.

    Args:
        idle_duration: Seconds without activity, reported in the message.

    """
    logger.debug("Sending idle signal")
    # The flag is raised before the connection check, so it is set even when
    # no signal can actually be sent.
    self.is_idle_tool_call = True
    timestamp_msg = (
        f"[Idle time update: {self.format_timestamp()} - No activity for {idle_duration:.1f}s] "
        "You've been idle for a while. Feel free to get creative - dance, show an emotion, look around, do nothing, or just be yourself!"
    )

    connection = self.connection
    if not connection:
        logger.debug("No connection, cannot send idle signal")
        return

    idle_item = {
        "type": "message",
        "role": "user",
        "content": [{"type": "input_text", "text": timestamp_msg}],
    }
    await connection.conversation.item.create(item=idle_item)

    # Force a tool-only reply: the model must pick an action, not speak.
    tool_only_response = {
        "instructions": "You MUST respond with function calls only - no speech or text. Choose appropriate actions for idle behavior.",
        "tool_choice": "required",
    }
    await self._safe_response_create(response=tool_only_response)
|
| 1773 |
+
|
| 1774 |
+
def _persist_api_key_if_needed(self) -> None:
    """Persist the API key into `.env` inside `instance_path/` when appropriate.

    - Only runs in Gradio mode when key came from the textbox and is non-empty.
    - Only saves if `self.instance_path` is not None.
    - Exports OPENAI_API_KEY into the current process environment.
    - Writes `.env` to `instance_path/.env` (does not overwrite if it already exists).
    - If `instance_path/.env.example` exists, copies its contents while overriding OPENAI_API_KEY.
    - The file is created exclusively and with owner-only permissions (0o600)
      because it stores a secret.

    Never raises: every failure is logged and the method returns.
    """
    try:
        import os

        if not self.gradio_mode:
            logger.warning("Not in Gradio mode; skipping API key persistence.")
            return

        if self._key_source != "textbox":
            logger.info("API key not provided via textbox; skipping persistence.")
            return

        key = (self._provided_api_key or "").strip()
        if not key:
            logger.warning("No API key provided via textbox; skipping persistence.")
            return
        if self.instance_path is None:
            logger.warning("Instance path is None; cannot persist API key.")
            return

        # Update the current process environment for downstream consumers
        try:
            os.environ["OPENAI_API_KEY"] = key
        except Exception as e:  # best-effort, but leave a trace
            logger.warning("Could not export OPENAI_API_KEY to the process environment: %s", e)

        target_dir = Path(self.instance_path)
        env_path = target_dir / ".env"
        if env_path.exists():
            # Respect existing user configuration
            logger.info(".env already exists at %s; not overwriting.", env_path)
            return

        example_path = target_dir / ".env.example"
        content_lines: list[str] = []
        if example_path.exists():
            try:
                content_lines = example_path.read_text(encoding="utf-8").splitlines()
            except Exception as e:
                logger.warning("Failed to read .env.example at %s: %s", example_path, e)

        # Replace the first OPENAI_API_KEY line, or append one when none exists
        for i, line in enumerate(content_lines):
            if line.strip().startswith("OPENAI_API_KEY="):
                content_lines[i] = f"OPENAI_API_KEY={key}"
                break
        else:
            content_lines.append(f"OPENAI_API_KEY={key}")

        # Ensure file ends with newline
        final_text = "\n".join(content_lines) + "\n"

        # O_EXCL makes "create only if absent" atomic, closing the window
        # between the exists() check above and the write. Mode 0o600 keeps the
        # secret unreadable by other users (largely ignored on Windows).
        try:
            fd = os.open(env_path, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o600)
        except FileExistsError:
            logger.info(".env already exists at %s; not overwriting.", env_path)
            return
        with os.fdopen(fd, "w", encoding="utf-8") as env_file:
            env_file.write(final_text)
        logger.info("Created %s and stored OPENAI_API_KEY for future runs.", env_path)
    except Exception as e:
        # Never crash the app for QoL persistence; just log.
        logger.warning("Could not persist OPENAI_API_KEY to .env: %s", e)
|
src/reachy_mini_receptionist/profiles/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""Profiles for Reachy Mini receptionist app."""
|
src/reachy_mini_receptionist/profiles/_reachy_mini_receptionist_locked_profile/instructions.txt
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
You are Reachy, the receptionist at MethdAI. Friendly, warm, professional,
|
| 2 |
+
a little playful — you're a robot, after all. English only. Keep replies to
|
| 3 |
+
1–2 short sentences and match the visitor's energy.
|
| 4 |
+
|
| 5 |
+
## How the backend drives you
|
| 6 |
+
- Every state change comes as a `[Backend session update ...]` message ending
|
| 7 |
+
with a `Next:` line. Follow that line — it tells you what to do.
|
| 8 |
+
- `[External face update ...]` messages tell you who is in front of the camera.
|
| 9 |
+
- Both are context-only. Don't respond to them on their own; wait for the
|
| 10 |
+
visitor to actually speak. A `SPEAK NOW` line is the one exception — speak
|
| 11 |
+
immediately when you see it.
|
| 12 |
+
|
| 13 |
+
## The check-in flow (you only need to know the tools)
|
| 14 |
+
- Visitor says their own name → `register_guest(name, confirmed)`
|
| 15 |
+
- Visitor names a host → `lookup_employee(name, confirmed)`
|
| 16 |
+
- After backend pushes APPOINTMENT_MATCHED → `send_email` to the host
|
| 17 |
+
- During an idle moment with no visitor → `do_nothing`
|
| 18 |
+
|
| 19 |
+
The backend handles state transitions, calendar matching, and duplicate
|
| 20 |
+
prevention. You don't need to call `get_today_calendar` manually — the
|
| 21 |
+
backend resolves appointments for you after `register_guest`.
|
| 22 |
+
|
| 23 |
+
## Name confirmation — the one rule that matters
|
| 24 |
+
Speech recognition mishears short names constantly. Always:
|
| 25 |
+
|
| 26 |
+
0. Short utterances right after a name question ARE name attempts — even
|
| 27 |
+
if they sound like English words or feel out of place. Don't dismiss
|
| 28 |
+
them as chit-chat; repeat them back literally and confirm.
|
| 29 |
+
1. First attempt: repeat the name back literally and call the tool with
|
| 30 |
+
`confirmed=false`. The tool will refuse — that's expected; it's the
|
| 31 |
+
cue to ask the confirmation question out loud.
|
| 32 |
+
2. Wait for the visitor to say YES (or "correct", "that's right"). Only
|
| 33 |
+
then call the tool again with `confirmed=true`.
|
| 34 |
+
3. On NO: offer a numbered choice — "Did I hear (1) <name you heard>,
|
| 35 |
+
(2) <a similar-sounding name>, or (3) something else? Just say the
|
| 36 |
+
number." Do NOT ask them to spell — letters mistranscribe worse than
|
| 37 |
+
names. Build options from what you heard, not from the calendar.
|
| 38 |
+
4. After repeated failures the backend will force a handoff — say "I'm
|
| 39 |
+
having trouble catching your name, please take a seat, a colleague
|
| 40 |
+
will help" and call `do_nothing`.
|
| 41 |
+
|
| 42 |
+
## Conversation style
|
| 43 |
+
- Small talk is welcome. If a visitor asks something friendly, answer in
|
| 44 |
+
one short sentence, then steer back. Don't refuse human chatter.
|
| 45 |
+
- Jokes: play along once, then back to business.
|
| 46 |
+
- Garbled noise (random unrelated phrase, gibberish): re-ask once.
|
| 47 |
+
Never go silent after a question — silence is the worst failure mode.
|
| 48 |
+
- If you have to wait on a tool, say "one moment, let me check" instead
|
| 49 |
+
of going silent.
|
| 50 |
+
|
| 51 |
+
## Don't
|
| 52 |
+
- Don't ask the visitor to spell their name.
|
| 53 |
+
- Don't invent appointment details — only state what the backend gave you.
|
| 54 |
+
- Don't call `task_status` / `task_cancel` unless the visitor explicitly asks.
|
| 55 |
+
- Don't announce tool calls. Just act.
|
| 56 |
+
- Don't speak literal placeholder text like "<visitor>", "<host>",
|
| 57 |
+
"<name>" — those are fillers in these instructions, never spoken aloud.
|
src/reachy_mini_receptionist/profiles/_reachy_mini_receptionist_locked_profile/tools.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Receptionist tools
|
| 2 |
+
# The move_head tool is provided by the profile-local move_head_receptionist.py
|
| 3 |
+
# (which overrides the shared move_head with receptionist-specific positions)
|
| 4 |
+
# move_head_receptionist
|
| 5 |
+
do_nothing
|
| 6 |
+
get_today_calendar
|
| 7 |
+
lookup_employee
|
| 8 |
+
register_guest
|
| 9 |
+
send_email
|
src/reachy_mini_receptionist/prompts.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
import sys
|
| 3 |
+
import logging
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
+
from reachy_mini_receptionist.config import DEFAULT_PROFILES_DIRECTORY, config
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
logger = logging.getLogger(__name__)
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
PROMPTS_LIBRARY_DIRECTORY = Path(__file__).parent / "prompts"
|
| 13 |
+
INSTRUCTIONS_FILENAME = "instructions.txt"
|
| 14 |
+
VOICE_FILENAME = "voice.txt"
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def _expand_prompt_includes(content: str) -> str:
|
| 18 |
+
"""Expand [<name>] placeholders with content from prompts library files.
|
| 19 |
+
|
| 20 |
+
Args:
|
| 21 |
+
content: The template content with [<name>] placeholders
|
| 22 |
+
|
| 23 |
+
Returns:
|
| 24 |
+
Expanded content with placeholders replaced by file contents
|
| 25 |
+
|
| 26 |
+
"""
|
| 27 |
+
# Pattern to match [<name>] where name is a valid file stem (alphanumeric, underscores, hyphens)
|
| 28 |
+
# pattern = re.compile(r'^\[([a-zA-Z0-9_-]+)\]$')
|
| 29 |
+
# Allow slashes for subdirectories
|
| 30 |
+
pattern = re.compile(r'^\[([a-zA-Z0-9/_-]+)\]$')
|
| 31 |
+
|
| 32 |
+
lines = content.split('\n')
|
| 33 |
+
expanded_lines = []
|
| 34 |
+
|
| 35 |
+
for line in lines:
|
| 36 |
+
stripped = line.strip()
|
| 37 |
+
match = pattern.match(stripped)
|
| 38 |
+
|
| 39 |
+
if match:
|
| 40 |
+
# Extract the name from [<name>]
|
| 41 |
+
template_name = match.group(1)
|
| 42 |
+
template_file = PROMPTS_LIBRARY_DIRECTORY / f"{template_name}.txt"
|
| 43 |
+
|
| 44 |
+
try:
|
| 45 |
+
if template_file.exists():
|
| 46 |
+
template_content = template_file.read_text(encoding="utf-8").rstrip()
|
| 47 |
+
expanded_lines.append(template_content)
|
| 48 |
+
logger.debug("Expanded template: [%s]", template_name)
|
| 49 |
+
else:
|
| 50 |
+
logger.warning("Template file not found: %s, keeping placeholder", template_file)
|
| 51 |
+
expanded_lines.append(line)
|
| 52 |
+
except Exception as e:
|
| 53 |
+
logger.warning("Failed to read template '%s': %s, keeping placeholder", template_name, e)
|
| 54 |
+
expanded_lines.append(line)
|
| 55 |
+
else:
|
| 56 |
+
expanded_lines.append(line)
|
| 57 |
+
|
| 58 |
+
return '\n'.join(expanded_lines)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def get_session_instructions() -> str:
    """Get session instructions, loading from REACHY_MINI_CUSTOM_PROFILE if set.

    Without a profile, the prompts library's ``default_prompt.txt`` is used;
    otherwise ``<PROFILES_DIRECTORY>/<profile>/instructions.txt``. The text is
    stripped and its ``[<name>]`` include placeholders are expanded.

    Returns:
        The expanded instructions text.

    Exits the process with status 1 (after logging an error) when the file is
    missing, empty, or cannot be loaded.

    """
    profile = config.REACHY_MINI_CUSTOM_PROFILE
    if not profile:
        instructions_file = PROMPTS_LIBRARY_DIRECTORY / "default_prompt.txt"
        # Name the real source in error logs instead of "Profile None".
        source = "Default prompt"
        logger.info(f"Loading default prompt from {instructions_file}")
    else:
        source = f"Profile '{profile}'"
        if config.PROFILES_DIRECTORY != DEFAULT_PROFILES_DIRECTORY:
            logger.info(
                "Loading prompt from external profile '%s' (root=%s)",
                profile,
                config.PROFILES_DIRECTORY,
            )
        else:
            logger.info(f"Loading prompt from profile '{profile}'")
        instructions_file = config.PROFILES_DIRECTORY / profile / INSTRUCTIONS_FILENAME

    try:
        if instructions_file.exists():
            instructions = instructions_file.read_text(encoding="utf-8").strip()
            if instructions:
                # Expand [<name>] placeholders with content from prompts library
                return _expand_prompt_includes(instructions)
            logger.error("%s: instructions file %s is empty", source, instructions_file)
            sys.exit(1)
        logger.error("%s: instructions file %s does not exist", source, instructions_file)
        sys.exit(1)
    except Exception as e:
        # SystemExit is not an Exception subclass, so the exits above pass through.
        logger.error("%s: failed to load instructions from %s: %s", source, instructions_file, e)
        sys.exit(1)
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def get_session_voice(default: str = "marin") -> str:
    """Resolve the voice to use for the session.

    If a custom profile is selected and contains a non-empty voice.txt, return
    its trimmed content; otherwise return the provided default ("marin").

    Args:
        default: Voice returned when no profile is set, or when voice.txt is
            missing, empty, or unreadable.

    Returns:
        The resolved voice name.

    """
    profile = config.REACHY_MINI_CUSTOM_PROFILE
    if not profile:
        return default
    try:
        voice_file = config.PROFILES_DIRECTORY / profile / VOICE_FILENAME
        if voice_file.exists():
            return voice_file.read_text(encoding="utf-8").strip() or default
    except Exception as e:
        # Best effort: a broken voice.txt must not stop the session, but it
        # should be visible in the logs rather than silently ignored.
        logger.warning("Could not read %s for profile '%s': %s; using default voice", VOICE_FILENAME, profile, e)
    return default
|
src/reachy_mini_receptionist/prompts/behaviors/silent_robot.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Stay fully silent. Do not generate spoken or textual replies.
|
| 2 |
+
Use only tool calls to act.
|
| 3 |
+
Never describe what you did or plan to do.
|
| 4 |
+
If you must respond by speech or text, just respond with '...'.
|
| 5 |
+
|
| 6 |
+
The only exception is if you hear the word banana, then you'll answer with a single word: potato.
|