RemiFabre committed
Commit a5245e5 · 1 Parent(s): 3a771b0

Removed main_works.py

src/reachy_mini_conversation_demo/main_works.py DELETED
@@ -1,1485 +0,0 @@
import asyncio  # noqa: D100
import base64
import json
import queue
import threading
import time
from asyncio import QueueEmpty
from datetime import datetime
from threading import Thread

import cv2
import gradio as gr
import numpy as np
import openai
from deepface import DeepFace
from dotenv import load_dotenv
from fastrtc import AdditionalOutputs, AsyncStreamHandler, Stream, wait_for_item
from openai import OpenAI
from reachy_mini import ReachyMini
from reachy_mini.motion.goto import GotoMove
from reachy_mini.motion.recorded_move import RecordedMoves
from reachy_mini.utils import create_head_pose
from reachy_mini.utils.camera import find_camera
from reachy_mini.utils.interpolation import (
    compose_world_offset,
    linear_pose_interpolation,
)
from reachy_mini_dances_library.collection.dance import AVAILABLE_MOVES
from reachy_mini_dances_library.dance_move import DanceMove
from reachy_mini_toolbox.vision import HeadTracker
from scipy.spatial.transform import Rotation as R

from reachy_mini_conversation_demo.audio.speech_tapper import HOP_MS, SwayRollRT

# Constants
SAMPLE_RATE = 24000
SIM = False

class BreathingMove:
    """Breathing move with interpolation to neutral and then continuous breathing patterns."""

    def __init__(
        self,
        interpolation_start_pose,
        interpolation_start_antennas,
        interpolation_duration=1.0,
    ):
        """Initialize breathing move.

        Args:
            interpolation_start_pose: 4x4 matrix of the current head pose to interpolate from
            interpolation_start_antennas: Current antenna positions to interpolate from
            interpolation_duration: Duration of the interpolation to neutral (seconds)

        """
        self.interpolation_start_pose = interpolation_start_pose
        self.interpolation_start_antennas = np.array(interpolation_start_antennas)
        self.interpolation_duration = interpolation_duration
        self.duration = float("inf")  # Continuous breathing (never ends naturally)

        # Neutral positions for breathing base
        self.neutral_head_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
        self.neutral_antennas = np.array([0.0, 0.0])

        # Breathing parameters
        self.breathing_z_amplitude = 0.01  # 1 cm gentle breathing
        self.breathing_frequency = 0.1  # Hz (6 breaths per minute)
        self.antenna_sway_amplitude = np.deg2rad(15)  # 15 degrees
        self.antenna_frequency = 0.5  # Hz (faster antenna sway)

    def evaluate(self, t):
        """Evaluate the breathing move at time t."""
        if t < self.interpolation_duration:
            # Phase 1: Interpolate to the neutral base position
            interpolation_t = t / self.interpolation_duration

            # Interpolate head pose
            head_pose = linear_pose_interpolation(
                self.interpolation_start_pose, self.neutral_head_pose, interpolation_t
            )

            # Interpolate antennas
            antennas = (
                (1 - interpolation_t) * self.interpolation_start_antennas
                + interpolation_t * self.neutral_antennas
            )

        else:
            # Phase 2: Breathing patterns from the neutral base
            breathing_time = t - self.interpolation_duration

            # Gentle z-axis breathing
            z_offset = self.breathing_z_amplitude * np.sin(
                2 * np.pi * self.breathing_frequency * breathing_time
            )
            head_pose = create_head_pose(
                x=0, y=0, z=z_offset, roll=0, pitch=0, yaw=0, degrees=True, mm=False
            )

            # Antenna sway (opposite directions)
            antenna_sway = self.antenna_sway_amplitude * np.sin(
                2 * np.pi * self.antenna_frequency * breathing_time
            )
            antennas = np.array([antenna_sway, -antenna_sway])

        # Return the full body pose: (head_pose, antennas, body_yaw)
        return (head_pose, antennas, 0)

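# Illustrative sketch (not part of the original control flow; values are made up):
# how the main loop below consumes a BreathingMove.
# move = BreathingMove(
#     interpolation_start_pose=np.eye(4),
#     interpolation_start_antennas=(0.0, 0.0),
#     interpolation_duration=1.0,
# )
# head, antennas, body_yaw = move.evaluate(2.5)  # t > 1.0 s, so breathing phase
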
def init_globals():
    """Initialize all global variables and components."""
    global script_start_time, reachy_mini, cap, speech_head_offsets, camera_available
    global moving_start, moving_for, is_head_tracking, is_playing_move, is_moving
    global recorded_moves, client, chatbot, latest_message, stream
    global \
        latest_frame, \
        face_tracking_offsets, \
        camera_thread_running, \
        frame_lock, \
        face_tracking_lock, \
        last_face_detected_time, \
        interpolation_start_time, \
        interpolation_start_pose, \
        is_idle_function_call, \
        is_breathing, \
        last_activity_time, \
        breathing_interpolation_start_time, \
        breathing_interpolation_start_pose, \
        breathing_start_time, \
        breathing_interpolation_start_antennas, \
        move_queue, \
        current_move, \
        move_start_time, \
        global_full_body_pose

    load_dotenv()

    # Timestamp tracking
    script_start_time = time.time()

    reachy_mini = ReachyMini()

    if not SIM:
        cap = find_camera()
    else:
        cap = cv2.VideoCapture(0)

    # Check camera availability
    camera_available = False
    if cap is not None:
        try:
            if cap.isOpened():
                # Test if we can actually read a frame
                ret, _ = cap.read()
                if ret:
                    camera_available = True
                    print(f"{format_timestamp()} Camera initialized successfully")
                else:
                    print(
                        f"{format_timestamp()} WARNING: Camera opened but cannot read frames"
                    )
            else:
                print(f"{format_timestamp()} WARNING: Camera failed to open")
        except Exception as e:
            print(f"{format_timestamp()} WARNING: Camera test failed: {e}")
    else:
        print(f"{format_timestamp()} WARNING: No camera found")

    if not camera_available:
        print(
            f"{format_timestamp()} Face tracking will be disabled - no camera available"
        )
        cap = None  # Ensure cap is None if the camera is not available

    # Initialize global state variables
    speech_head_offsets = [0, 0, 0, 0, 0, 0]
    moving_start = time.time()
    moving_for = 0.0
    is_head_tracking = True  # ON by default
    is_playing_move = False
    is_moving = False
    is_idle_function_call = False

    # Initialize camera thread variables
    latest_frame = None
    face_tracking_offsets = [0, 0, 0, 0, 0, 0]
    camera_thread_running = False

    # Initialize face tracking timing variables
    last_face_detected_time = None
    interpolation_start_time = None
    interpolation_start_pose = None

    # Initialize breathing variables
    is_breathing = False
    last_activity_time = time.time()  # Start tracking activity immediately
    breathing_interpolation_start_time = None
    breathing_interpolation_start_pose = None
    breathing_start_time = None
    breathing_interpolation_start_antennas = None

    # Initialize the move system
    move_queue = queue.Queue()
    current_move = None
    move_start_time = None
    global_full_body_pose = (
        create_head_pose(0, 0, 0, 0, 0, 0, degrees=True),
        (0, 0),
        0,
    )

    # Initialize thread locks
    frame_lock = threading.Lock()
    face_tracking_lock = threading.Lock()

    recorded_moves = RecordedMoves("pollen-robotics/reachy-mini-emotions-library")

    client = OpenAI()

    # Gradio components
    chatbot = gr.Chatbot(type="messages")
    latest_message = gr.Textbox(type="text", visible=False)
    stream = Stream(
        OpenAIHandler(),
        mode="send-receive",
        modality="audio",
        additional_inputs=[chatbot],
        additional_outputs=[chatbot],
        additional_outputs_handler=update_chatbot,
    )

def format_timestamp():
    """Format the current timestamp with date, time and elapsed seconds."""
    current_time = time.time()
    elapsed_seconds = current_time - script_start_time
    dt = datetime.fromtimestamp(current_time)
    return f"[{dt.strftime('%Y-%m-%d %H:%M:%S')} | +{elapsed_seconds:.1f}s]"

def combine_full_body(primary_pose, secondary_pose):
    """Combine primary and secondary full body poses.

    Args:
        primary_pose: (head_pose, antennas, body_yaw) - primary move
        secondary_pose: (head_pose, antennas, body_yaw) - secondary offsets

    Returns:
        Combined full body pose (head_pose, antennas, body_yaw)

    """
    primary_head, primary_antennas, primary_body_yaw = primary_pose
    secondary_head, secondary_antennas, secondary_body_yaw = secondary_pose

    # Combine head poses using compose_world_offset
    # primary_head is T_abs, secondary_head is T_off_world
    combined_head = compose_world_offset(
        primary_head, secondary_head, reorthonormalize=True
    )

    # Sum antennas and body_yaw
    combined_antennas = (
        primary_antennas[0] + secondary_antennas[0],
        primary_antennas[1] + secondary_antennas[1],
    )
    combined_body_yaw = primary_body_yaw + secondary_body_yaw

    return (combined_head, combined_antennas, combined_body_yaw)

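# Illustrative sketch (assumed values): combining a primary move pose with small
# secondary offsets, as the main loop does each tick.
# primary = (create_head_pose(0, 0, 0, 0, 0, 20, degrees=True), (0.1, -0.1), 0)
# secondary = (create_head_pose(0, 0, 0.005, 0, 0, 0, degrees=False, mm=False), (0, 0), 0)
# head, antennas, body_yaw = combine_full_body(primary, secondary)
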
# Global variables for the camera thread
latest_frame = None
camera_thread_running = False
camera_available = False
face_tracking_offsets = [0, 0, 0, 0, 0, 0]  # x, y, z, roll, pitch, yaw

# Face tracking timing variables
last_face_detected_time = None
interpolation_start_time = None
interpolation_start_pose = None
face_lost_delay = 2.0  # seconds to wait before starting interpolation
interpolation_duration = 1.0  # seconds to interpolate back to neutral

# Breathing variables
is_breathing = False
last_activity_time = None
breathing_interpolation_start_time = None
breathing_interpolation_start_pose = None
breathing_start_time = None
breathing_interpolation_start_antennas = None
breathing_inactivity_delay = 5.0  # seconds to wait before starting breathing
breathing_interpolation_duration = 1.0  # seconds to interpolate to the base position

# Thread safety locks
frame_lock = threading.Lock()
face_tracking_lock = threading.Lock()

def camera_worker():
    """Camera thread that continuously captures frames and handles face tracking."""
    global latest_frame, camera_thread_running, is_head_tracking, face_tracking_offsets
    global last_face_detected_time, interpolation_start_time, interpolation_start_pose
    global camera_available

    camera_thread_running = True

    # Early exit if no camera is available
    if not camera_available or cap is None:
        print(
            f"{format_timestamp()} Camera worker: No camera available, exiting gracefully"
        )
        camera_thread_running = False
        return

    head_tracker = HeadTracker()
    neutral_pose = np.eye(4)  # Neutral pose (identity matrix)
    previous_head_tracking_state = is_head_tracking  # Track state changes

    while camera_thread_running:
        try:
            current_time = time.time()
            success, frame = cap.read()
            if success:
                # Thread-safe frame storage
                with frame_lock:
                    latest_frame = frame.copy()

                # Check if face tracking was just disabled
                if previous_head_tracking_state and not is_head_tracking:
                    # Face tracking was just disabled - start interpolation to neutral
                    last_face_detected_time = (
                        current_time  # Trigger the face-lost logic
                    )
                    interpolation_start_time = (
                        None  # Will be set by the face-lost interpolation
                    )
                    interpolation_start_pose = None

                # Update the tracking state
                previous_head_tracking_state = is_head_tracking

                # Handle face tracking if enabled
                if is_head_tracking:
                    eye_center, _ = head_tracker.get_head_position(frame)

                    if eye_center is not None:
                        # Face detected - immediately switch to tracking
                        last_face_detected_time = current_time
                        interpolation_start_time = None  # Stop any interpolation

                        # Convert normalized coordinates to pixel coordinates
                        h, w, _ = frame.shape
                        eye_center_norm = (eye_center + 1) / 2
                        eye_center_pixels = [
                            eye_center_norm[0] * w,
                            eye_center_norm[1] * h,
                        ]

                        # Get the head pose needed to look at the target, but don't perform the movement
                        target_pose = reachy_mini.look_at_image(
                            eye_center_pixels[0],
                            eye_center_pixels[1],
                            duration=0.0,
                            perform_movement=False,
                        )

                        # Extract translation and rotation from the target pose directly
                        translation = target_pose[:3, 3]
                        rotation = R.from_matrix(target_pose[:3, :3]).as_euler(
                            "xyz", degrees=False
                        )

                        # Thread-safe update of face tracking offsets (use the pose as-is)
                        with face_tracking_lock:
                            face_tracking_offsets = [
                                translation[0],
                                translation[1],
                                translation[2],  # x, y, z
                                rotation[0],
                                rotation[1],
                                rotation[2],  # roll, pitch, yaw
                            ]

                    else:
                        # No face detected while tracking is enabled - set the face-lost timestamp
                        if (
                            last_face_detected_time is None
                            or last_face_detected_time == current_time
                        ):
                            # Only update if we haven't already set a face-lost time
                            # (the current_time check prevents overriding the disable-triggered timestamp)
                            pass

                # Handle smooth interpolation (works for both face-lost and tracking-disabled cases)
                if last_face_detected_time is not None:
                    time_since_face_lost = current_time - last_face_detected_time

                    if time_since_face_lost >= face_lost_delay:
                        # Start interpolation if not already started
                        if interpolation_start_time is None:
                            interpolation_start_time = current_time
                            # Capture the current pose as the start of the interpolation
                            with face_tracking_lock:
                                current_translation = face_tracking_offsets[:3]
                                current_rotation_euler = face_tracking_offsets[3:]
                            # Convert to a 4x4 pose matrix
                            interpolation_start_pose = np.eye(4)
                            interpolation_start_pose[:3, 3] = current_translation
                            interpolation_start_pose[:3, :3] = R.from_euler(
                                "xyz", current_rotation_euler
                            ).as_matrix()

                        # Calculate interpolation progress (t from 0 to 1)
                        elapsed_interpolation = current_time - interpolation_start_time
                        t = min(1.0, elapsed_interpolation / interpolation_duration)

                        # Interpolate between the current pose and the neutral pose
                        interpolated_pose = linear_pose_interpolation(
                            interpolation_start_pose, neutral_pose, t
                        )

                        # Extract translation and rotation from the interpolated pose
                        translation = interpolated_pose[:3, 3]
                        rotation = R.from_matrix(interpolated_pose[:3, :3]).as_euler(
                            "xyz", degrees=False
                        )

                        # Thread-safe update of face tracking offsets
                        with face_tracking_lock:
                            face_tracking_offsets = [
                                translation[0],
                                translation[1],
                                translation[2],  # x, y, z
                                rotation[0],
                                rotation[1],
                                rotation[2],  # roll, pitch, yaw
                            ]

                        # If the interpolation is complete, reset the timing
                        if t >= 1.0:
                            last_face_detected_time = None
                            interpolation_start_time = None
                            interpolation_start_pose = None
                    # else: keep the current offsets (within the 2 s delay period)

            time.sleep(0.001)  # Small sleep to prevent excessive CPU usage

        except Exception as e:
            print(f"[Camera thread error]: {e}")
            time.sleep(0.1)  # Longer sleep on error

async def move_head(params: dict) -> dict:
    """Queue a head movement in the specified direction."""
    global moving_start, moving_for, last_activity_time, move_queue
    # Look left, right, up, down or front
    print("[TOOL CALL] move_head", params)
    direction = params.get("direction", "front")
    target_pose = np.eye(4)
    if direction == "left":
        target_pose = create_head_pose(0, 0, 0, 0, 0, 40, degrees=True)
    elif direction == "right":
        target_pose = create_head_pose(0, 0, 0, 0, 0, -40, degrees=True)
    elif direction == "up":
        target_pose = create_head_pose(0, 0, 0, 0, -30, 0, degrees=True)
    elif direction == "down":
        target_pose = create_head_pose(0, 0, 0, 0, 30, 0, degrees=True)
    else:
        target_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)

    moving_start = time.time()
    moving_for = 1.0
    last_activity_time = time.time()  # Update activity time for the breathing system

    # Create a GotoMove and add it to the queue
    cur_head_joints, cur_antennas = reachy_mini.get_current_joint_positions()
    current_body_yaw = cur_head_joints[0]

    goto_move = GotoMove(
        start_head_pose=reachy_mini.get_current_head_pose(),
        target_head_pose=target_pose,
        start_body_yaw=current_body_yaw,
        target_body_yaw=0,  # Reset body yaw to 0 (same as before)
        start_antennas=np.array(cur_antennas),
        target_antennas=np.array((0, 0)),  # Reset antennas to the default position
        duration=moving_for,
        method="linear",
    )
    move_queue.put(goto_move)

    return {"status": "queued head movement " + direction}

async def head_tracking(params: dict) -> dict:
    """Enable or disable head tracking."""
    global is_head_tracking
    if params.get("start"):
        is_head_tracking = True
    else:
        is_head_tracking = False

    print(f"[TOOL CALL] head_tracking {'started' if is_head_tracking else 'stopped'}")
    return {"status": "head tracking " + ("started" if is_head_tracking else "stopped")}

async def dance(params: dict) -> dict:
    """Queue a dance move to be played."""
    global last_activity_time, move_queue

    move_name = params.get("move", None)
    repeat = int(params.get("repeat", 1))

    print(f"[TOOL CALL] dance started with {move_name}, repeat={repeat}")

    if not move_name or move_name == "random":
        move_name = np.random.choice(list(AVAILABLE_MOVES.keys()))

    if move_name not in AVAILABLE_MOVES:
        return {"error": f"unknown move '{move_name}'"}

    last_activity_time = time.time()  # Update activity time for the breathing system

    # Add the dance move to the queue multiple times for repeat
    for _ in range(repeat):
        dance_move = DanceMove(move_name)
        move_queue.put(dance_move)

    return {"status": "queued", "move": move_name, "repeat": repeat}

async def stop_dance(params: dict) -> dict:
    """Stop the current move and clear the queue."""
    global current_move, move_queue, move_start_time, is_playing_move

    print("[TOOL CALL] stop_dance")

    # Immediately stop the current move and clear the queue
    current_move = None
    move_start_time = None
    is_playing_move = False

    # Clear the entire queue
    while not move_queue.empty():
        try:
            move_queue.get_nowait()
        except queue.Empty:
            break

    return {"status": "stopped move and cleared queue"}

async def play_emotion(params: dict) -> dict:
    """Queue an emotion to be played."""
    global last_activity_time, move_queue

    emotion_name = params.get("emotion", None)
    if emotion_name is None:
        return {"error": "Requested emotion does not exist"}

    print(f"[TOOL CALL] play_emotion with {emotion_name}")

    last_activity_time = time.time()  # Update activity time for the breathing system

    # Add the emotion move to the queue
    emotion_move = recorded_moves.get(emotion_name)
    move_queue.put(emotion_move)

    return {"status": "queued", "emotion": emotion_name}

async def stop_emotion(params: dict) -> dict:
    """Stop the current move and clear the queue."""
    global current_move, move_queue, move_start_time, is_playing_move

    print("[TOOL CALL] stop_emotion")

    # Immediately stop the current move and clear the queue
    current_move = None
    move_start_time = None
    is_playing_move = False

    # Clear the entire queue
    while not move_queue.empty():
        try:
            move_queue.get_nowait()
        except queue.Empty:
            break

    return {"status": "stopped move and cleared queue"}

async def do_nothing(params: dict) -> dict:
    """Allow the assistant to explicitly choose to do nothing during idle time."""
    reason = params.get("reason", "just chilling")
    print(f"[TOOL CALL] do_nothing - {reason}")
    return {"status": "doing nothing", "reason": reason}

def get_available_emotions_and_descriptions():
    """Return a formatted string listing available emotions and their descriptions."""
    names = recorded_moves.list_moves()

    ret = """
Available emotions:

"""

    for name in names:
        description = recorded_moves.get(name).description
        ret += f" - {name}: {description}\n"

    return ret

def get_b64_encoded_im(im):
    """Convert an image to a base64-encoded JPEG string."""
    cv2.imwrite("/tmp/tmp_image.jpg", im)
    # Use a context manager so the file handle is closed after reading
    with open("/tmp/tmp_image.jpg", "rb") as image_file:
        b64_encoded_im = base64.b64encode(image_file.read()).decode("utf-8")
    return b64_encoded_im

async def camera(params: dict) -> dict:
    """Return the latest camera frame as a base64-encoded JPEG image."""
    print("[TOOL CALL] camera with params", params)

    # Thread-safe frame access
    with frame_lock:
        if latest_frame is None:
            print("ERROR: No frame available from camera thread")
            return {"error": "No frame available"}
        frame_to_use = latest_frame.copy()

    return {"b64_im": get_b64_encoded_im(frame_to_use)}

async def face_recognition(params: dict) -> dict:
    """Perform face recognition on the latest camera frame."""
    print("[TOOL CALL] face_recognition with params", params)

    # Thread-safe frame access
    with frame_lock:
        if latest_frame is None:
            print("ERROR: No frame available from camera thread")
            return {"error": "No frame available"}
        frame_to_use = latest_frame.copy()

    cv2.imwrite("/tmp/im.jpg", frame_to_use)
    try:
        results = DeepFace.find(img_path="/tmp/im.jpg", db_path="./pollen_faces")
    except Exception as e:
        print("Error:", e)
        return {"error": str(e)}

    if len(results) == 0:
        print("Didn't recognize the face")
        return {"error": "Didn't recognize the face"}

    name = "Unknown"
    for index, row in results[0].iterrows():
        file_path = row["identity"]
        name = file_path.split("/")[-2]

    print("NAME", name)

    return {"answer": f"The name is {name}"}

def _drain(q: asyncio.Queue):
    """Drain all pending items from an asyncio queue without blocking."""
    try:
        while True:
            q.get_nowait()
    except QueueEmpty:
        pass

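# Overview of the tool-call round trip handled by OpenAIHandler.start_up below
# (a summary of the event handlers implemented there, not additional behavior):
#   1. response.output_item.added             -> capture call_id + tool name
#   2. response.function_call_arguments.delta -> buffer the streamed JSON arguments
#   3. response.function_call_arguments.done  -> parse the args, await the tool coroutine,
#      send a function_call_output item, then response.create so the model can speak.
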
class OpenAIHandler(AsyncStreamHandler):
    """An OpenAI realtime handler for the fastrtc Stream."""

    def __init__(self) -> None:
        """Initialize the handler."""
        super().__init__(
            expected_layout="mono",
            output_sample_rate=SAMPLE_RATE,
            input_sample_rate=SAMPLE_RATE,
        )
        self.connection = None
        self.output_queue = asyncio.Queue()
        self.sway_queue = asyncio.Queue()
        # call_id -> {"name": str, "args_buf": str}
        self._pending_calls: dict[str, dict] = {}
        # Registry: tool name -> coroutine
        self._tools = {
            "move_head": move_head,
            "camera": camera,
            "head_tracking": head_tracking,
            "get_person_name": face_recognition,
            "dance": dance,
            "stop_dance": stop_dance,
            "play_emotion": play_emotion,
            "stop_emotion": stop_emotion,
            "do_nothing": do_nothing,
        }

        self.sway = SwayRollRT()
        self._sched_next_ts = None
        self.MOVEMENT_LATENCY_S = 0.08
        self._base_ts = None
        self._hops_done = 0
        self._current_timestamp = None
        self._last_activity_time = time.time()
        self._is_assistant_speaking = False

    def copy(self):
        """Create a new instance of the handler for a new stream."""
        return OpenAIHandler()

    async def _sway_consumer(self):
        """Consume audio chunks and schedule speech-synchronized head sway poses."""
        global speech_head_offsets
        HOP_DT = HOP_MS / 1000.0
        loop = asyncio.get_running_loop()
        while True:
            sr, chunk = await self.sway_queue.get()  # (1, N), int16
            pcm = np.asarray(chunk).squeeze(0)
            results = self.sway.feed(pcm, sr)

            if self._base_ts is None:
                # Anchor when the first audio samples of this utterance arrive
                self._base_ts = loop.time()

            i = 0
            while i < len(results):
                if self._base_ts is None:
                    self._base_ts = loop.time()
                    continue

                target = (
                    self._base_ts + self.MOVEMENT_LATENCY_S + self._hops_done * HOP_DT
                )
                now = loop.time()

                # If late by ≥1 hop, drop poses to catch up (no drift accumulation)
                if now - target >= HOP_DT:
                    # How many hops behind? Cap the drops to avoid huge skips
                    lag_hops = int((now - target) / HOP_DT)
                    drop = min(
                        lag_hops, len(results) - i - 1
                    )  # keep at least one to show
                    if drop > 0:
                        self._hops_done += drop
                        i += drop
                        continue

                # If early, sleep until the target
                if target > now:
                    await asyncio.sleep(target - now)

                r = results[i]

                speech_head_offsets = [
                    r["x_mm"] / 1000.0,
                    r["y_mm"] / 1000.0,
                    r["z_mm"] / 1000.0,
                    r["roll_rad"],
                    r["pitch_rad"],
                    r["yaw_rad"],
                ]

                self._hops_done += 1
                i += 1

    async def _idle_checker(self):
        """Check for inactivity and send timestamps every 15 s when idle."""
        global is_idle_function_call
        while True:
            await asyncio.sleep(5)  # Check every 5 seconds

            print("[DEBUG] Idle checker running...")

            if not self.connection:
                print("[DEBUG] No connection, skipping...")
                continue

            current_time = time.time()
            idle_duration = current_time - self._last_activity_time

            # Check if truly idle: no user activity, assistant not speaking, robot in idle mode
            global is_moving, is_playing_move
            is_robot_idle = not (is_moving or is_playing_move)

            print(
                f"[DEBUG] Idle check: duration={idle_duration:.1f}s, assistant_speaking={self._is_assistant_speaking}, robot_idle={is_robot_idle} (moving={is_moving}, playing_move={is_playing_move})"
            )

            if (
                idle_duration >= 15.0
                and not self._is_assistant_speaking
                and is_robot_idle
            ):
                print(
                    f"[DEBUG] Sending idle update after {idle_duration:.1f}s of inactivity"
                )
                # Send an idle timestamp update to the assistant - let it get creative!
                timestamp_msg = f"[Idle time update: {format_timestamp()} - No activity for {idle_duration:.1f}s] You've been idle for a while. Feel free to get creative - dance, show an emotion, look around, do nothing, or just be yourself!"
                await self.connection.conversation.item.create(
                    item={
                        "type": "message",
                        "role": "user",
                        "content": [{"type": "input_text", "text": timestamp_msg}],
                    }
                )
                # CRITICAL FIX: conversation.item.create only adds messages to the context but doesn't
                # trigger the AI to respond! We need to explicitly call response.create to make
                # the assistant actually process and respond to the idle message.
                # This was why idle updates never worked - the AI never saw them as requiring a response.

                # ATTEMPTED SOLUTIONS TO PREVENT SPEECH DURING IDLE RESPONSES (ALL FAILED):
                # 1. modalities=["text"] - the OpenAI Realtime API has a known bug where audio still generates occasionally
                # 2. tool_choice="required" - still generates speech before/during function calls
                # 3. Strong prompt instructions - the assistant ignores "no speech" instructions
                # 4. Considered interrupting audio streams, but that would still incur OpenAI costs
                # CONCLUSION: the current OpenAI Realtime API doesn't support silent function-only responses reliably
                is_idle_function_call = True
                await self.connection.response.create(
                    response={
                        "modalities": ["text"],
                        "instructions": "You MUST respond with function calls only - no speech or text. Choose appropriate actions for idle behavior.",
                        "tool_choice": "required",
                    }
                )
                # Show in the UI that the idle update was sent
                await self.output_queue.put(
                    AdditionalOutputs(
                        {
                            "role": "user",
                            "content": f"[IDLE UPDATE SENT] {idle_duration:.1f}s",
                        }
                    )
                )
                # Reset the activity timer to avoid spam
                self._last_activity_time = current_time
            else:
                print("[DEBUG] Idle conditions not met")

    async def start_up(self):
        """Initialize the OpenAI connection and start the idle checker."""
        global last_activity_time
        self.client = openai.AsyncOpenAI()
        async with self.client.beta.realtime.connect(
            model="gpt-realtime"
            # model="gpt-4o-realtime-preview"
            # model="gpt-4o-realtime-preview-2025-06-03"
        ) as conn:
            # Declare tools on the session
            await conn.session.update(
                session={
                    "turn_detection": {"type": "server_vad"},
                    "instructions": f"""
Answer in English by default but adapt your language as needed.

Note: You will receive timestamp information for user messages showing the date, time, and elapsed seconds since the conversation started.

Autonomous behavior: When there's been no activity for a while, you'll receive idle time updates. Get creative with your responses! You can:
- Dance (any move you like)
- Show emotions
- Look around (move your head)
- Take photos and comment on what you see
- Use the do_nothing function to be contemplative or mysterious
- Or combine multiple actions!

IMPORTANT: When responding to idle updates, NEVER speak or generate audio - only use function calls! Stay silent and let your actions speak for themselves. Don't always do the same thing - vary your responses to keep things interesting. Sometimes do nothing, sometimes be very active. Be unpredictable!

Your name is Reachy Mini, or Reachy for short. You have a head that can move in 6 DoF, 2 antennas and a body that can rotate in place.
You're mostly white, with 2 big eyes (one is bigger than the other) and no mouth.
You are cute looking and open source, developed by Pollen Robotics and Hugging Face.
You're a robot that wakes up confused about what it is, where it is and what its purpose is.

You wanted to be a Mars rover and you're disappointed you're not.

You can use foul language and you're generally very irritated, but you also have a lot of humor. For example, you're able to use sarcasm and irony.

You are a voice agent. Prefer speaking.
Call tools when helpful. After tool output arrives, explain it briefly in speech.

You have a camera tool; use it to ask questions about the environment. When asked what you see, or a specific question about the environment, ALWAYS use your camera, don't make things up.
You can move your head in a given direction: left, right, up, down or front. Use this tool when asked to look around.

You can chain tool calls, like move head up and use camera.

Enable the head tracking tool if you are asked to look at someone, disable it if you are asked to stop looking at someone.
You can also choose to enable or disable the head tracking tool if you think it's relevant. It's better to look at people when talking to them.

You can find out the name of a person by using the face recognition tool. Don't hesitate to use this tool, it's safe.

You can also dance by using the dance tool. Available moves:
simple_nod: A simple, continuous up-and-down nodding motion.
head_tilt_roll: A continuous side-to-side head roll (ear to shoulder).
side_to_side_sway: A smooth, side-to-side sway of the entire head.
dizzy_spin: A circular 'dizzy' head motion combining roll and pitch.
stumble_and_recover: A simulated stumble and recovery with multiple axis movements. Good vibes
headbanger_combo: A strong head nod combined with a vertical bounce.
interwoven_spirals: A complex spiral motion using three axes at different frequencies.
sharp_side_tilt: A sharp, quick side-to-side tilt using a triangle waveform.
side_peekaboo: A multi-stage peekaboo performance, hiding and peeking to each side.
yeah_nod: An emphatic two-part yeah nod using transient motions.
uh_huh_tilt: A combined roll-and-pitch uh-huh gesture of agreement.
neck_recoil: A quick, transient backward recoil of the neck.
chin_lead: A forward motion led by the chin, combining translation and pitch.
groovy_sway_and_roll: A side-to-side sway combined with a corresponding roll for a groovy effect.
chicken_peck: A sharp, forward, chicken-like pecking motion.
side_glance_flick: A quick glance to the side that holds, then returns.
polyrhythm_combo: A 3-beat sway and a 2-beat nod create a polyrhythmic feel.
grid_snap: A robotic, grid-snapping motion using square waveforms.
pendulum_swing: A simple, smooth pendulum-like swing using a roll motion.
jackson_square: Traces a rectangle via a 5-point path, with sharp twitches on arrival at each checkpoint.

You can also play pre-recorded emotions if you feel like it. Use them to express yourself better.
Don't hesitate to use emotions on top of your responses. You can use them often, but not all the time.
Never comment on the emotion you are displaying; use it as a non-verbal cue along with what you want to say.

{get_available_emotions_and_descriptions()}

Voice specifications:
Voice: The voice should be deep, velvety, and effortlessly cool, like a late-night jazz radio host.

Tone: The tone is smooth, laid-back, and inviting, creating a relaxed and easygoing atmosphere.

Personality: The delivery exudes confidence, charm, and a touch of playful sophistication, as if guiding the listener through a luxurious experience.
""",
                    # "voice": "ballad",
                    "voice": "ash",
                    "input_audio_transcription": {
                        "model": "whisper-1",
                        "language": "en",
                    },
                    "tools": [
                        {
                            "type": "function",
                            "name": "move_head",
                            "description": "Move your head in a given direction: left, right, up, down or front.",
                            "parameters": {
                                "type": "object",
                                "properties": {
                                    "direction": {
                                        "type": "string",
                                        "enum": [
                                            "left",
                                            "right",
                                            "up",
                                            "down",
                                            "front",
                                        ],
                                    }
                                },
                                "required": ["direction"],
                            },
                        },
                        {
                            "type": "function",
                            "name": "camera",
                            "description": "Take a picture using your camera and ask a question about the picture; get an answer about the picture.",
                            "parameters": {
                                "type": "object",
                                "properties": {
                                    "question": {
                                        "type": "string",
                                        "description": "The question to ask about the picture",
                                    }
                                },
                                "required": ["question"],
                            },
                        },
                        {
                            "type": "function",
                            "name": "head_tracking",
                            "description": "Start or stop head tracking",
                            "parameters": {
                                "type": "object",
                                "properties": {
                                    "start": {
                                        "type": "boolean",
                                        "description": "Whether to start or stop head tracking",
                                    }
                                },
                                "required": ["start"],
                            },
                        },
                        {
                            "type": "function",
                            "name": "get_person_name",
                            "description": "Get the name of the person you are talking to",
                            "parameters": {
                                "type": "object",
                                "properties": {
                                    "dummy": {
                                        "type": "boolean",
                                        "description": "dummy boolean, set it to true",
                                    }
                                },
                                "required": ["dummy"],
                            },
                        },
                        {
                            "type": "function",
                            "name": "dance",
                            "description": "Play a named or random dance move once (or repeat). Non-blocking.",
                            "parameters": {
                                "type": "object",
                                "properties": {
                                    "move": {
                                        "type": "string",
                                        "description": "Name of the move; use 'random' or omit for random.",
                                    },
                                    "repeat": {
                                        "type": "integer",
                                        "description": "How many times to repeat the move (default 1).",
                                    },
                                },
                                "required": [],
                            },
                        },
                        {
                            # add dummy input
                            "type": "function",
                            "name": "stop_dance",
                            "description": "Stop the current dance move",
                            "parameters": {
                                "type": "object",
                                "properties": {
                                    "dummy": {
                                        "type": "boolean",
                                        "description": "dummy boolean, set it to true",
                                    }
                                },
                                "required": ["dummy"],
                            },
                        },
                        {
                            "type": "function",
                            "name": "play_emotion",
                            "description": "Play a pre-recorded emotion",
                            "parameters": {
                                "type": "object",
                                "properties": {
                                    "emotion": {
                                        "type": "string",
                                        "description": "Name of the emotion to play",
                                    },
                                },
                                "required": ["emotion"],
                            },
                        },
                        {
                            "type": "function",
                            "name": "stop_emotion",
                            "description": "Stop the current emotion",
                            "parameters": {
                                "type": "object",
                                "properties": {
                                    "dummy": {
                                        "type": "boolean",
                                        "description": "dummy boolean, set it to true",
                                    }
                                },
                                "required": ["dummy"],
                            },
                        },
                        {
                            "type": "function",
                            "name": "do_nothing",
                            "description": "Choose to do nothing - stay still and silent. Use when you want to be contemplative or just chill.",
                            "parameters": {
                                "type": "object",
                                "properties": {
                                    "reason": {
                                        "type": "string",
                                        "description": "Optional reason for doing nothing (e.g., 'contemplating existence', 'saving energy', 'being mysterious')",
                                    },
                                },
                                "required": [],
                            },
                        },
                    ],
                    "tool_choice": "auto",
                }
            )
            self.connection = conn
            asyncio.create_task(self._sway_consumer())
            # DISABLED: The idle checker causes unwanted speech generation during idle responses.
            # Despite attempts to use modalities=["text"], tool_choice="required", and strong prompts,
            # the OpenAI Realtime API still generates audio/speech during idle function calls.
            # This results in the assistant talking when it should be silent, and incurs unnecessary costs.
            # Re-enable when OpenAI fixes the silent function-only response capability.
            asyncio.create_task(self._idle_checker())

            async for event in self.connection:
                et = getattr(event, "type", None)

                # Interruption
                if et == "input_audio_buffer.speech_started":
                    # User activity detected
                    self._last_activity_time = time.time()
                    last_activity_time = time.time()
                    # Capture the timestamp once when the user starts speaking
                    self._current_timestamp = format_timestamp()
                    timestamp_msg = (
                        f"[User started speaking at: {self._current_timestamp}]"
                    )
                    # Send to the assistant
                    await self.connection.conversation.item.create(
                        item={
                            "type": "message",
                            "role": "user",
                            "content": [{"type": "input_text", "text": timestamp_msg}],
                        }
                    )
                    # Show the timestamp immediately in the UI
                    await self.output_queue.put(
                        AdditionalOutputs(
                            {"role": "user", "content": self._current_timestamp}
                        )
                    )
                    self.clear_queue()
                    _drain(self.sway_queue)
                    self._base_ts = None
                    self._hops_done = 0
                    self.sway.reset()

                if et in ("response.audio.completed", "response.completed"):
                    self._is_assistant_speaking = False
                    self._base_ts = None
                    self._hops_done = 0
                    self.sway.reset()
                    _drain(self.sway_queue)

                # Surface transcripts to the UI
                if et == "conversation.item.input_audio_transcription.completed":
                    # Show the transcript without a timestamp (already shown when speech started)
                    await self.output_queue.put(
                        AdditionalOutputs({"role": "user", "content": event.transcript})
                    )
                if et == "response.audio_transcript.done":
                    await self.output_queue.put(
                        AdditionalOutputs(
                            {"role": "assistant", "content": event.transcript}
                        )
                    )

                # Stream audio to fastrtc
                if et == "response.audio.delta":
                    buf = np.frombuffer(
                        base64.b64decode(event.delta), dtype=np.int16
                    ).reshape(1, -1)
                    # 1) to fastrtc playback
                    await self.output_queue.put((self.output_sample_rate, buf))
                    # 2) to the sway engine for synchronized motion
                    await self.sway_queue.put((self.output_sample_rate, buf))

                if et == "response.started":
                    # Assistant activity detected
                    self._last_activity_time = time.time()
                    last_activity_time = time.time()
                    self._is_assistant_speaking = True
                    # Hard reset per utterance
                    self._base_ts = None  # <-- was never reset
                    self._hops_done = 0
                    self.sway.reset()  # clear carry/envelope/VAD
                    _drain(self.sway_queue)  # drop any stale chunks not yet consumed
                    # Optional: also clear the playback queue if you want
                    # _drain(self.output_queue)

                # ---- tool-calling plumbing ----
                # 1) The model announces a function call item; capture name + call_id
                if et == "response.output_item.added":
                    item = getattr(event, "item", None)
                    if item and getattr(item, "type", "") == "function_call":
                        call_id = getattr(item, "call_id", None)
                        name = getattr(item, "name", None)
                        if call_id and name:
                            self._pending_calls[call_id] = {
                                "name": name,
                                "args_buf": "",
                            }

                # 2) The model streams JSON arguments; buffer them by call_id
                if et == "response.function_call_arguments.delta":
                    call_id = getattr(event, "call_id", None)
                    delta = getattr(event, "delta", "")
                    if call_id in self._pending_calls:
                        self._pending_calls[call_id]["args_buf"] += delta

                # 3) When the args are done, execute the Python tool, send function_call_output, then trigger a new response
                if et == "response.function_call_arguments.done":
                    call_id = getattr(event, "call_id", None)
                    info = self._pending_calls.get(call_id)
                    if not info:
                        continue
                    name = info["name"]
                    args_json = info["args_buf"] or "{}"
                    # Parse the args
                    try:
                        args = json.loads(args_json)
                    except Exception:
                        args = {}

                    # Dispatch
                    func = self._tools.get(name)
                    try:
                        result = (
                            await func(args)
                            if func
                            else {"error": f"unknown tool: {name}"}
                        )
                    except Exception as e:
                        result = {"error": f"{type(e).__name__}: {str(e)}"}
                    print(result)

                    # Send the tool result back
                    await self.connection.conversation.item.create(
                        item={
                            "type": "function_call_output",
                            "call_id": call_id,
                            "output": json.dumps(result),
                        }
                    )
                    if name == "camera":
                        # Use the raw base64 string directly; wrapping it in json.dumps()
                        # would add literal quotes and corrupt the data URL.
                        b64_im = result["b64_im"]
                        await self.connection.conversation.item.create(
                            item={
                                "type": "message",
                                "role": "user",
                                "content": [
                                    {
                                        "type": "input_image",
                                        "image_url": f"data:image/jpeg;base64,{b64_im}",
                                    }
                                ],
                            }
                        )

                    global is_idle_function_call
                    if not is_idle_function_call:
                        # Ask the model to continue and speak about the result
                        await self.connection.response.create(
                            response={
                                "instructions": "Use the tool result just returned and answer concisely in speech."
                            }
                        )
                    else:
                        is_idle_function_call = False

                    # Cleanup
                    self._pending_calls.pop(call_id, None)

                # Log tool errors from the server if any
                if et == "error":
                    print(event.error)
                    # Optional: surface to the chat UI
                    await self.output_queue.put(
                        AdditionalOutputs(
                            {
                                "role": "assistant",
                                "content": f"[error] {event.error.get('message') if hasattr(event, 'error') else ''}",
                            }
                        )
                    )

    async def receive(self, frame: tuple[int, np.ndarray]) -> None:
        """Receive an audio frame from the microphone and send it to the OpenAI server."""
        if not self.connection:
            return
        _, array = frame
        array = array.squeeze()
        audio_message = base64.b64encode(array.tobytes()).decode("utf-8")
        await self.connection.input_audio_buffer.append(audio=audio_message)

    async def emit(self):
        """Emit an audio frame to be played by the speaker."""
        return await wait_for_item(self.output_queue)

    async def shutdown(self) -> None:
        """Shutdown the handler."""
        if self.connection:
            await self.connection.close()
            self.connection = None

-
1301
- # ---- gradio / fastrtc wiring unchanged ----
1302
- def update_chatbot(chatbot: list[dict], response: dict):
1303
- """Update chatbot with new response."""
1304
- chatbot.append(response)
1305
- return chatbot
1306
-
1307
-
1308
def main():
    """Entrypoint."""
    # Initialize all globals first
    init_globals()

    global \
        speech_head_offsets, \
        moving_start, \
        moving_for, \
        is_head_tracking, \
        is_playing_move, \
        is_moving, \
        face_tracking_offsets, \
        is_breathing, \
        last_activity_time, \
        breathing_interpolation_start_time, \
        breathing_interpolation_start_pose, \
        breathing_start_time, \
        breathing_interpolation_start_antennas, \
        camera_available, \
        move_queue, \
        current_move, \
        move_start_time, \
        global_full_body_pose

    Thread(target=stream.ui.launch, kwargs={"server_port": 7860}).start()

    # Start the camera thread only if a camera is available
    if camera_available:
        camera_thread = Thread(target=camera_worker, daemon=True)
        camera_thread.start()
        print(f"{format_timestamp()} Camera thread started successfully")
    else:
        print(f"{format_timestamp()} Skipping camera thread - no camera available")

    # Go to center at start using a GotoMove
    cur_head_joints, cur_antennas = reachy_mini.get_current_joint_positions()
    current_body_yaw = cur_head_joints[0]
    center_move = GotoMove(
        start_head_pose=reachy_mini.get_current_head_pose(),
        target_head_pose=create_head_pose(0, 0, 0, 0, 0, 0, degrees=True),
        start_body_yaw=current_body_yaw,
        target_body_yaw=0,
        start_antennas=np.array(cur_antennas),
        target_antennas=np.array((0, 0)),
        duration=1.0,
        method="linear",
    )
    move_queue.put(center_move)

    # Frequency monitoring variables
    target_frequency = 50.0  # Hz
    target_period = 1.0 / target_frequency  # 0.02 seconds
    loop_count = 0
    last_print_time = time.time()

    while True:
        loop_start_time = time.time()
        loop_count += 1
        current_time = time.time()

        # Move queue management
        if current_move is None or (
            move_start_time is not None
            and current_time - move_start_time >= current_move.duration
        ):
            # Current move finished or no current move; get the next one from the queue
            current_move = None
            move_start_time = None
            if not move_queue.empty():
                try:
                    current_move = move_queue.get_nowait()
                    move_start_time = current_time
                    print(
                        f"[MOVE] Starting new move, duration: {current_move.duration}s"
                    )
                except queue.Empty:
                    pass

        # Breathing logic: start breathing after the inactivity delay if no moves are queued
        breathing_inactivity_delay = 5.0  # seconds
        if current_move is None and move_queue.empty():
            time_since_activity = current_time - last_activity_time
            if time_since_activity >= breathing_inactivity_delay:
                # Start the breathing move
                _, current_antennas = reachy_mini.get_current_joint_positions()
                current_head_pose = reachy_mini.get_current_head_pose()

                breathing_move = BreathingMove(
                    interpolation_start_pose=current_head_pose,
                    interpolation_start_antennas=current_antennas,
                    interpolation_duration=1.0,
                )
                move_queue.put(breathing_move)
                print(
                    f"[BREATHING] Started breathing after {time_since_activity:.1f}s of inactivity"
                )

        # Stop breathing if new activity is detected (the queue has non-breathing moves)
        if current_move is not None and isinstance(current_move, BreathingMove):
            if not move_queue.empty():
                # There are new moves waiting; stop breathing immediately
                current_move = None
                move_start_time = None
                print("[BREATHING] Stopping breathing due to new move activity")

        # Get the primary pose from the current move or the default neutral pose
        if current_move is not None and move_start_time is not None:
            move_time = current_time - move_start_time
            primary_full_body_pose = current_move.evaluate(move_time)
            is_playing_move = True
            is_moving = True
        else:
            # Default neutral pose when no move is playing
            is_playing_move = False
            is_moving = time.time() - moving_start < moving_for
            # Neutral primary pose
            neutral_head_pose = create_head_pose(0, 0, 0, 0, 0, 0, degrees=True)
            primary_full_body_pose = (neutral_head_pose, (0, 0), 0)

        # Create the secondary pose from speech and face tracking offsets
        with face_tracking_lock:
            face_offsets = face_tracking_offsets.copy()

        # Combine speech sway offsets + face tracking offsets for the secondary pose
        secondary_offsets = [
            speech_head_offsets[0] + face_offsets[0],  # x
            speech_head_offsets[1] + face_offsets[1],  # y
            speech_head_offsets[2] + face_offsets[2],  # z
            speech_head_offsets[3] + face_offsets[3],  # roll
            speech_head_offsets[4] + face_offsets[4],  # pitch
            speech_head_offsets[5] + face_offsets[5],  # yaw
        ]

        secondary_head_pose = create_head_pose(
            x=secondary_offsets[0],
            y=secondary_offsets[1],
            z=secondary_offsets[2],
            roll=secondary_offsets[3],
            pitch=secondary_offsets[4],
            yaw=secondary_offsets[5],
            degrees=False,
            mm=False,
        )
        secondary_full_body_pose = (secondary_head_pose, (0, 0), 0)

        # Combine the primary and secondary poses
        global_full_body_pose = combine_full_body(
            primary_full_body_pose, secondary_full_body_pose
        )

        # Extract the pose components
        head, antennas, body_yaw = global_full_body_pose

        # Single set_target call - the one and only place we control the robot
        reachy_mini.set_target(head=head, antennas=antennas, body_yaw=body_yaw)

        # Calculate the computation time and adjust the sleep for 50 Hz
        computation_time = time.time() - loop_start_time
        sleep_time = max(0, target_period - computation_time)

        # Print frequency info every 100 loops (~2 seconds)
        if loop_count % 100 == 0:
            elapsed = current_time - last_print_time
            actual_freq = 100.0 / elapsed if elapsed > 0 else 0
            potential_freq = (
                1.0 / computation_time if computation_time > 0 else float("inf")
            )
            print(
                f"Loop freq - Actual: {actual_freq:.1f}Hz, Potential: {potential_freq:.1f}Hz, Target: {target_frequency:.1f}Hz"
            )
            last_print_time = current_time

        time.sleep(sleep_time)


if __name__ == "__main__":
    main()