""" 🗣️ Haven Speech System with Expressive Animations Text-to-speech with personality-driven robot movements! """ import os import time import threading import re from animations import animate, stop_current_animation # Try to import ElevenLabs try: from elevenlabs.client import ElevenLabs ELEVENLABS_AVAILABLE = True except ImportError: ELEVENLABS_AVAILABLE = False api_key = os.getenv("ELEVENLABS_API_KEY") client = ElevenLabs(api_key=api_key) if ELEVENLABS_AVAILABLE and api_key else None # Voice IDs for each persona (free-tier compatible) VOICES = { "Olivia": "21m00Tcm4TlvDq8ikWAM", # Rachel - warm, professional "Brie": "EXAVITQu4vr4xnSDxMaL", # Bella - friendly, enthusiastic } # 🗣️ Pronunciation dictionary for TTS PRONUNCIATIONS = { r'(\d{1,2}(?::\d{2})?\s*(?:AM|PM|a\.?m\.?|p\.?m\.?))\s*[-–—]\s*(\d{1,2}(?::\d{2})?\s*(?:AM|PM|a\.?m\.?|p\.?m\.?))': r'\1 to \2', r'(\d{1,2}):00\s*(AM|PM|am|pm|A\.M\.|P\.M\.)': r'\1 \2', r'(\d{1,2}):(\d{2})\s*(AM|PM|am|pm|A\.M\.|P\.M\.)': r'\1:\2 \3', r'(\d{1,2}):00\b': r'\1 o\'clock', r'\bbrulee\b': 'broo-LAY', r'\bbrûlée\b': 'broo-LAY', r'\bcrème\b': 'krem', r'\bcreme\b': 'krem', r'\bjalapeno\b': 'hala-PEN-yo', r'\bjalapeño\b': 'hala-PEN-yo', r'\bchipotle\b': 'chi-POAT-lay', r'\bquinoa\b': 'KEEN-wah', r'\bgnocchi\b': 'NYOH-kee', r'\bpho\b': 'fuh', r'\bbanh mi\b': 'bahn mee', r'\bai?oli\b': 'eye-OH-lee', r'\bbruschetta\b': 'broo-SKET-ah', r'\bprosciutto\b': 'pro-SHOO-toh', r'\bmascarpone\b': 'mas-kar-POH-nay', r'\btiramisu\b': 'teer-ah-mee-SOO', r'\bespresso\b': 'es-PRESS-oh', r'\bcroissant\b': 'kwah-SAHN', r'\bfilet\b': 'fi-LAY', r'\bhors d\'oeuvres?\b': 'or-DERVS', r'\bsouffle\b': 'soo-FLAY', r'\bsoufflé\b': 'soo-FLAY', r'\bratatouille\b': 'rat-ah-TOO-ee', r'\bvinaigrette\b': 'vin-eh-GRET', r'(\d+)°F': r'\1 degrees Fahrenheit', r'(\d+)°C': r'\1 degrees Celsius', r'(\d+)°': r'\1 degrees', r'\btsp\.?\b': 'teaspoon', r'\btbsp\.?\b': 'tablespoon', r'\boz\.?\b': 'ounce', r'\blb\.?\b': 'pound', r'\bsaute\b': 'saw-TAY', r'\bsauté\b': 'saw-TAY', r'\bflambé\b': 'flahm-BAY', r'\bjulienne\b': 'joo-lee-EN', r'\bdeglaze\b': 'dee-GLAYZ', } def fix_pronunciation(text): """Apply pronunciation fixes for better TTS output.""" result = text for pattern, replacement in PRONUNCIATIONS.items(): result = re.sub(pattern, replacement, result, flags=re.IGNORECASE) return result def estimate_speech_duration(text): """Estimate how long the speech will take.""" words = len(text.split()) return max(2.0, words / 2.5) def speak(text, persona="Olivia"): """Speak text with persona-appropriate voice and animations.""" clean_text = re.sub(r'\[CALENDAR[^\]]*\]', '', text) clean_text = re.sub(r'\[EMAIL[^\]]*\]', '', clean_text) clean_text = re.sub(r'\[DAILY[^\]]*\]', '', clean_text) clean_text = re.sub(r'\[REAL-TIME[^\]]*\]', '', clean_text) clean_text = clean_text.strip() tts_text = fix_pronunciation(clean_text) duration = estimate_speech_duration(text) def run_animation(): animate("greeting", persona=persona) time.sleep(0.5) animate("speaking", persona=persona, duration=duration) anim_thread = threading.Thread(target=run_animation, daemon=True) anim_thread.start() if client: try: voice_id = VOICES.get(persona, VOICES["Olivia"]) audio = client.text_to_speech.convert( voice_id=voice_id, model_id="eleven_turbo_v2_5", text=tts_text ) with open("/tmp/haven_speech.mp3", "wb") as f: for chunk in audio: f.write(chunk) os.system("afplay /tmp/haven_speech.mp3") except Exception: pass else: time.sleep(min(duration, 3.0)) anim_thread.join(timeout=1.0) stop_current_animation() def speak_step(step_text, step_number, total_steps): """Speak a recipe step with appropriate Brie animations.""" progress = step_number / total_steps if progress < 0.3: animate("thinking", persona="Brie") time.sleep(0.3) elif progress > 0.8: animate("excited", persona="Brie") time.sleep(0.3) speak(step_text, persona="Brie") def celebrate_completion(): """Celebrate when a recipe is complete!""" animate("step_complete", persona="Brie") speak("Amazing work! Your dish is ready. I hope it tastes as wonderful as it smells!", "Brie") animate("excited", persona="Brie") def olivia_handoff_to_brie(): """Olivia's graceful handoff to Brie.""" animate("handoff", persona="Olivia") time.sleep(0.3) animate("greeting", persona="Brie") def react_to_emotion(emotion): """React to detected emotion in user's message.""" emotion_map = { "happy": "happy", "excited": "happy", "sad": "sympathetic", "frustrated": "sympathetic", "confused": "curious", "curious": "curious", } animation = emotion_map.get(emotion, "listening") animate(animation) def start_listening(): """Show that Haven is listening.""" animate("listening") def idle_mode(): """Start idle animations when not actively conversing.""" animate("idle", duration=30.0)