# devvibes's picture
# Fix import errors for daemon compatibility
# a21a256
"""
🗣️ Haven Speech System with Expressive Animations
Text-to-speech with personality-driven robot movements!
"""
import logging
import os
import re
import subprocess
import tempfile
import threading
import time

from animations import animate, stop_current_animation
# ElevenLabs is an optional dependency: record whether it could be imported
# so the rest of the module can run without it.
try:
    from elevenlabs.client import ElevenLabs
except ImportError:
    ELEVENLABS_AVAILABLE = False
else:
    ELEVENLABS_AVAILABLE = True

# A client is only built when both the package and an API key are present;
# otherwise `client` stays None and speak() takes its no-audio path.
api_key = os.getenv("ELEVENLABS_API_KEY")
client = None
if ELEVENLABS_AVAILABLE and api_key:
    client = ElevenLabs(api_key=api_key)
# Persona name -> ElevenLabs voice ID (free-tier compatible).
# speak() falls back to the "Olivia" entry for unknown personas.
VOICES = dict(
    Olivia="21m00Tcm4TlvDq8ikWAM",  # Rachel - warm, professional
    Brie="EXAVITQu4vr4xnSDxMaL",  # Bella - friendly, enthusiastic
)
# 🗣️ Pronunciation dictionary for TTS
# Ordered table of regex -> replacement rules. fix_pronunciation() applies
# them top to bottom with re.IGNORECASE, so order matters: specific rules
# (time ranges, "°F") must precede the generic ones (":00", "°").

# Optional abbreviation dot for unit rules. It is swallowed only when the
# sentence clearly continues (whitespace, then a lowercase letter or digit),
# so "1 tsp. salt" -> "1 teaspoon salt" while a sentence-ending "2 oz. Then"
# keeps its full stop. (?-i:...) switches case-insensitivity back off for
# that one check, because the rules are applied with re.IGNORECASE.
_ABBREV_DOT = r'(?:\.(?=\s+(?-i:[a-z0-9])))?'

PRONUNCIATIONS = {
    # --- Times ---
    # "9 AM - 5 PM" -> "9 AM to 5 PM"
    r'(\d{1,2}(?::\d{2})?\s*(?:AM|PM|a\.?m\.?|p\.?m\.?))\s*[-–—]\s*(\d{1,2}(?::\d{2})?\s*(?:AM|PM|a\.?m\.?|p\.?m\.?))': r'\1 to \2',
    # "3:00 PM" -> "3 PM"
    r'(\d{1,2}):00\s*(AM|PM|am|pm|A\.M\.|P\.M\.)': r'\1 \2',
    # "3:30PM" -> "3:30 PM" (normalise spacing only)
    r'(\d{1,2}):(\d{2})\s*(AM|PM|am|pm|A\.M\.|P\.M\.)': r'\1:\2 \3',
    # "3:00" -> "3 o'clock". Double-quoted on purpose: an escaped quote in a
    # raw replacement template would leave a literal backslash in the output.
    r'(\d{1,2}):00\b': r"\1 o'clock",
    # --- Food words ---
    r'\bbrulee\b': 'broo-LAY',
    r'\bbrûlée\b': 'broo-LAY',
    r'\bcrème\b': 'krem',
    r'\bcreme\b': 'krem',
    r'\bjalapeno\b': 'hala-PEN-yo',
    r'\bjalapeño\b': 'hala-PEN-yo',
    r'\bchipotle\b': 'chi-POAT-lay',
    r'\bquinoa\b': 'KEEN-wah',
    r'\bgnocchi\b': 'NYOH-kee',
    r'\bpho\b': 'fuh',
    r'\bbanh mi\b': 'bahn mee',
    r'\bai?oli\b': 'eye-OH-lee',
    r'\bbruschetta\b': 'broo-SKET-ah',
    r'\bprosciutto\b': 'pro-SHOO-toh',
    r'\bmascarpone\b': 'mas-kar-POH-nay',
    r'\btiramisu\b': 'teer-ah-mee-SOO',
    r'\bespresso\b': 'es-PRESS-oh',
    r'\bcroissant\b': 'kwah-SAHN',
    r'\bfilet\b': 'fi-LAY',
    r'\bhors d\'oeuvres?\b': 'or-DERVS',
    r'\bsouffle\b': 'soo-FLAY',
    r'\bsoufflé\b': 'soo-FLAY',
    r'\bratatouille\b': 'rat-ah-TOO-ee',
    r'\bvinaigrette\b': 'vin-eh-GRET',
    # --- Temperatures (unit-specific rules before the bare degree sign) ---
    r'(\d+)°F': r'\1 degrees Fahrenheit',
    r'(\d+)°C': r'\1 degrees Celsius',
    r'(\d+)°': r'\1 degrees',
    # --- Unit abbreviations ---
    r'\btsp\b' + _ABBREV_DOT: 'teaspoon',
    r'\btbsp\b' + _ABBREV_DOT: 'tablespoon',
    r'\boz\b' + _ABBREV_DOT: 'ounce',
    r'\blb\b' + _ABBREV_DOT: 'pound',
    # --- Cooking techniques ---
    r'\bsaute\b': 'saw-TAY',
    r'\bsauté\b': 'saw-TAY',
    r'\bflambe\b': 'flahm-BAY',
    r'\bflambé\b': 'flahm-BAY',
    r'\bjulienne\b': 'joo-lee-EN',
    r'\bdeglaze\b': 'dee-GLAYZ',
}
def fix_pronunciation(text):
    """Return *text* rewritten for clearer text-to-speech output.

    Every rule in PRONUNCIATIONS is applied in table order,
    case-insensitively, each one operating on the output of the previous.
    """
    spoken = text
    for rule in PRONUNCIATIONS:
        spoken = re.compile(rule, re.IGNORECASE).sub(PRONUNCIATIONS[rule], spoken)
    return spoken
def estimate_speech_duration(text):
    """Return a rough speaking time for *text*, in seconds.

    Assumes 2.5 whitespace-separated words per second and never returns
    less than 2.0, so very short or empty text still gets a visible pause.
    """
    word_count = len(text.split())
    seconds = word_count / 2.5
    if seconds > 2.0:
        return seconds
    return 2.0
def _synthesize_and_play(tts_text, persona):
    """Synthesize *tts_text* with the ElevenLabs client and play it via ``afplay``.

    Raises whatever the client, the file system or the player launch raises;
    speak() is responsible for handling failures.
    """
    voice_id = VOICES.get(persona, VOICES["Olivia"])
    audio = client.text_to_speech.convert(
        voice_id=voice_id,
        model_id="eleven_turbo_v2_5",
        text=tts_text,
    )
    # A unique temp file (instead of one fixed /tmp path) keeps overlapping
    # speak() calls from clobbering each other's audio.
    tmp = tempfile.NamedTemporaryFile(
        prefix="haven_speech_", suffix=".mp3", delete=False
    )
    try:
        with tmp:
            for chunk in audio:
                tmp.write(chunk)
        # Argument list, not a shell string: the path is never shell-parsed.
        subprocess.run(["afplay", tmp.name], check=False)
    finally:
        os.remove(tmp.name)


def speak(text, persona="Olivia"):
    """Speak *text* with the persona's voice while playing matching animations.

    Bracketed context tags ([CALENDAR...], [EMAIL...], [DAILY...],
    [REAL-TIME...]) are stripped and pronunciation fixes are applied before
    synthesis. A background thread plays a "greeting" animation followed by a
    "speaking" animation sized to the estimated speech length.

    Args:
        text: Raw message text, possibly containing context tags.
        persona: Key into VOICES; unknown personas use the "Olivia" voice.

    Speech is best-effort: when no TTS client is configured the function just
    waits briefly, and a failed synthesis/playback is logged rather than
    raised. The current animation is always stopped before returning.
    """
    clean_text = text
    # Tags are removed one kind at a time, in this order.
    for tag in ("CALENDAR", "EMAIL", "DAILY", "REAL-TIME"):
        clean_text = re.sub(r'\[' + tag + r'[^\]]*\]', '', clean_text)
    clean_text = clean_text.strip()
    tts_text = fix_pronunciation(clean_text)
    # Time the animation from what is actually spoken, not from the raw text
    # whose bracketed tags would inflate the word count.
    duration = estimate_speech_duration(tts_text)

    def run_animation():
        animate("greeting", persona=persona)
        time.sleep(0.5)
        animate("speaking", persona=persona, duration=duration)

    anim_thread = threading.Thread(target=run_animation, daemon=True)
    anim_thread.start()
    try:
        if not client:
            # No TTS available: hold the animation for a short, capped pause.
            time.sleep(min(duration, 3.0))
        elif tts_text:  # nothing left to say once tags are stripped -> skip TTS
            try:
                _synthesize_and_play(tts_text, persona)
            except Exception:
                # Still best-effort, but no longer silent.
                logging.getLogger(__name__).warning(
                    "Speech synthesis/playback failed", exc_info=True
                )
    finally:
        anim_thread.join(timeout=1.0)
        stop_current_animation()
def speak_step(step_text, step_number, total_steps):
    """Speak one recipe step as Brie, with a lead-in animation by progress.

    Args:
        step_text: Text of the step to speak.
        step_number: Position of this step within the recipe.
        total_steps: Number of steps in the recipe.

    While ``step_number / total_steps`` is below 0.3 a "thinking" animation
    precedes the speech; above 0.8 an "excited" one does; in between the step
    is spoken directly. When ``total_steps`` is zero or missing, progress
    cannot be computed, so the lead-in is skipped instead of raising
    ZeroDivisionError.
    """
    lead_in = None
    if total_steps:
        progress = step_number / total_steps
        if progress < 0.3:
            lead_in = "thinking"
        elif progress > 0.8:
            lead_in = "excited"
    if lead_in is not None:
        animate(lead_in, persona="Brie")
        time.sleep(0.3)
    speak(step_text, persona="Brie")
def celebrate_completion():
    """Mark the end of a recipe as Brie.

    Plays the "step_complete" animation, speaks a congratulation, then
    finishes with the "excited" animation.
    """
    persona = "Brie"
    praise = "Amazing work! Your dish is ready. I hope it tastes as wonderful as it smells!"
    animate("step_complete", persona=persona)
    speak(praise, persona)
    animate("excited", persona=persona)
def olivia_handoff_to_brie():
    """Animate the handoff from Olivia to Brie.

    Olivia plays "handoff", then after a 0.3 s pause Brie plays "greeting".
    """
    sequence = (("handoff", "Olivia"), ("greeting", "Brie"))
    for index, (move, who) in enumerate(sequence):
        if index:
            # Short beat between the two personas' animations.
            time.sleep(0.3)
        animate(move, persona=who)
def react_to_emotion(emotion):
    """Play the animation that matches the emotion detected in a user message.

    happy/excited -> "happy", sad/frustrated -> "sympathetic",
    confused/curious -> "curious"; any other value falls back to "listening".
    """
    reactions = {
        **dict.fromkeys(("happy", "excited"), "happy"),
        **dict.fromkeys(("sad", "frustrated"), "sympathetic"),
        **dict.fromkeys(("confused", "curious"), "curious"),
    }
    try:
        chosen = reactions[emotion]
    except KeyError:
        chosen = "listening"
    animate(chosen)
def start_listening():
    """Play the "listening" animation to show that Haven is listening."""
    animation_name = "listening"
    animate(animation_name)
def idle_mode(duration=30.0):
    """Start the "idle" animation used when not actively conversing.

    Args:
        duration: Value forwarded to ``animate`` as ``duration`` (presumably
            seconds; confirm against the animations module). Defaults to
            30.0, the value that was previously hard-coded.
    """
    animate("idle", duration=duration)