Spaces:

AccidentalCoder80
/

Haven_ReachyMini_Contest_Final

Running

App Files Files Community

Haven_ReachyMini_Contest_Final / haven_kitchen_os /speech.py

devvibes

Fix import errors for daemon compatibility

a21a256 about 1 month ago

raw

history blame contribute delete

5.51 kB

	"""
	🗣️ Haven Speech System with Expressive Animations
	Text-to-speech with personality-driven robot movements!
	"""

	import os
	import time
	import threading
	import re
	from animations import animate, stop_current_animation

	# ElevenLabs is an optional dependency: record whether the import worked so
	# the rest of the module can degrade gracefully when it is missing.
	try:
	    from elevenlabs.client import ElevenLabs
	except ImportError:
	    ELEVENLABS_AVAILABLE = False
	else:
	    ELEVENLABS_AVAILABLE = True

	# A client is only created when both the library and an API key are present;
	# otherwise `client` stays None.
	api_key = os.environ.get("ELEVENLABS_API_KEY")
	if ELEVENLABS_AVAILABLE and api_key:
	    client = ElevenLabs(api_key=api_key)
	else:
	    client = None

	# Maps each persona name to the ElevenLabs voice ID used when speaking as
	# that persona (free-tier compatible voices).
	VOICES = dict(
	    Olivia="21m00Tcm4TlvDq8ikWAM",  # Rachel - warm, professional
	    Brie="EXAVITQu4vr4xnSDxMaL",  # Bella - friendly, enthusiastic
	)

	# 🗣️ Pronunciation dictionary for TTS
	#
	# Maps a regular-expression pattern to its `re.sub` replacement template.
	# Entries are applied in insertion order, so more specific rules must come
	# before more general ones (e.g. "°F" before the bare "°").
	PRONUNCIATIONS = {
	    # --- Times -----------------------------------------------------------
	    # "9:00 AM - 5:30 PM" -> "9:00 AM to 5:30 PM". The alternation bars must
	    # be unescaped: an escaped bar would only match a literal pipe character.
	    r'(\d{1,2}(?::\d{2})?\s(?:AM|PM|a\.?m\.?|p\.?m\.?))\s[-–—]\s(\d{1,2}(?::\d{2})?\s(?:AM|PM|a\.?m\.?|p\.?m\.?))': r'\1 to \2',
	    # "9:00 AM" -> "9 AM"
	    r'(\d{1,2}):00\s*(AM|PM|am|pm|A\.M\.|P\.M\.)': r'\1 \2',
	    # "5:30PM" -> "5:30 PM" (normalizes the spacing before the meridiem)
	    r'(\d{1,2}):(\d{2})\s*(AM|PM|am|pm|A\.M\.|P\.M\.)': r'\1:\2 \3',
	    # "7:00" -> "7 o'clock". The template is double-quoted so no backslash
	    # is needed before the apostrophe; re.sub would otherwise emit it.
	    r'(\d{1,2}):00\b': r"\1 o'clock",

	    # --- Food words ------------------------------------------------------
	    r'\bbrulee\b': 'broo-LAY',
	    r'\bbrûlée\b': 'broo-LAY',
	    r'\bcrème\b': 'krem',
	    r'\bcreme\b': 'krem',
	    r'\bjalapeno\b': 'hala-PEN-yo',
	    r'\bjalapeño\b': 'hala-PEN-yo',
	    r'\bchipotle\b': 'chi-POAT-lay',
	    r'\bquinoa\b': 'KEEN-wah',
	    r'\bgnocchi\b': 'NYOH-kee',
	    r'\bpho\b': 'fuh',
	    r'\bbanh mi\b': 'bahn mee',
	    r'\bai?oli\b': 'eye-OH-lee',
	    r'\bbruschetta\b': 'broo-SKET-ah',
	    r'\bprosciutto\b': 'pro-SHOO-toh',
	    r'\bmascarpone\b': 'mas-kar-POH-nay',
	    r'\btiramisu\b': 'teer-ah-mee-SOO',
	    r'\bespresso\b': 'es-PRESS-oh',
	    r'\bcroissant\b': 'kwah-SAHN',
	    r'\bfilet\b': 'fi-LAY',
	    r'\bhors d\'oeuvres?\b': 'or-DERVS',
	    r'\bsouffle\b': 'soo-FLAY',
	    r'\bsoufflé\b': 'soo-FLAY',
	    r'\bratatouille\b': 'rat-ah-TOO-ee',
	    r'\bvinaigrette\b': 'vin-eh-GRET',

	    # --- Temperatures (unit-specific rules before the bare degree sign) --
	    r'(\d+)°F': r'\1 degrees Fahrenheit',
	    r'(\d+)°C': r'\1 degrees Celsius',
	    r'(\d+)°': r'\1 degrees',

	    # --- Measurement abbreviations ---------------------------------------
	    r'\btsp\.?\b': 'teaspoon',
	    r'\btbsp\.?\b': 'tablespoon',
	    r'\boz\.?\b': 'ounce',
	    r'\blb\.?\b': 'pound',

	    # --- Cooking techniques ----------------------------------------------
	    r'\bsaute\b': 'saw-TAY',
	    r'\bsauté\b': 'saw-TAY',
	    r'\bflambé\b': 'flahm-BAY',
	    r'\bjulienne\b': 'joo-lee-EN',
	    r'\bdeglaze\b': 'dee-GLAYZ',
	}

	def fix_pronunciation(text):
	    """Return *text* with every PRONUNCIATIONS rule applied, case-insensitively.

	    Rules run one after another in the dictionary's order, each operating on
	    the output of the previous one.
	    """
	    spoken = text
	    for regex, substitute in PRONUNCIATIONS.items():
	        spoken = re.compile(regex, re.IGNORECASE).sub(substitute, spoken)
	    return spoken

	def estimate_speech_duration(text):
	    """Return a rough speaking time for *text*.

	    The estimate is the whitespace-separated word count divided by 2.5,
	    and is never lower than 2.0.
	    """
	    word_count = len(text.split())
	    estimate = word_count / 2.5
	    if estimate > 2.0:
	        return estimate
	    return 2.0


	def speak(text, persona="Olivia"):
	    """Speak text with persona-appropriate voice and animations.

	    Bracketed control tags ([CALENDAR...], [EMAIL...], [DAILY...],
	    [REAL-TIME...]) are stripped and pronunciation fixes are applied before
	    the text is sent to TTS. A "greeting" then "speaking" animation runs in
	    a background thread while the audio plays.

	    Args:
	        text: The raw message, possibly containing bracketed control tags.
	        persona: Key into VOICES; unknown personas fall back to Olivia's voice.

	    Speech is best-effort: TTS or playback failures are logged, not raised.
	    When no TTS client is configured, or nothing is left to say after
	    cleaning, the function just pauses briefly so the animation is visible.
	    """
	    # Imported locally so this function does not depend on module-level
	    # imports beyond those the file already has.
	    import logging
	    import subprocess
	    import tempfile

	    clean_text = text
	    for tag in ("CALENDAR", "EMAIL", "DAILY", "REAL-TIME"):
	        clean_text = re.sub(r'\[' + tag + r'[^\]]*\]', '', clean_text)
	    clean_text = clean_text.strip()

	    tts_text = fix_pronunciation(clean_text)
	    # Estimate from the cleaned text: the stripped tags are never spoken,
	    # so counting them would make the animation run too long.
	    duration = estimate_speech_duration(clean_text)

	    def run_animation():
	        animate("greeting", persona=persona)
	        time.sleep(0.5)
	        animate("speaking", persona=persona, duration=duration)

	    anim_thread = threading.Thread(target=run_animation, daemon=True)
	    anim_thread.start()

	    try:
	        if client and tts_text:
	            audio_path = None
	            try:
	                voice_id = VOICES.get(persona, VOICES["Olivia"])
	                audio = client.text_to_speech.convert(
	                    voice_id=voice_id,
	                    model_id="eleven_turbo_v2_5",
	                    text=tts_text,
	                )

	                # A unique temp file per call avoids two overlapping speak()
	                # calls overwriting each other's audio.
	                with tempfile.NamedTemporaryFile(
	                    prefix="haven_speech_", suffix=".mp3", delete=False
	                ) as f:
	                    audio_path = f.name
	                    for chunk in audio:
	                        f.write(chunk)

	                # Argument list, no shell: blocks until playback finishes.
	                subprocess.run(["afplay", audio_path], check=False)
	            except Exception:
	                # Best-effort: never crash the caller, but leave a trace.
	                logging.getLogger(__name__).exception(
	                    "Speech synthesis or playback failed"
	                )
	            finally:
	                if audio_path is not None:
	                    try:
	                        os.remove(audio_path)
	                    except OSError:
	                        pass
	        else:
	            time.sleep(min(duration, 3.0))
	    finally:
	        # Always wind the animation down, even if something above raised.
	        anim_thread.join(timeout=1.0)
	        stop_current_animation()

	def speak_step(step_text, step_number, total_steps):
	    """Speak a recipe step with appropriate Brie animations.

	    Early steps (under 30% progress) are preceded by a "thinking" animation
	    and late steps (over 80% progress) by an "excited" one; steps in between
	    get no lead-in animation.

	    Args:
	        step_text: The text of the step to speak.
	        step_number: Position of this step, used as the progress numerator.
	        total_steps: Total number of steps. When it is 0 or None, progress
	            is undefined and the lead-in animation is skipped rather than
	            raising ZeroDivisionError.
	    """
	    if total_steps:
	        progress = step_number / total_steps

	        if progress < 0.3:
	            animate("thinking", persona="Brie")
	            time.sleep(0.3)
	        elif progress > 0.8:
	            animate("excited", persona="Brie")
	            time.sleep(0.3)

	    speak(step_text, persona="Brie")

	def celebrate_completion():
	    """Play Brie's end-of-recipe celebration: animation, spoken praise, animation."""
	    persona = "Brie"
	    praise = "Amazing work! Your dish is ready. I hope it tastes as wonderful as it smells!"

	    animate("step_complete", persona=persona)
	    speak(praise, persona)
	    animate("excited", persona=persona)

	def olivia_handoff_to_brie():
	    """Animate the handoff: Olivia's "handoff", a short pause, then Brie's "greeting"."""
	    pause_between = 0.3

	    animate("handoff", persona="Olivia")
	    time.sleep(pause_between)
	    animate("greeting", persona="Brie")

	def react_to_emotion(emotion):
	    """Play the animation that matches *emotion*.

	    Emotions without an entry in the table fall back to "listening".
	    """
	    reactions = {
	        "happy": "happy",
	        "excited": "happy",
	        "sad": "sympathetic",
	        "frustrated": "sympathetic",
	        "confused": "curious",
	        "curious": "curious",
	    }
	    try:
	        chosen = reactions[emotion]
	    except KeyError:
	        chosen = "listening"
	    animate(chosen)

	def start_listening():
	    """Trigger the "listening" animation."""
	    animation_name = "listening"
	    animate(animation_name)

	def idle_mode():
	    """Trigger the "idle" animation with a duration of 30.0."""
	    idle_duration = 30.0
	    animate("idle", duration=idle_duration)