"""Voice Input Service for Speech Recognition"""

import io
import re
from typing import Optional, Callable

try:
    import speech_recognition as sr
except ImportError:
    # Optional dependency: without it the service reports itself as
    # unavailable, while the pure-text request parsing keeps working.
    sr = None


# Phrases that introduce a request and carry no song/artist information.
_ACTION_PHRASES = (
    "i want to hear",
    "i want to",
    "want to hear",
    "i'd like to hear",
    "i would like to hear",
    "play",
    "put on",
    "listen to",
    "i want",
    "can you",
    "please",
    "i'd like",
    "i would like",
)

# Longest alternatives first so "i want to hear" wins over "i want".
_ACTION_ALTERNATION = "|".join(
    re.escape(phrase)
    for phrase in sorted(_ACTION_PHRASES, key=len, reverse=True)
)

# One or more action phrases at the very start of the utterance (use .match).
_LEADING_ACTIONS_RE = re.compile(rf"(?:(?:{_ACTION_ALTERNATION})(?!\w)\s*)+")

# A single action phrase anywhere, matched on word boundaries only.
_ANY_ACTION_RE = re.compile(rf"(?<!\w)(?:{_ACTION_ALTERNATION})(?!\w)")

# Words dropped from the free-text song query.
_FILLER_WORDS = frozenset({"i", "a", "an", "the"})

_PLAY_PHRASES = ("play", "put on", "listen to", "want to hear")
_SKIP_WORDS = ("skip", "next", "change")

# Checked in this order; the first one present in the utterance wins.
_KNOWN_GENRES = (
    "pop", "rock", "jazz", "classical", "electronic", "hip-hop",
    "hip hop", "country", "indie", "rap", "blues", "folk",
)

# Nouns that usually follow a genre word ("reggae music", "metal song").
_GENRE_INDICATORS = frozenset({"music", "song", "track", "tune"})

# Quantifiers that usually precede a genre word ("some reggae").
_GENRE_QUANTIFIERS = frozenset({"some", "any"})

# Words that are never treated as a custom genre.
_NON_GENRE_WORDS = frozenset({
    "i", "want", "to", "hear", "play", "put", "on", "listen",
    "some", "a", "an", "the", "me", "my",
})

# Checked in this order; the first one present in the utterance wins.
_MOODS = (
    "happy", "sad", "energetic", "calm", "relaxed", "focused",
    "upbeat", "chill",
)


def _contains_phrase(text: str, phrase: str) -> bool:
    """Return True if *phrase* occurs in *text* as whole word(s)."""
    pattern = rf"(?<!\w){re.escape(phrase)}(?!\w)"
    return re.search(pattern, text) is not None


def _first_phrase(text: str, phrases) -> Optional[str]:
    """Return the first entry of *phrases* present in *text*, else None."""
    for phrase in phrases:
        if _contains_phrase(text, phrase):
            return phrase
    return None


class VoiceInputService:
    """Service for handling voice input and speech recognition"""

    def __init__(self):
        """Initialize voice input service

        Sets ``available`` to True only when the speech_recognition package
        is importable and a microphone could be opened and calibrated.
        Failures are reported on stdout instead of being raised.
        """
        self.recognizer = None
        self.microphone = None
        self.available = False

        if sr is None:
            print("Warning: speech_recognition is not installed")
            print("Voice input will not be available")
            print("To enable voice input: pip install SpeechRecognition")
            return

        self.recognizer = sr.Recognizer()

        try:
            # Try to initialize microphone (requires pyaudio)
            self.microphone = sr.Microphone()

            # Adjust for ambient noise
            with self.microphone as source:
                self.recognizer.adjust_for_ambient_noise(source, duration=0.5)
            self.available = True
        except OSError as e:
            print(f"Warning: Could not initialize microphone: {e}")
            print("Voice input will not be available")
            print("To enable voice input, install PortAudio:")
            print(" macOS: brew install portaudio")
            print(" Linux: sudo apt-get install portaudio19-dev")
            print(" Then: pip install pyaudio")
            self.available = False
        except Exception as e:
            # Best effort: any other failure also just disables voice input.
            print(f"Warning: Could not initialize microphone: {e}")
            print("Voice input will not be available")
            self.available = False

    def listen_and_recognize(self, timeout: int = 5,
                             phrase_time_limit: int = 10) -> Optional[str]:
        """
        Listen to microphone and recognize speech

        Args:
            timeout: Maximum time to wait for speech to start
            phrase_time_limit: Maximum time for a phrase

        Returns:
            Recognized text or None if error
        """
        if not self.available or not self.microphone:
            return None

        try:
            with self.microphone as source:
                print("Listening... Speak now!")
                audio = self.recognizer.listen(
                    source,
                    timeout=timeout,
                    phrase_time_limit=phrase_time_limit
                )

            print("Processing speech...")
            # Use Google's free speech recognition API
            text = self.recognizer.recognize_google(audio)
            print(f"Recognized: {text}")
            return text
        except sr.WaitTimeoutError:
            print("No speech detected within timeout")
            return None
        except sr.UnknownValueError:
            print("Could not understand audio")
            return None
        except sr.RequestError as e:
            print(f"Error with speech recognition service: {e}")
            return None
        except Exception as e:
            # Best effort: never let a recognition failure reach the caller.
            print(f"Error during voice recognition: {e}")
            return None

    def process_song_request(self, recognized_text: Optional[str]) -> dict:
        """
        Process a song request from recognized speech

        All keyword matching is done on whole words, so "baby" does not
        count as "by" and "wrap" does not count as the genre "rap".

        Args:
            recognized_text: Text recognized from speech. None is treated
                like an empty utterance.

        Returns:
            Dictionary with the keys "original_text", "action", "song",
            "artist", "genre" and "mood". Extracted values are lower-case;
            fields that could not be determined are None.
        """
        raw_text = recognized_text or ""
        # Lower-case and collapse whitespace so phrase matching is uniform.
        text_lower = " ".join(raw_text.lower().split())

        request = {
            "original_text": recognized_text,
            "action": None,
            "song": None,
            "artist": None,
            "genre": None,
            "mood": None
        }

        # The utterance without its action phrase, articles kept
        # ("yesterday by the beatles").
        query = self._strip_action_phrases(text_lower)
        # The same without filler words; used for free-text queries and
        # for the custom-genre heuristic.
        cleaned_text = " ".join(
            word for word in query.split() if word not in _FILLER_WORDS
        )

        request["action"] = self._detect_action(text_lower)

        # "<song> by <artist>": split on the LAST whole-word "by" so titles
        # such as "stand by me" survive when an artist follows.
        song_part, separator, artist_part = query.rpartition(" by ")
        if separator:
            request["song"] = song_part
            request["artist"] = artist_part
        else:
            # No artist given: the whole cleaned query is the song. Genre
            # and mood words stay in it so a free-text search still sees
            # them; fall back to the raw text if nothing is left.
            request["song"] = (
                cleaned_text
                or " ".join(raw_text.split())
                or recognized_text
            )

        request["genre"] = self._detect_genre(text_lower, cleaned_text.split())
        request["mood"] = _first_phrase(text_lower, _MOODS)

        return request

    @staticmethod
    def _strip_action_phrases(text_lower: str) -> str:
        """Return *text_lower* without its request phrase(s).

        A leading run of action phrases ("please play ...", "i want to
        listen to ...") is removed completely. If the utterance does not
        start with one, the leftmost embedded phrase is removed instead.
        """
        leading = _LEADING_ACTIONS_RE.match(text_lower)
        if leading:
            remainder = text_lower[leading.end():]
        else:
            remainder = _ANY_ACTION_RE.sub(" ", text_lower, count=1)
        return " ".join(remainder.split())

    @staticmethod
    def _detect_action(text_lower: str) -> str:
        """Return "skip" for skip requests, otherwise "play" (the default)."""
        if _first_phrase(text_lower, _PLAY_PHRASES) is not None:
            return "play"
        if _first_phrase(text_lower, _SKIP_WORDS) is not None:
            return "skip"
        return "play"

    @staticmethod
    def _detect_genre(text_lower: str, words) -> Optional[str]:
        """Return a known genre, else a heuristically guessed one, else None.

        Args:
            text_lower: The normalized, lower-case utterance.
            words: The cleaned query split into words.
        """
        known = _first_phrase(text_lower, _KNOWN_GENRES)
        if known is not None:
            return known

        # Custom genres: "<word> music/song/track/tune" or "some/any <word>".
        for index, word in enumerate(words):
            has_next = index + 1 < len(words)
            if has_next and words[index + 1] in _GENRE_INDICATORS:
                if word not in _NON_GENRE_WORDS and len(word) > 2:
                    return word
                continue
            if (
                index > 0
                and words[index - 1] in _GENRE_QUANTIFIERS
                and word not in _NON_GENRE_WORDS
                and len(word) > 3
                and word[0].isalpha()
            ):
                return word
        return None