Spaces:
Sleeping
Sleeping
File size: 8,457 Bytes
4cdaf71 61f40a7 4cdaf71 61f40a7 4cdaf71 61f40a7 4cdaf71 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 |
"""Voice Input Service for Speech Recognition"""
import io
import re
from typing import Optional, Callable

import speech_recognition as sr
class VoiceInputService:
    """Service for handling voice input and speech recognition.

    Wraps a ``speech_recognition`` recognizer/microphone pair and offers a
    small keyword-based parser that turns a recognized utterance into a
    structured song request.
    """

    # Politeness fillers dropped from the *start* of an utterance (each at
    # most once) before the single action phrase below is removed, so that
    # "can you please play X" reduces to "X".
    _LEADING_FILLERS = ("can you", "please")

    # At most one of these is removed from the utterance. Order matters:
    # a longer phrase must come before any shorter phrase it contains.
    _ACTION_PHRASES = (
        "i want to hear", "i want to", "want to hear",
        "i'd like to hear", "i would like to hear",
        "play", "put on", "listen to", "i want",
        "can you", "please", "i'd like", "i would like",
    )

    # Words dropped from the free-text query (not from "song by artist").
    _STOP_WORDS = frozenset({"i", "a", "an", "the"})

    _PLAY_TRIGGERS = ("play", "put on", "listen to", "want to hear")
    _SKIP_TRIGGERS = ("skip", "next", "change")

    _KNOWN_GENRES = (
        "pop", "rock", "jazz", "classical", "electronic", "hip-hop",
        "hip hop", "country", "indie", "rap", "blues", "folk",
    )
    # A word directly before one of these is a genre candidate ("reggae music").
    _GENRE_INDICATORS = frozenset({"music", "song", "track", "tune"})
    # A word directly after one of these is a genre candidate ("some reggae").
    _GENRE_QUANTIFIERS = frozenset({"some", "any"})
    # Words that are never treated as a custom genre.
    _COMMON_WORDS = frozenset({
        "i", "want", "to", "hear", "play", "put", "on", "listen",
        "some", "a", "an", "the", "me", "my",
    })

    _MOODS = (
        "happy", "sad", "energetic", "calm", "relaxed", "focused",
        "upbeat", "chill",
    )

    def __init__(self):
        """Initialize the recognizer and try to open the default microphone.

        On failure a warning is printed, ``available`` stays ``False`` and
        no exception propagates, so the service can be constructed on
        machines without audio support.
        """
        self.recognizer = sr.Recognizer()
        self.microphone = None
        self.available = False

        try:
            # Try to initialize microphone (requires pyaudio)
            self.microphone = sr.Microphone()
            # Adjust for ambient noise
            with self.microphone as source:
                self.recognizer.adjust_for_ambient_noise(source, duration=0.5)
            self.available = True
        except (OSError, AttributeError) as e:
            # speech_recognition reports a missing/too-old PyAudio as
            # AttributeError, and audio-device problems surface as OSError;
            # the install hints are relevant to both.
            print(f"Warning: Could not initialize microphone: {e}")
            print("Voice input will not be available")
            print("To enable voice input, install PortAudio:")
            print(" macOS: brew install portaudio")
            print(" Linux: sudo apt-get install portaudio19-dev")
            print(" Then: pip install pyaudio")
            self.available = False
        except Exception as e:
            # Best effort: any other failure only disables voice input.
            print(f"Warning: Could not initialize microphone: {e}")
            print("Voice input will not be available")
            self.available = False

    def listen_and_recognize(self, timeout: int = 5, phrase_time_limit: int = 10) -> Optional[str]:
        """Listen on the microphone and return the recognized text.

        Args:
            timeout: Maximum time to wait for speech to start.
            phrase_time_limit: Maximum time for a phrase.

        Returns:
            The recognized text, or ``None`` when voice input is
            unavailable, nothing was heard within ``timeout``, the audio
            could not be understood, or recognition failed. Failures are
            printed, never raised.
        """
        if not self.available or not self.microphone:
            return None

        try:
            with self.microphone as source:
                print("Listening... Speak now!")
                audio = self.recognizer.listen(
                    source,
                    timeout=timeout,
                    phrase_time_limit=phrase_time_limit,
                )
            print("Processing speech...")
            # Use Google's free speech recognition API
            text = self.recognizer.recognize_google(audio)
            print(f"Recognized: {text}")
            return text
        except sr.WaitTimeoutError:
            print("No speech detected within timeout")
            return None
        except sr.UnknownValueError:
            print("Could not understand audio")
            return None
        except sr.RequestError as e:
            print(f"Error with speech recognition service: {e}")
            return None
        except Exception as e:
            print(f"Error during voice recognition: {e}")
            return None

    @staticmethod
    def _find_phrase(text, phrase):
        """Return the first whole-word match of ``phrase`` in ``text``, or None."""
        # Lookarounds instead of plain substring search so that "by" does
        # not match inside "baby" and "rap" does not match inside "wrap".
        return re.search(rf"(?<!\w){re.escape(phrase)}(?!\w)", text)

    @classmethod
    def _first_present(cls, text, candidates):
        """Return the first candidate occurring as whole words in ``text``, or None."""
        for candidate in candidates:
            if cls._find_phrase(text, candidate):
                return candidate
        return None

    @classmethod
    def _strip_request_phrases(cls, text):
        """Remove leading politeness fillers and one action phrase from ``text``."""
        remaining = text

        # Each filler is stripped at most once so that a title such as
        # "please please me" is not consumed entirely.
        pending = list(cls._LEADING_FILLERS)
        stripped = True
        while stripped:
            stripped = False
            for filler in pending:
                if remaining.startswith(filler + " "):
                    remaining = remaining[len(filler):].lstrip()
                    pending.remove(filler)
                    stripped = True
                    break

        # Only one action phrase, and only its first occurrence, is removed.
        for phrase in cls._ACTION_PHRASES:
            match = cls._find_phrase(remaining, phrase)
            if match:
                remaining = remaining[:match.start()] + " " + remaining[match.end():]
                break

        return " ".join(remaining.split())

    @classmethod
    def _extract_custom_genre(cls, words):
        """Guess a genre that is not in the known list, or return None.

        A word qualifies when it directly precedes a genre indicator
        ("reggae music") or directly follows a quantifier ("some reggae").
        """
        for index, word in enumerate(words):
            has_next = index + 1 < len(words)
            if has_next and words[index + 1] in cls._GENRE_INDICATORS:
                if word not in cls._COMMON_WORDS and len(word) > 2:
                    return word
            elif (
                index > 0
                and words[index - 1] in cls._GENRE_QUANTIFIERS
                and word not in cls._COMMON_WORDS
                and len(word) > 3
                and word[0].isalpha()
            ):
                return word
        return None

    def process_song_request(self, recognized_text: str) -> dict:
        """Parse a recognized utterance into a structured song request.

        Matching is keyword based and works on whole words of the
        lower-cased text.

        Args:
            recognized_text: Text recognized from speech. ``None`` or an
                empty string yields a request with no song.

        Returns:
            Dictionary with the keys:
              * ``original_text``: the input, unchanged.
              * ``action``: ``"play"`` or ``"skip"`` (``"play"`` by default).
              * ``song``: lower-cased title for "<song> by <artist>",
                otherwise the cleaned free-text query.
              * ``artist``: lower-cased artist for "<song> by <artist>",
                else ``None``.
              * ``genre``: a known genre, a heuristically guessed one, or
                ``None``.
              * ``mood``: a known mood keyword or ``None``.
        """
        raw_text = recognized_text or ""
        text_lower = " ".join(raw_text.lower().split())

        request = {
            "original_text": recognized_text,
            "action": None,
            "song": None,
            "artist": None,
            "genre": None,
            "mood": None,
        }

        # Utterance without the request wording, and the same text with
        # stop words dropped for use as a free-text query.
        query = self._strip_request_phrases(text_lower)
        words = [w for w in query.split() if w not in self._STOP_WORDS]
        cleaned_text = " ".join(words)

        # Detect action; an explicit play request wins over skip words.
        if self._first_present(text_lower, self._PLAY_TRIGGERS):
            request["action"] = "play"
        elif self._first_present(text_lower, self._SKIP_TRIGGERS):
            request["action"] = "skip"
        else:
            request["action"] = "play"  # Default

        # "<song> by <artist>": the greedy first group makes the split
        # happen at the LAST standalone "by", so "stand by me by ben e king"
        # keeps "stand by me" as the song.
        by_match = re.fullmatch(r"(.+)\s+by\s+(.+)", query)
        if by_match:
            request["song"] = by_match.group(1).strip()
            request["artist"] = by_match.group(2).strip()
        else:
            # No artist given: the cleaned text is the song/query. Genre and
            # mood words are deliberately left in place, because stripping
            # them would corrupt titles such as "country roads".
            request["song"] = (
                cleaned_text or " ".join(raw_text.split()) or recognized_text
            )

        # Known genres first, then a heuristic guess for custom ones.
        request["genre"] = (
            self._first_present(text_lower, self._KNOWN_GENRES)
            or self._extract_custom_genre(words)
        )

        # Check for mood keywords
        request["mood"] = self._first_present(text_lower, self._MOODS)

        return request
|