Spaces:
Sleeping
Sleeping
| # modules/stt_handler.py | |
| import speech_recognition as sr | |
| import os | |
| import tempfile | |
| from pydub import AudioSegment | |
def transcribe_audio(audio_filepath):
    """Transcribe a recorded audio file to text, with layered fallbacks.

    Loading: the file is first read directly with ``speech_recognition``;
    if that fails it is converted to WAV with pydub and read again.
    Recognition: Whisper is tried first; if it fails for any reason other
    than unintelligible speech, the Google Web Speech API is used instead.

    The function does not raise for processing failures: each one is
    logged with ``print`` and turned into a user-facing message.

    Args:
        audio_filepath: Path to the audio file. The file is deleted before
            returning whenever it existed on entry, whether or not the
            transcription succeeded.

    Returns:
        The transcribed text, or a human-readable apology/error message.
    """
    if not audio_filepath or not os.path.exists(audio_filepath):
        print("❌ STT Error: No audio file provided or file does not exist.")
        return "Sorry, I couldn't process your audio file. Please try recording again."

    print(f"🎙️ Transcribing audio file: {audio_filepath}")

    try:
        # Created inside the try so that a setup failure still produces the
        # friendly message and still triggers cleanup of the input file.
        recognizer = sr.Recognizer()

        try:
            audio_data = _load_audio_data(recognizer, audio_filepath)
        except Exception as convert_error:
            print(f"❌ Audio conversion failed: {convert_error}")
            return "Sorry, I couldn't process your audio format. Please try recording again."

        if audio_data is None:
            return "Sorry, I couldn't load your audio. Please try recording again."

        return _recognize_speech(recognizer, audio_data)
    except Exception as e:
        print(f"❌ Unexpected transcription error: {e}")
        return "Sorry, I encountered an error processing your audio. Please try recording again."
    finally:
        # Clean up the original audio file; it is treated as single-use.
        if os.path.exists(audio_filepath):
            try:
                os.remove(audio_filepath)
                print(f"🗑️ Cleaned up audio file: {audio_filepath}")
            except OSError as e:
                print(f"⚠️ Error deleting temp audio file {audio_filepath}: {e}")


def _load_audio_data(recognizer, audio_filepath):
    """Read *audio_filepath* into audio data, converting to WAV if needed.

    Raises whatever the pydub conversion or the second read raises; the
    caller maps that to a user-facing message.
    """
    # First try direct speech recognition loading.
    try:
        with sr.AudioFile(audio_filepath) as source:
            audio_data = recognizer.record(source)
        print("✅ Audio file loaded successfully")
        return audio_data
    except Exception as audio_load_error:
        print(f"⚠️ Direct audio loading failed: {audio_load_error}")

    # Fall back to converting with pydub and loading the converted WAV.
    print("🔄 Converting audio format...")
    audio = AudioSegment.from_file(audio_filepath)

    # Only the unique name is needed; close the handle before exporting so
    # the path can be reopened for writing.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav:
        temp_wav_path = temp_wav.name

    try:
        audio.export(temp_wav_path, format="wav")
        with sr.AudioFile(temp_wav_path) as source:
            audio_data = recognizer.record(source)
    finally:
        # Remove the temp file on failure too, so failed conversions do not
        # leave stray WAV files behind.
        try:
            os.remove(temp_wav_path)
        except OSError as cleanup_error:
            print(f"⚠️ Error deleting temp audio file {temp_wav_path}: {cleanup_error}")

    print("✅ Audio converted and loaded successfully")
    return audio_data


def _recognize_speech(recognizer, audio_data):
    """Run Whisper on *audio_data*, falling back to Google Web Speech."""
    try:
        print("🤖 Transcribing with Whisper...")
        text = recognizer.recognize_whisper(audio_data, language="english")
        print(f"✅ Transcription successful: {text[:100]}...")
        return text if text.strip() else "I didn't catch what you said. Could you please speak more clearly?"
    except sr.UnknownValueError:
        print("⚠️ Whisper could not understand the audio")
        return "I couldn't understand what you said. Please speak more clearly and try again."
    except Exception as whisper_error:
        # Catch broadly, not just sr.RequestError, so that any other Whisper
        # failure (e.g. a missing dependency) also reaches the fallback.
        print(f"⚠️ Whisper service error: {whisper_error}")

    # Fallback to Google Web Speech API.
    try:
        print("🔄 Falling back to Google Speech Recognition...")
        text = recognizer.recognize_google(audio_data)
        print(f"✅ Google transcription successful: {text[:100]}...")
        return text if text.strip() else "I didn't catch what you said. Could you please try again?"
    except Exception as google_error:
        print(f"❌ Google fallback failed: {google_error}")
        return "I'm having trouble with speech recognition. Please try again or check your microphone."