Spaces:
Sleeping
Sleeping
| """ | |
| Speech-to-Text Module | |
| ===================== | |
| Converts voice/audio input to text using speech recognition. | |
| Functions: | |
| - transcribe_audio: Convert audio file to text | |
| - transcribe_from_microphone: Real-time microphone transcription | |
| """ | |
| import speech_recognition as sr | |
| from pathlib import Path | |
| from typing import Optional, Tuple | |
| import tempfile | |
| from pydub import AudioSegment | |
def transcribe_audio(
    audio_path: str,
    language: str = "en-US"
) -> Tuple[str, bool]:
    """
    Transcribe an audio file to text using Google Speech Recognition.

    Files whose extension is not ``.wav`` are first converted to a
    temporary WAV file with pydub; that temporary file is removed again
    before the function returns, whether or not recognition succeeded.

    Args:
        audio_path: Path to the audio file
        language: Language code for recognition
            - 'en-US' for English (US)
            - 'hi-IN' for Hindi (India)

    Returns:
        Tuple of (transcribed_text, success_flag)
            - If successful: (text, True)
            - If failed: (error_message, False)

        Failures are reported through the return value and are not
        raised: unintelligible audio, a recognition service/request
        error, and any other exception raised while loading or
        converting the file each produce ``(message, False)``.

    Example:
        >>> text, success = transcribe_audio("recording.wav", "en-US")
        >>> if success:
        ...     print(f"You said: {text}")
        ... else:
        ...     print(f"Error: {text}")

    Supported Formats:
        - WAV (recommended)
        - MP3
        - FLAC
        - OGG
    """
    recognizer = sr.Recognizer()
    # Set only when a conversion happened, so the caller's own file is
    # never deleted by the cleanup in ``finally``.
    temp_wav: Optional[Path] = None

    try:
        source_path = Path(audio_path)

        if source_path.suffix.lower() != '.wav':
            # Decode with pydub first so a bad input fails before any
            # temporary file has been created.
            segment = AudioSegment.from_file(str(source_path))

            # Reserve a unique name and close the handle straight away;
            # exporting to an already-open file fails on some platforms.
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
                temp_wav = Path(tmp.name)
            segment.export(str(temp_wav), format='wav')
            wav_path = str(temp_wav)
        else:
            wav_path = str(source_path)

        # Read the whole file. adjust_for_ambient_noise() is deliberately
        # not called here: it reads (and so consumes) the start of the
        # stream to calibrate a threshold that record() does not use,
        # which would discard the first half second of speech.
        with sr.AudioFile(wav_path) as source:
            audio_data = recognizer.record(source)

        text = recognizer.recognize_google(audio_data, language=language)
        return text, True

    except sr.UnknownValueError:
        return "Could not understand the audio. Please speak clearly.", False
    except sr.RequestError as e:
        return f"Speech recognition service error: {str(e)}", False
    except Exception as e:
        # Boundary handler: callers rely on the (message, False) contract
        # rather than on exceptions.
        return f"Error processing audio: {str(e)}", False
    finally:
        if temp_wav is not None:
            try:
                temp_wav.unlink()
            except OSError:
                # Best-effort cleanup; a leftover temp file must not mask
                # the transcription result being returned.
                pass
def get_language_code(lang: str) -> str:
    """
    Map a short language tag to the full code used for speech recognition.

    Args:
        lang: Short language code ('en', 'hi' or 'auto')

    Returns:
        The matching full language code. 'auto' and any tag that is not
        in the table resolve to 'en-US'.

    Example:
        >>> get_language_code('en')
        'en-US'
        >>> get_language_code('hi')
        'hi-IN'
    """
    fallback = 'en-US'
    full_codes = {
        'en': 'en-US',
        'hi': 'hi-IN',
        'auto': fallback,  # no detection is done here; treated as English
    }
    try:
        return full_codes[lang]
    except KeyError:
        # Unknown tags fall back to English instead of raising.
        return fallback