Spaces:

nihun
/

Translator

Sleeping

App Files Files Community

Translator / utils /speech_to_text.py

nihun

Upload 19 files

85c18a5 verified 2 months ago

raw

history blame contribute delete

3.37 kB

	"""
	Speech-to-Text Module
	=====================
	Converts voice/audio input to text using speech recognition.

	Functions:
	- transcribe_audio: Convert an audio file to text
	- get_language_code: Map a short language code ('en', 'hi') to a recognition code
	- transcribe_from_microphone: Real-time microphone transcription
	"""

	import speech_recognition as sr
	from pathlib import Path
	from typing import Optional, Tuple
	import tempfile
	from pydub import AudioSegment


	def transcribe_audio(
	    audio_path: str,
	    language: str = "en-US"
	) -> Tuple[str, bool]:
	    """
	    Transcribe an audio file to text using Google Speech Recognition.

	    Files whose extension is not ``.wav`` are first converted to a
	    temporary WAV file with pydub; that temporary file is removed again
	    before the function returns, whether recognition succeeded or not.

	    Args:
	        audio_path: Path to the audio file
	        language: Language code for recognition
	            - 'en-US' for English (US)
	            - 'hi-IN' for Hindi (India)

	    Returns:
	        Tuple of (transcribed_text, success_flag)
	            - If successful: (text, True)
	            - If failed: (error_message, False)

	        This function does not raise for processing failures; every
	        error caught below is reported through the returned tuple.

	    Example:
	        >>> text, success = transcribe_audio("recording.wav", "en-US")
	        >>> if success:
	        ...     print(f"You said: {text}")
	        ... else:
	        ...     print(f"Error: {text}")

	    Supported Formats:
	        - WAV (recommended)
	        - MP3
	        - FLAC
	        - OGG
	    """
	    # Set only when a conversion temp file was created, so the `finally`
	    # clause knows whether there is anything to delete.
	    temp_wav_path = None

	    try:
	        source_path = Path(audio_path)

	        # Fail fast with a readable message instead of surfacing a raw
	        # OS error string from deep inside the audio libraries.
	        if not source_path.is_file():
	            return (
	                f"Error processing audio: Audio file not found: {source_path}",
	                False,
	            )

	        recognizer = sr.Recognizer()

	        if source_path.suffix.lower() != '.wav':
	            # Convert to WAV using pydub
	            audio = AudioSegment.from_file(str(source_path))

	            # Reserve a temp file name, then close the handle before
	            # exporting: writing to a still-open NamedTemporaryFile
	            # fails on Windows.
	            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
	                temp_wav_path = tmp.name
	            audio.export(temp_wav_path, format='wav')
	            wav_path = temp_wav_path
	        else:
	            wav_path = str(source_path)

	        # Read the whole file. No ambient-noise calibration is done here:
	        # on a file source it would consume the leading audio (dropping
	        # it from the transcription) and the threshold it sets is not
	        # used by record().
	        with sr.AudioFile(wav_path) as source:
	            audio_data = recognizer.record(source)

	        # Perform speech recognition
	        text = recognizer.recognize_google(audio_data, language=language)

	        return text, True

	    except sr.UnknownValueError:
	        return "Could not understand the audio. Please speak clearly.", False

	    except sr.RequestError as e:
	        return f"Speech recognition service error: {str(e)}", False

	    except Exception as e:
	        # Deliberate catch-all: callers rely on the (message, False)
	        # contract rather than on exceptions.
	        return f"Error processing audio: {str(e)}", False

	    finally:
	        # Best-effort cleanup of the converted WAV; a failure to delete
	        # must not mask the transcription result.
	        if temp_wav_path is not None:
	            try:
	                Path(temp_wav_path).unlink()
	            except OSError:
	                pass


	def get_language_code(lang: str) -> str:
	    """
	    Convert a short language code to a full speech recognition code.

	    The lookup ignores case and surrounding whitespace, and only the
	    primary subtag is used, so region-tagged input such as 'hi-IN' or
	    'hi_IN' resolves the same way as 'hi'.

	    Args:
	        lang: Short language code ('en', 'hi' or 'auto')

	    Returns:
	        Full language code for speech recognition. Unknown codes and
	        non-string input fall back to 'en-US'.

	    Example:
	        >>> get_language_code('en')
	        'en-US'
	        >>> get_language_code('hi')
	        'hi-IN'
	        >>> get_language_code('HI-in')
	        'hi-IN'
	    """
	    default_code = 'en-US'
	    language_map = {
	        'en': 'en-US',
	        'hi': 'hi-IN',
	        'auto': default_code,  # Default to English for auto
	    }

	    if not isinstance(lang, str):
	        return default_code

	    # Reduce e.g. ' HI_in ' to its primary subtag 'hi' before the lookup,
	    # so case or a region suffix does not silently select English.
	    primary = lang.strip().lower().replace('_', '-').split('-', 1)[0]
	    return language_map.get(primary, default_code)