""" Text-to-Speech Engine ===================== Converts text to speech audio using Google Text-to-Speech (gTTS). Functions: - synthesize_speech: Convert text to MP3 audio file - get_audio_duration: Get duration of audio file """ from gtts import gTTS from pathlib import Path import time from typing import Literal import os def synthesize_speech( text: str, language: Literal["en", "hi"], output_dir: Path, slow: bool = False ) -> str: """ Convert text to speech and save as MP3 file. Uses Google Text-to-Speech (gTTS) for natural-sounding speech synthesis in English and Hindi. Args: text: Text to convert to speech language: Language code ('en' for English, 'hi' for Hindi) output_dir: Directory to save the audio file slow: If True, speak slowly (useful for language learning) Returns: Path to the generated MP3 file """ # Ensure output directory exists output_dir = Path(output_dir) output_dir.mkdir(parents=True, exist_ok=True) # Generate unique filename using timestamp timestamp = int(time.time() * 1000) output_path = output_dir / f"tts_{timestamp}.mp3" try: # Create TTS object and save to file tts = gTTS(text=text, lang=language, slow=slow) tts.save(str(output_path)) # Verify file was created if not output_path.exists(): raise FileNotFoundError(f"TTS file was not created: {output_path}") # Verify file has content if output_path.stat().st_size == 0: raise ValueError("TTS file is empty") return str(output_path) except Exception as e: print(f"TTS Error: {e}") raise def get_audio_duration(audio_path: str) -> float: """ Get the duration of an audio file in seconds. Args: audio_path: Path to the audio file Returns: Duration in seconds (estimated if pydub fails) """ try: from pydub import AudioSegment audio = AudioSegment.from_file(audio_path) return len(audio) / 1000.0 # Convert milliseconds to seconds except Exception: # Fallback: estimate based on file size # Approximate: MP3 at 128kbps = 16KB per second try: file_size = os.path.getsize(audio_path) return file_size / 16000 except Exception: return 3.0 # Default 3 seconds