Spaces:
Sleeping
Sleeping
File size: 2,590 Bytes
85c18a5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
"""
Text-to-Speech Engine
=====================
Converts text to speech audio using Google Text-to-Speech (gTTS).
Functions:
- synthesize_speech: Convert text to MP3 audio file
- get_audio_duration: Get duration of audio file
"""
from gtts import gTTS
from pathlib import Path
import time
from typing import Literal
import os
def synthesize_speech(
    text: str,
    language: Literal["en", "hi"],
    output_dir: Path,
    slow: bool = False
) -> str:
    """
    Convert text to speech and save as MP3 file.

    Uses Google Text-to-Speech (gTTS) for speech synthesis in
    English and Hindi.

    Args:
        text: Text to convert to speech (must contain non-whitespace characters)
        language: Language code ('en' for English, 'hi' for Hindi)
        output_dir: Directory to save the audio file (created if missing)
        slow: If True, speak slowly (useful for language learning)

    Returns:
        Path to the generated MP3 file, as a string

    Raises:
        ValueError: If text is empty/whitespace-only, or the generated
            file is empty
        FileNotFoundError: If no file exists after gTTS reports saving it
        Exception: Any other error raised during synthesis is printed
            and re-raised unchanged
    """
    # Local import keeps this function self-contained without touching
    # the module-level import block.
    import uuid

    # Fail fast, before creating directories or building a TTS request,
    # when there is nothing to speak.
    if not isinstance(text, str) or not text.strip():
        raise ValueError("Text to synthesize must be a non-empty string")

    # Ensure output directory exists
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Generate unique filename: a millisecond timestamp alone can repeat
    # when two requests arrive in the same millisecond, so a random
    # suffix is appended to avoid one call overwriting another's audio.
    timestamp = int(time.time() * 1000)
    unique_suffix = uuid.uuid4().hex[:8]
    output_path = output_dir / f"tts_{timestamp}_{unique_suffix}.mp3"

    try:
        # Create TTS object and save to file
        tts = gTTS(text=text, lang=language, slow=slow)
        tts.save(str(output_path))

        # Verify file was created
        if not output_path.exists():
            raise FileNotFoundError(f"TTS file was not created: {output_path}")

        # Verify file has content
        if output_path.stat().st_size == 0:
            raise ValueError("TTS file is empty")

        return str(output_path)
    except Exception as e:
        print(f"TTS Error: {e}")
        # Do not leave a partial or zero-byte MP3 behind on failure.
        # Cleanup is best-effort so it never masks the original error.
        try:
            output_path.unlink(missing_ok=True)
        except OSError:
            pass
        raise
def get_audio_duration(audio_path: str) -> float:
    """
    Report how long an audio file plays, in seconds.

    Three strategies are tried in order, each one only if the previous
    one raised:
    1. Decode the file with pydub and use its length.
    2. Estimate from the file size.
    3. Return a fixed default.

    Args:
        audio_path: Path to the audio file

    Returns:
        Duration in seconds (estimated if pydub fails)
    """
    # Stage 1: exact length via pydub. The import is inside the try so a
    # missing pydub installation falls through to the estimate as well.
    try:
        from pydub import AudioSegment
        length_ms = len(AudioSegment.from_file(audio_path))
    except Exception:
        pass
    else:
        # pydub reports length in milliseconds
        return length_ms / 1000.0

    # Stage 2: size-based estimate
    # Approximate: MP3 at 128kbps = 16KB per second
    try:
        size_in_bytes = os.path.getsize(audio_path)
    except Exception:
        # Stage 3: file could not be inspected at all
        return 3.0  # Default 3 seconds
    return size_in_bytes / 16000