|
|
import uuid |
|
|
import base64 |
|
|
from pathlib import Path |
|
|
from config import GROQ_TTS_API_KEY, GROQ_TTS_MODEL |
|
|
from gtts import gTTS |
|
|
from fastapi import HTTPException |
|
|
|
|
|
def text_to_speech(
    text: str,
    voice: str = "en",
    fmt: str = "mp3",
) -> str:
    """Synthesize ``text`` to an MP3 file with gTTS and return its path.

    Args:
        text: Text to speak. Surrounding whitespace is stripped before it is
            handed to gTTS.
        voice: Forwarded to gTTS as its ``lang`` argument; a falsy value
            falls back to ``"en"``.
        fmt: Requested output format. Only ``"mp3"`` is accepted.

    Returns:
        The path, as a string, of the generated file inside the relative
        ``temp_audio`` directory. The file is left on disk; this function
        never deletes a successfully written file.

    Raises:
        ValueError: If ``text`` is empty or whitespace-only, or if ``fmt``
            is not ``"mp3"``.
        Exception: If creating the directory or synthesizing the audio
            fails. The underlying error is chained as ``__cause__``.
    """
    if not text or not text.strip():
        raise ValueError("Text cannot be empty")

    if fmt != "mp3":
        raise ValueError("Only MP3 format is supported by the free TTS backend")

    temp_dir = Path("temp_audio")
    output_filename = f"tts_{uuid.uuid4().hex[:8]}.{fmt}"
    output_path = temp_dir / output_filename

    try:
        temp_dir.mkdir(exist_ok=True)
        tts = gTTS(text=text.strip(), lang=voice or "en")
        tts.save(str(output_path))
    except Exception as e:
        # Do not leave a partial/empty file behind when synthesis fails.
        # Cleanup is best-effort so it can never mask the real failure.
        try:
            output_path.unlink(missing_ok=True)
        except OSError:
            pass
        # Chain the original error so its traceback is preserved for callers.
        raise Exception(f"Unexpected error in text_to_speech: {str(e)}") from e

    return str(output_path)
|
|
|
|
|
|
|
|
def text_to_speech_base64(
    text: str,
    voice: str = "en",
    fmt: str = "mp3",
) -> dict:
    """Synthesize ``text`` with gTTS and return the MP3 as Base64 plus metadata.

    The audio is written to a temporary file under the relative
    ``temp_audio`` directory, read back, and the file is removed again
    whether or not synthesis/reading succeeded.

    Args:
        text: Text to speak. Surrounding whitespace is stripped before it is
            handed to gTTS.
        voice: Forwarded to gTTS as its ``lang`` argument; a falsy value
            falls back to ``"en"``.
        fmt: Requested output format. Only ``"mp3"`` is accepted.

    Returns:
        A dict with the keys ``audio_base64`` (Base64 text of the audio),
        ``mime_type`` (always ``"audio/mpeg"``), ``format`` (``fmt``),
        ``filename`` (the generated temporary file name), ``size_bytes``
        (length of the raw audio) and ``size_base64`` (length of the
        Base64 string).

    Raises:
        ValueError: If ``text`` is empty or whitespace-only, or if ``fmt``
            is not ``"mp3"``.
        Exception: If creating the directory, synthesizing, reading or
            removing the file fails. The underlying error is chained as
            ``__cause__``.
    """
    if not text or not text.strip():
        raise ValueError("Text cannot be empty")

    if fmt != "mp3":
        raise ValueError("Only MP3 format is supported by the free TTS backend")

    temp_dir = Path("temp_audio")
    output_filename = f"tts_{uuid.uuid4().hex[:8]}.{fmt}"
    output_path = temp_dir / output_filename

    try:
        temp_dir.mkdir(exist_ok=True)

        try:
            tts = gTTS(text=text.strip(), lang=voice or "en")
            tts.save(str(output_path))

            with open(output_path, "rb") as audio_file:
                audio_bytes = audio_file.read()
        finally:
            # Remove the temp file on failure as well as on success, so a
            # failed save/read does not leak files into temp_audio.
            output_path.unlink(missing_ok=True)

        audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")

        return {
            "audio_base64": audio_base64,
            "mime_type": "audio/mpeg",
            "format": fmt,
            "filename": output_filename,
            "size_bytes": len(audio_bytes),
            "size_base64": len(audio_base64),
        }
    except Exception as e:
        # Chain the original error so its traceback is preserved for callers.
        raise Exception(
            f"Unexpected error in text_to_speech_base64: {str(e)}"
        ) from e