import asyncio
import io
import logging
import wave

import numpy as np
from gtts import gTTS

# Module-level logger, named after this module so log records can be
# filtered or routed per module.
logger = logging.getLogger(__name__)


async def generate_tts(text: str) -> bytes:
    """Convert text to speech using Google Text-to-Speech (gTTS).

    The gTTS calls are synchronous, so the synthesis is executed in the
    event loop's default executor; awaiting this coroutine therefore does
    not stall other coroutines while the audio is being produced.

    Args:
        text: Text to speak; must be non-empty and at most 500 characters.

    Returns:
        MP3 audio bytes on success. On any failure, including invalid
        ``text``, the error is logged and the result of
        ``generate_silent_wav()`` is returned instead. Note that the
        fallback payload is WAV, not MP3.

    Raises:
        asyncio.CancelledError: If the surrounding task is cancelled while
            waiting for the synthesis. No other exception propagates.
    """

    def _synthesize() -> bytes:
        # Blocking section: runs in a worker thread, never on the event loop.
        tts = gTTS(text=text, lang='en', slow=False)
        audio_buffer = io.BytesIO()
        tts.write_to_fp(audio_buffer)
        return audio_buffer.getvalue()

    try:
        if not text or len(text) > 500:
            raise ValueError("Text must be between 1-500 characters")

        logger.info("TTS: Generating audio for '%s...'", text[:50])

        loop = asyncio.get_running_loop()
        audio_bytes = await loop.run_in_executor(None, _synthesize)

        logger.info("✓ TTS Success: %d bytes generated", len(audio_bytes))
        return audio_bytes
    except asyncio.CancelledError:
        # Cancellation is not a TTS failure; on Python 3.7 CancelledError is
        # still an Exception subclass, so re-raise it before the broad handler.
        raise
    except Exception as e:
        # Deliberate best-effort: callers always get playable audio back.
        # exc_info keeps the traceback so failures remain diagnosable.
        logger.error("✗ TTS Error: %s", e, exc_info=True)
        return generate_silent_wav()


def generate_silent_wav(*, duration: float = 1.0, sample_rate: int = 22050) -> bytes:
    """Generate a silent mono 16-bit PCM WAV file, used as a fallback.

    Called without arguments this produces one second of silence at
    22050 Hz.

    Args:
        duration: Length of the clip in seconds; must not be negative.
        sample_rate: Frames per second; must be positive.

    Returns:
        The complete WAV file (header plus frames) as bytes.

    Raises:
        ValueError: If ``duration`` is negative or ``sample_rate`` is not
            positive.
    """
    if duration < 0:
        raise ValueError("duration must be non-negative")
    if sample_rate <= 0:
        raise ValueError("sample_rate must be positive")

    sample_width = 2  # bytes per sample, i.e. 16-bit PCM
    frame_count = int(sample_rate * duration)

    buffer = io.BytesIO()
    with wave.open(buffer, 'wb') as wav:
        wav.setnchannels(1)
        wav.setsampwidth(sample_width)
        wav.setframerate(sample_rate)
        # All-zero bytes are silence in signed 16-bit PCM, so a zero-filled
        # bytes object is enough; no array needs to be built.
        wav.writeframes(bytes(frame_count * sample_width))

    # The header is finalized when the wave writer closes; the BytesIO
    # itself stays open, so its contents can still be read here.
    return buffer.getvalue()