fcas / simple_multi_speaker.py
lsempe's picture
Update app and remove old files
48c2af7
"""Minimal multi-speaker TTS example.
This script makes a single multi-speaker TTS request via the project's
`tts_provider.generate_tts` adapter. It does NOT use ffmpeg, segmentation,
or any fallbacks.
Usage: python scripts/simple_multi_speaker.py
Set environment variables as needed (GOOGLE_API_KEY or GEMINI_API_KEY;
TTS_MODEL optionally overrides the default model). The script will write
`logs/simple_multi_out_<UTC timestamp>.mp3` if audio bytes are returned.
"""
import os
from datetime import datetime, timezone
# Load the project's TTS adapter. If the import fails for any reason, print a
# short message naming the module and then re-raise the original exception.
try:
    from tts_provider import generate_tts
except Exception as import_error:
    print('Failed to import tts_provider:', import_error)
    raise
# Model identifier passed to generate_tts; the TTS_MODEL environment variable
# overrides the built-in default.
MODEL = os.getenv('TTS_MODEL', 'models/gemini-2.5-flash-preview-tts')

# Directory that receives the generated audio file. It is created up front
# (no error if it already exists) so the later write cannot fail on a
# missing folder.
OUT_DIR = 'logs'
os.makedirs(OUT_DIR, exist_ok=True)
# The whole dialogue travels as one string; every turn begins with a
# 'Name: text' marker identifying its speaker.
script = ''.join((
    "Dr. Anya: Welcome to the short briefing. Today we summarize the main research findings. ",
    "Liam: Thanks Anya. Could you highlight the top policy recommendation?",
))

# One entry per speaker named in the script, pairing the speaker label with
# a prebuilt voice name in the nested format handed to generate_tts.
speaker_voice_configs = [
    {
        'speaker': speaker_name,
        'voice_config': {'prebuilt_voice_config': {'voice_name': voice_name}},
    }
    for speaker_name, voice_name in (('Dr. Anya', 'Kore'), ('Liam', 'Puck'))
]
# Send the whole script in a single multi-speaker request. The adapter's
# result is unpacked into three values: the audio payload, an error value
# (truthy when the request failed), and a provider identifier.
print('Calling generate_tts with model=', MODEL)
bytes_out, err, provider = generate_tts(
    script,
    speaker_voice_configs=speaker_voice_configs,
    model=MODEL,
)

if err:
    print('TTS error:', err)
elif bytes_out:
    # Put a UTC timestamp in the file name so repeated runs do not overwrite
    # each other. datetime.utcnow() is deprecated since Python 3.12; an
    # aware "now" in UTC formats to exactly the same string.
    ts = datetime.now(timezone.utc).strftime('%Y%m%dT%H%M%SZ')
    out_path = os.path.join(OUT_DIR, f'simple_multi_out_{ts}.mp3')
    with open(out_path, 'wb') as f:
        f.write(bytes_out)
    # Report success only after the file has been closed.
    print('Wrote audio to', out_path)
else:
    print('No audio bytes returned; provider=', provider)

# NOTE(review): the original indentation was lost; this summary line is
# assumed to run unconditionally, whatever the outcome above -- confirm.
print('Provider reported as:', provider)