"""Minimal multi-speaker TTS example. This script makes a single multi-speaker TTS request via the project's `tts_provider.generate_tts` adapter. It does NOT use ffmpeg, segmentation, or any fallbacks. Usage: python scripts/simple_multi_speaker.py Set environment variables as needed (GOOGLE_API_KEY or GEMINI_API_KEY). The script will write `logs/simple_multi_out.mp3` if audio bytes are returned. """ import os from datetime import datetime try: from tts_provider import generate_tts except Exception as e: print('Failed to import tts_provider:', e) raise MODEL = os.environ.get('TTS_MODEL', 'models/gemini-2.5-flash-preview-tts') OUT_DIR = 'logs' os.makedirs(OUT_DIR, exist_ok=True) # Build a simple multi-speaker script in one string. Use 'Name: text' markers. script = ( "Dr. Anya: Welcome to the short briefing. Today we summarize the main research findings. " "Liam: Thanks Anya. Could you highlight the top policy recommendation?" ) # Build a minimal multi-speaker voice config matching expected format speaker_voice_configs = [ {'speaker': 'Dr. Anya', 'voice_config': {'prebuilt_voice_config': {'voice_name': 'Kore'}}}, {'speaker': 'Liam', 'voice_config': {'prebuilt_voice_config': {'voice_name': 'Puck'}}} ] print('Calling generate_tts with model=', MODEL) bytes_out, err, provider = generate_tts(script, speaker_voice_configs=speaker_voice_configs, model=MODEL) if err: print('TTS error:', err) else: if bytes_out: ts = datetime.utcnow().strftime('%Y%m%dT%H%M%SZ') out_path = os.path.join(OUT_DIR, f'simple_multi_out_{ts}.mp3') with open(out_path, 'wb') as f: f.write(bytes_out) print('Wrote audio to', out_path) else: print('No audio bytes returned; provider=', provider) print('Provider reported as:', provider)