import { AI_VOICE_PROMPT } from '../../../prompts/aiVoice.js';
import { tryModels, getPrompt, DEFAULT_SAFETY_SETTINGS } from '@/backend/services/ai/utils';
import { Modality } from '@google/genai';

// Strict requirement: a single TTS request accepts at most 3500 characters.
const ABSOLUTE_MAX_LENGTH = 3500;

/**
 * Generate speech audio for `text` using the Gemini TTS preview models.
 *
 * The tone instructions produced by `AI_VOICE_PROMPT(tone)` are prepended to
 * the script, and the request is handed to `tryModels` together with the list
 * of candidate models (presumably tried in order until one succeeds — confirm
 * against the `tryModels` implementation).
 *
 * @param {string} text - Script to speak; must be a non-empty string of at
 *   most 3500 characters.
 * @param {string} [voiceName] - Prebuilt voice name; falls back to 'Zephyr'
 *   when falsy.
 * @param {*} tone - Passed through to `AI_VOICE_PROMPT` to build the tone
 *   instructions.
 * @param {string} apiKey - Forwarded to `tryModels`.
 * @param {boolean} [isOwnApi=false] - Not used by this function; kept so the
 *   signature stays compatible with existing callers.
 * @returns {Promise<*>} Whatever `tryModels` resolves with — expected to be
 *   the `inlineData.data` payload of the first audio part (presumably
 *   base64-encoded audio).
 * @throws {Error} If `text` is missing/empty or longer than the limit, or —
 *   from inside the per-model callback — `EMPTY_AUDIO_DATA_FROM_GEMINI` when
 *   the response carries no inline audio data.
 */
export async function tts(text, voiceName, tone, apiKey, isOwnApi = false) {
  // Fail fast: a nullish script would otherwise be interpolated into the
  // prompt as the literal word "undefined"/"null" and sent to the model.
  if (typeof text !== 'string' || text.length === 0) {
    throw new Error('Text is required for TTS and must be a non-empty string.');
  }

  if (text.length > ABSOLUTE_MAX_LENGTH) {
    throw new Error(`Text is too long (${text.length} chars). Maximum allowed is ${ABSOLUTE_MAX_LENGTH} characters.`);
  }

  const models = ['gemini-2.5-flash-preview-tts', 'gemini-2.5-pro-preview-tts'];
  const promptInstructions = AI_VOICE_PROMPT(tone);

  // For TTS models, prepending the tone instructions to the text is more
  // stable than systemInstruction.
  const textWithInstructions = `${promptInstructions}\n\nSCRIPT TO SPEAK:\n${text}`;

  console.log(`[TTS] Generating content... Length: ${text.length}`);

  const chunkBase64 = await tryModels(apiKey, models, async (ai, model) => {
    const response = await ai.models.generateContent({
      model,
      contents: [{ parts: [{ text: textWithInstructions }] }],
      config: {
        responseModalities: [Modality.AUDIO],
        safetySettings: DEFAULT_SAFETY_SETTINGS,
        speechConfig: {
          voiceConfig: {
            prebuiltVoiceConfig: { voiceName: voiceName || 'Zephyr' },
          },
        },
      },
    });

    // `parts` can be absent even when `content` exists; `?.find` keeps that
    // case on the EMPTY_AUDIO_DATA path instead of raising a raw TypeError.
    const audioPart = response.candidates?.[0]?.content?.parts?.find((p) => p.inlineData);
    if (audioPart?.inlineData?.data) {
      return audioPart.inlineData.data;
    }

    throw new Error("EMPTY_AUDIO_DATA_FROM_GEMINI");
  });

  return chunkBase64;
}