bigbossmonster's picture
Upload 24 files
ea81969 verified
import { AI_VOICE_PROMPT } from '../../../prompts/aiVoice.js';
import { tryModels, getPrompt, DEFAULT_SAFETY_SETTINGS } from '@/backend/services/ai/utils';
import { Modality } from '@google/genai';
/**
 * Generate speech audio for a script using the Gemini TTS preview models.
 *
 * The tone instructions produced by `AI_VOICE_PROMPT(tone)` are prepended to
 * the script, and the request is dispatched through `tryModels` with the
 * flash model listed before the pro model.
 *
 * @param {string} text - Script to speak. Must be non-blank and at most 3500 characters.
 * @param {string} [voiceName] - Prebuilt voice name; any falsy value falls back to 'Zephyr'.
 * @param {*} tone - Passed unchanged to `AI_VOICE_PROMPT` to build the style instructions.
 * @param {*} apiKey - Forwarded unchanged to `tryModels`.
 * @param {boolean} [isOwnApi=false] - Not read by this function; kept so existing callers keep working.
 * @returns {Promise<*>} Whatever `tryModels` resolves with; the callback supplied here returns the
 *   `inlineData.data` of the first response part that carries inline data (presumably
 *   base64-encoded audio - confirm against the SDK).
 * @throws {Error} If `text` is missing/blank or longer than the limit, or (from the callback)
 *   `EMPTY_AUDIO_DATA_FROM_GEMINI` when the response contains no inline audio data.
 */
export async function tts(text, voiceName, tone, apiKey, isOwnApi = false) {
  // Strict requirement: Max 3500 characters
  const ABSOLUTE_MAX_LENGTH = 3500;

  // Without this guard a missing script would be interpolated into the prompt
  // as the literal word "undefined"/"null" and sent to the model.
  if (text == null || (typeof text === 'string' && text.trim() === '')) {
    throw new Error('Text is required for speech generation.');
  }

  if (text.length > ABSOLUTE_MAX_LENGTH) {
    throw new Error(`Text is too long (${text.length} chars). Maximum allowed is ${ABSOLUTE_MAX_LENGTH} characters.`);
  }

  const models = ['gemini-2.5-flash-preview-tts', 'gemini-2.5-pro-preview-tts'];
  const promptInstructions = AI_VOICE_PROMPT(tone);

  // For TTS models, prepending the tone instructions to the text is more stable than systemInstruction
  const textWithInstructions = `${promptInstructions}\n\nSCRIPT TO SPEAK:\n${text}`;

  console.log(`[TTS] Generating content... Length: ${text.length || 0}`);

  return tryModels(apiKey, models, async (ai, model) => {
    const response = await ai.models.generateContent({
      model,
      contents: [{ parts: [{ text: textWithInstructions }] }],
      config: {
        responseModalities: [Modality.AUDIO],
        safetySettings: DEFAULT_SAFETY_SETTINGS,
        speechConfig: {
          voiceConfig: {
            prebuiltVoiceConfig: {
              voiceName: voiceName || 'Zephyr',
            },
          },
        },
      },
    });

    // `parts` can be absent even when `content` is present; guard it so that
    // case surfaces as the explicit empty-audio error, not a TypeError.
    const audioPart = response.candidates?.[0]?.content?.parts?.find((p) => p.inlineData);
    if (audioPart?.inlineData?.data) {
      return audioPart.inlineData.data;
    }

    throw new Error("EMPTY_AUDIO_DATA_FROM_GEMINI");
  });
}