Spaces:

bigbossmonster
/

transcriptmaster

Running

App Files Files Community

transcriptmaster / backend /services /ai /tts.js

bigbossmonster's picture

Upload 24 files

ea81969 verified about 2 months ago

history blame contribute delete

1.89 kB

	import { AI_VOICE_PROMPT } from '../../../prompts/aiVoice.js';

	import { tryModels, getPrompt, DEFAULT_SAFETY_SETTINGS } from '@/backend/services/ai/utils';
	import { Modality } from '@google/genai';

	/**
	 * Generate speech audio for a script with the Gemini TTS preview models.
	 *
	 * The tone instructions built by `AI_VOICE_PROMPT(tone)` are prepended to the
	 * script and sent as one text part. The `inlineData.data` payload of the first
	 * part of the first candidate that carries inline data is returned.
	 *
	 * @param {string} text - Script to speak. Must be a non-blank string of at most 3500 characters.
	 * @param {string} [voiceName] - Prebuilt voice name; any falsy value falls back to 'Zephyr'.
	 * @param {*} tone - Passed unchanged to `AI_VOICE_PROMPT` to build the tone instructions.
	 * @param {*} apiKey - Forwarded unchanged to `tryModels` (presumably the Gemini API key; confirm in utils).
	 * @param {boolean} [isOwnApi=false] - Not read by this function; kept so existing call sites stay valid.
	 * @returns {Promise<string>} Whatever `tryModels` resolves to; presumably the callback's result,
	 *   i.e. the audio payload (likely base64; TODO confirm against the SDK).
	 * @throws {Error} When `text` is missing or blank, when it exceeds the length limit, or with
	 *   message "EMPTY_AUDIO_DATA_FROM_GEMINI" when the response holds no inline audio data.
	 */
	export async function tts(text, voiceName, tone, apiKey, isOwnApi = false) {
	  // Reject missing or blank scripts up front; otherwise "undefined"/"null" would be
	  // interpolated into the prompt and sent to the model.
	  if (typeof text !== 'string' || text.trim().length === 0) {
	    throw new Error('Text is required to generate speech.');
	  }

	  // Strict requirement: Max 3500 characters
	  const ABSOLUTE_MAX_LENGTH = 3500;
	  if (text.length > ABSOLUTE_MAX_LENGTH) {
	    throw new Error(`Text is too long (${text.length} chars). Maximum allowed is ${ABSOLUTE_MAX_LENGTH} characters.`);
	  }

	  const models = ['gemini-2.5-flash-preview-tts', 'gemini-2.5-pro-preview-tts'];
	  const promptInstructions = AI_VOICE_PROMPT(tone);

	  // For TTS models, prepending the tone instructions to the text is more stable than systemInstruction
	  const textWithInstructions = `${promptInstructions}\n\nSCRIPT TO SPEAK:\n${text}`;

	  console.log(`[TTS] Generating content... Length: ${text.length}`);

	  const chunkBase64 = await tryModels(apiKey, models, async (ai, model) => {
	    const response = await ai.models.generateContent({
	      model,
	      contents: [{ parts: [{ text: textWithInstructions }] }],
	      config: {
	        responseModalities: [Modality.AUDIO],
	        safetySettings: DEFAULT_SAFETY_SETTINGS,
	        speechConfig: {
	          voiceConfig: {
	            prebuiltVoiceConfig: {
	              // `||` (not `??`) on purpose: an empty-string voice name also falls back.
	              voiceName: voiceName || 'Zephyr',
	            },
	          },
	        },
	      },
	    });

	    // `parts` may be absent (e.g. a candidate without content parts); guard it so the
	    // failure surfaces as the explicit error below rather than a TypeError.
	    const part = response.candidates?.[0]?.content?.parts?.find((p) => p?.inlineData);
	    if (part?.inlineData?.data) return part.inlineData.data;
	    throw new Error("EMPTY_AUDIO_DATA_FROM_GEMINI");
	  });

	  return chunkBase64;
	}