import { SUBTITLE_PROMPT } from '../../../prompts/subtitle.js';
import {
  tryModels,
  getPrompt,
  cleanSRTOutput,
  DEFAULT_SAFETY_SETTINGS,
} from '@/backend/services/ai/utils';
import { transcribe } from '@/backend/services/ai/transcribe';

// User-turn instruction sent alongside the audio. Kept as a single constant so
// the exact wording (including the line break) stays in one place.
const FULL_SRT_REQUEST =
  'GENERATE FULL SRT: Listen to this entire file and align every word from the reference script. \n' +
  'Return the complete SRT.';

/**
 * Formats a millisecond offset as an SRT timestamp (`HH:MM:SS,mmm`).
 *
 * Negative or non-finite input is clamped to zero, because an SRT timestamp
 * has no representation for either.
 *
 * NOTE(review): not referenced anywhere in the visible part of this module.
 *
 * @param {number} ms - Offset in milliseconds.
 * @returns {string} Zero-padded timestamp, e.g. `00:01:02,345`.
 */
const formatMsToSRT = (ms) => {
  const totalMs = Number.isFinite(ms) ? Math.max(0, Math.floor(ms)) : 0;
  const hours = Math.floor(totalMs / 3600000);
  const mins = Math.floor((totalMs % 3600000) / 60000);
  const secs = Math.floor((totalMs % 60000) / 1000);
  const millis = totalMs % 1000;
  const pad = (value, width) => String(value).padStart(width, '0');
  return `${pad(hours, 2)}:${pad(mins, 2)}:${pad(secs, 2)},${pad(millis, 3)}`;
};

/**
 * Builds the system prompt from the subtitle template.
 *
 * The template-level phrase rewrites run BEFORE the placeholders are filled so
 * they can only ever touch the template, never the caller's script. Both
 * placeholders are then filled in a single pass with a replacer function:
 * a replacement *string* would interpret `$&`, `$1`, `$$`, ... inside the
 * script, and a second pass could re-scan text that was just inserted.
 *
 * @param {string} template - Raw prompt template containing `{{script}}` / `{{language}}`.
 * @param {string} script - Reference script to align.
 * @param {string} language - Language label to present to the model.
 * @returns {string} The final system instruction.
 */
const buildSystemPrompt = (template, script, language) =>
  template
    // The template was written for 15-second clips; reword it so the model
    // processes the entire file instead of stopping early.
    .replace('15-second audio input', 'the provided audio file')
    .replace('Align the script', 'Align the COMPLETE script from start to finish')
    .replace(/\{\{(script|language)\}\}/g, (_match, key) => (key === 'script' ? script : language));

/**
 * Generates SRT subtitles for a media file by asking the model to align a
 * reference script against the audio.
 *
 * When no script is supplied, the media is transcribed first and that
 * transcript is used as the reference script.
 *
 * @param {string} mediaBase64 - Base64-encoded media payload sent inline to the model.
 * @param {string} mimeType - MIME type of `mediaBase64`; falls back to `audio/wav` when empty.
 * @param {string} fullScript - Reference script; empty/falsy triggers transcription.
 * @param {string} apiKey - API key forwarded to `transcribe` and `tryModels`.
 * @param {boolean} [isOwnApi=false] - Forwarded to `transcribe`.
 * @param {string} [sourceLanguage='English'] - Spoken language of the media.
 * @param {number} [startOffsetMs=0] - Currently unused; kept for caller compatibility.
 * @param {number} [lastScriptIndex=0] - Currently unused; kept for caller compatibility.
 * @returns {Promise<{ srt: string }>} The cleaned SRT text, or an empty string if none was produced.
 */
export async function subtitle(
  mediaBase64,
  mimeType,
  fullScript,
  apiKey,
  isOwnApi = false,
  sourceLanguage = 'English',
  startOffsetMs = 0,
  lastScriptIndex = 0,
) {
  let scriptToProcess = fullScript || '';

  // No script supplied: transcribe the whole media and align against that.
  if (!scriptToProcess) {
    scriptToProcess = await transcribe(mediaBase64, mimeType, apiKey, isOwnApi);
  }

  // Default parameters only cover `undefined`; guard an explicit `null` too.
  const language = sourceLanguage ?? 'English';
  const isBurmese = language.toLowerCase().includes('burm') || language.includes('မြန်မာ');
  const promptLanguage = isBurmese ? 'Burmese (Conversational)' : language;

  const finalPrompt = buildSystemPrompt(getPrompt('subtitle.txt'), scriptToProcess, promptLanguage);

  const rawSRT = await tryModels(apiKey, ['gemini-3-flash-preview'], async (ai, model) => {
    const response = await ai.models.generateContent({
      model,
      contents: {
        parts: [
          // Use the caller-supplied MIME type (the same one passed to
          // `transcribe`); 'audio/wav' is kept as the fallback because it was
          // the previously hard-coded value.
          { inlineData: { data: mediaBase64, mimeType: mimeType || 'audio/wav' } },
          { text: FULL_SRT_REQUEST },
        ],
      },
      config: {
        // Deterministic output: alignment should not vary between runs.
        temperature: 0,
        systemInstruction: finalPrompt,
        safetySettings: DEFAULT_SAFETY_SETTINGS,
      },
    });
    return cleanSRTOutput(response.text);
  });

  return { srt: rawSRT || '' };
}