File size: 2,457 Bytes
ea81969
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import { SUBTITLE_PROMPT } from '../../../prompts/subtitle.js';

import { tryModels, getPrompt, cleanSRTOutput, DEFAULT_SAFETY_SETTINGS } from '@/backend/services/ai/utils';
import { transcribe } from '@/backend/services/ai/transcribe';

/**
 * Formats a millisecond offset as an SRT timestamp (`HH:MM:SS,mmm`).
 *
 * Fractional milliseconds are truncated. Negative or non-finite input
 * (NaN, ±Infinity, non-numeric values) is clamped to zero, because an SRT
 * timestamp has no way to express it and the unclamped arithmetic would
 * emit malformed fields such as `-1:-1:-1,0-5`.
 *
 * @param {number} ms - Offset in milliseconds.
 * @returns {string} Timestamp with zero-padded hours (min. 2 digits),
 *   minutes, seconds, and 3-digit milliseconds.
 */
const formatMsToSRT = (ms) => {
    // Number() keeps numeric strings working the way implicit coercion did before.
    const numeric = Number(ms);
    const totalMs = Number.isFinite(numeric) && numeric > 0 ? Math.floor(numeric) : 0;

    const pad = (value, width) => String(value).padStart(width, '0');

    const hours = Math.floor(totalMs / 3600000);
    const mins = Math.floor((totalMs % 3600000) / 60000);
    const secs = Math.floor((totalMs % 60000) / 1000);
    const millis = totalMs % 1000;

    return `${pad(hours, 2)}:${pad(mins, 2)}:${pad(secs, 2)},${pad(millis, 3)}`;
};

export async function subtitle(mediaBase64, mimeType, fullScript, apiKey, isOwnApi = false, sourceLanguage = 'English', startOffsetMs = 0, lastScriptIndex = 0) {
    let scriptToProcess = fullScript || "";

    // If script is empty, transcribe the whole media
    if (!scriptToProcess) {
        scriptToProcess = await transcribe(mediaBase64, mimeType, apiKey, isOwnApi);
    }

    const isBurmese = sourceLanguage.toLowerCase().includes('burm') || sourceLanguage.includes('မြန်မာ');
    const promptTemplate = getPrompt('subtitle.txt');
    
    // REMOVED: 15-second restriction logic.
    // ADDED: Explicit instruction to process the ENTIRE audio.
    const finalPrompt = promptTemplate
        .replace('{{script}}', scriptToProcess)
        .replace('{{language}}', isBurmese ? "Burmese (Conversational)" : sourceLanguage)
        .replace('15-second audio input', 'the provided audio file') 
        .replace('Align the script', 'Align the COMPLETE script from start to finish');

    const rawSRT = await tryModels(apiKey, ['gemini-3-flash-preview'], async (ai, model) => {
        const response = await ai.models.generateContent({
            model,
            contents: { 
                parts: [
                    { inlineData: { data: mediaBase64, mimeType: 'audio/wav' } }, 
                    { text: `GENERATE FULL SRT: Listen to this entire file and align every word from the reference script. Return the complete SRT.` }
                ] 
            },
            config: { 
                temperature: 0, 
                systemInstruction: finalPrompt, 
                safetySettings: DEFAULT_SAFETY_SETTINGS
            }
        });
        return cleanSRTOutput(response.text);
    });

    return { 
        srt: rawSRT || "" 
    };
}