File size: 3,251 Bytes
ea81969
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import { COMIC_TRANSLATOR_PROMPT } from '../../../prompts/comicTranslator.js';

import { Type } from '@google/genai';
import { tryModels, getPrompt, DEFAULT_SAFETY_SETTINGS, cleanJson } from '@/backend/services/ai/utils';

/**
 * Builds the structured-output schema sent to the model as `responseSchema`.
 *
 * Shape: `{ pages: [{ page_number, text_blocks: [{ translated_text, box_2d, background_color }] }] }`.
 * Only the top-level `pages` key is marked as required.
 *
 * Built on demand (not at module load) so it only touches `Type` when a request is made.
 *
 * @returns {object} Schema object for the `config.responseSchema` request field.
 */
function buildComicResponseSchema() {
    const textBlockSchema = {
        type: Type.OBJECT,
        properties: {
            translated_text: { type: Type.STRING },
            box_2d: {
                type: Type.ARRAY,
                items: { type: Type.NUMBER },
                description: "[ymin, xmin, ymax, xmax] coordinates normalized 0-1000"
            },
            background_color: { type: Type.STRING }
        }
    };

    const pageSchema = {
        type: Type.OBJECT,
        properties: {
            page_number: { type: Type.INTEGER },
            text_blocks: {
                type: Type.ARRAY,
                items: textBlockSchema
            }
        }
    };

    return {
        type: Type.OBJECT,
        properties: {
            pages: {
                type: Type.ARRAY,
                items: pageSchema
            }
        },
        required: ['pages']
    };
}

/**
 * Asks the model to locate every text bubble in a comic document and translate it.
 *
 * The request is issued through `tryModels` with a fixed list of candidate models;
 * the model's JSON answer is parsed and returned.
 *
 * @param {string} media - Document payload sent as `inlineData.data`
 *   (presumably base64-encoded — confirm against the caller).
 * @param {string} mimeType - MIME type of `media`.
 * @param {string} targetLanguage - Language to translate the bubble text into.
 * @param {string} apiKey - API key forwarded to `tryModels`.
 * @param {boolean} [isOwnApi=false] - Not used by this function; kept so existing
 *   call sites remain valid.
 * @returns {Promise<object>} Whatever `tryModels` resolves with — presumably the value
 *   returned by the callback below, i.e. the parsed `{ pages: [...] }` object.
 * @throws {TypeError} If `targetLanguage` is not a string.
 * @throws {Error} From the callback when the model returns no text or a payload
 *   without a `pages` array.
 * @throws {SyntaxError} From the callback when the model output is not valid JSON.
 */
export async function comicTranslate(media, mimeType, targetLanguage, apiKey, isOwnApi = false) {
    // Fail fast with a clear message instead of sending "undefined" to the model as a language.
    if (typeof targetLanguage !== 'string') {
        throw new TypeError('comicTranslate: targetLanguage must be a string');
    }

    const models = ['gemini-3-flash-preview', 'gemini-flash-lite-latest'];
    const finalPrompt = COMIC_TRANSLATOR_PROMPT(targetLanguage);
    const taskText = `TASK: Process this document page by page. For each page, identify all text bubbles. Provide their [ymin, xmin, ymax, xmax] coordinates and the translated text in ${targetLanguage}. Output ONLY valid JSON.`;

    // AI identifies text locations and translations
    return tryModels(apiKey, models, async (ai, model) => {
        const response = await ai.models.generateContent({
            model,
            contents: {
                parts: [
                    { inlineData: { data: media, mimeType } },
                    { text: taskText }
                ]
            },
            config: {
                temperature: 0.1,
                systemInstruction: finalPrompt,
                safetySettings: DEFAULT_SAFETY_SETTINGS,
                responseMimeType: "application/json",
                responseSchema: buildComicResponseSchema()
            }
        });

        // Errors thrown below propagate to tryModels — presumably letting it move on to the
        // next candidate model; confirm against its implementation.
        const rawText = response.text;
        if (typeof rawText !== 'string' || rawText.trim() === '') {
            throw new Error(`comicTranslate: model "${model}" returned no text`);
        }

        let parsed;
        try {
            parsed = JSON.parse(cleanJson(rawText));
        } catch (err) {
            throw new SyntaxError(`comicTranslate: model "${model}" returned invalid JSON`, { cause: err });
        }

        // `pages` is the one key the schema marks as required; reject payloads that lack it.
        if (!parsed || !Array.isArray(parsed.pages)) {
            throw new Error(`comicTranslate: model "${model}" response is missing the "pages" array`);
        }

        // The backend server will receive this JSON and perform the heavy image manipulation
        // returning a final processed URL or Base64 to the client.
        return parsed;
    });
}