// transcriptmaster/backend/services/ai/comicTranslator.js
// Listing metadata: bigbossmonster · "Upload 24 files" · ea81969 (verified)
import { COMIC_TRANSLATOR_PROMPT } from '../../../prompts/comicTranslator.js';
import { Type } from '@google/genai';
import { tryModels, getPrompt, DEFAULT_SAFETY_SETTINGS, cleanJson } from '@/backend/services/ai/utils';
/**
 * Asks a Gemini model to find every text bubble in a comic document and
 * translate it into `targetLanguage`, returning the model's structured JSON.
 *
 * Only the detection/translation step happens here; per the original notes,
 * the backend consumes this JSON afterwards to do the image manipulation.
 *
 * @param {string} media - Sent as `inlineData.data` (presumably base64-encoded
 *   file content — confirm against the caller).
 * @param {string} mimeType - MIME type sent alongside `media`.
 * @param {string} targetLanguage - Language name interpolated into the system
 *   and user prompts.
 * @param {string} apiKey - Forwarded unchanged to `tryModels`.
 * @param {boolean} [isOwnApi=false] - Not read by this function; kept so
 *   existing call sites keep working.
 * @returns {Promise<object>} Whatever `tryModels` resolves with; the callback
 *   given to it returns the parsed response, shaped
 *   `{ pages: [{ page_number, text_blocks: [{ translated_text, box_2d, background_color? }] }] }`.
 * @throws {TypeError} If `targetLanguage` is not a string.
 * @throws {SyntaxError} From the callback when the model output is not valid
 *   JSON (how `tryModels` reacts to that is defined in the utils module).
 */
export async function comicTranslate(media, mimeType, targetLanguage, apiKey, isOwnApi = false) {
  // Fail fast with a clear message instead of sending "undefined" to the model.
  // (The previous implementation threw a TypeError here only by accident.)
  if (typeof targetLanguage !== 'string') {
    throw new TypeError(
      `comicTranslate: targetLanguage must be a string, received ${typeof targetLanguage}`,
    );
  }

  // Candidate models handed to tryModels (presumably tried in order — confirm in utils).
  const models = ['gemini-3-flash-preview', 'gemini-flash-lite-latest'];
  const finalPrompt = COMIC_TRANSLATOR_PROMPT(targetLanguage);

  // Schema fields are optional unless listed in `required`. The geometry and
  // the translation are what the downstream image step needs, so they are
  // mandatory; `background_color` stays optional.
  const textBlockSchema = {
    type: Type.OBJECT,
    properties: {
      translated_text: { type: Type.STRING },
      box_2d: {
        type: Type.ARRAY,
        items: { type: Type.NUMBER },
        description: "[ymin, xmin, ymax, xmax] coordinates normalized 0-1000",
      },
      background_color: { type: Type.STRING },
    },
    required: ['translated_text', 'box_2d'],
  };

  const pageSchema = {
    type: Type.OBJECT,
    properties: {
      page_number: { type: Type.INTEGER },
      text_blocks: { type: Type.ARRAY, items: textBlockSchema },
    },
    required: ['page_number', 'text_blocks'],
  };

  const responseSchema = {
    type: Type.OBJECT,
    properties: {
      pages: { type: Type.ARRAY, items: pageSchema },
    },
    required: ['pages'],
  };

  // Built once: the request is identical for every model attempt except `model`.
  const contents = {
    parts: [
      { inlineData: { data: media, mimeType } },
      { text: `TASK: Process this document page by page. For each page, identify all text bubbles. Provide their [ymin, xmin, ymax, xmax] coordinates and the translated text in ${targetLanguage}. Output ONLY valid JSON.` },
    ],
  };

  const config = {
    temperature: 0.1,
    systemInstruction: finalPrompt,
    safetySettings: DEFAULT_SAFETY_SETTINGS,
    responseMimeType: "application/json",
    responseSchema,
  };

  // AI identifies text locations and translations.
  return await tryModels(apiKey, models, async (ai, model) => {
    const response = await ai.models.generateContent({ model, contents, config });

    // The backend server will receive this JSON and perform the heavy image
    // manipulation, returning a final processed URL or Base64 to the client.
    return JSON.parse(cleanJson(response.text));
  });
}