from detect_bubbles import detect_bubbles
from process_bubble import extract_bubble_mask
from add_text import add_text
from translator import MangaTranslator
from manga_ocr import MangaOcr
from PIL import Image
import numpy as np
import cv2
import gradio as gr

# Model path handed to detect_bubbles on every call.
MODEL = "model.pt"

# Models are instantiated a single time, at module load.
manga_translator = MangaTranslator()
mocr = MangaOcr()


def _clamp_box(box, width, height):
    """Return ``box`` as ints clipped to ``[0, width]`` x ``[0, height]``.

    Clipping matters because a negative coordinate used as a NumPy slice
    bound wraps around to the opposite edge instead of stopping at 0.
    """
    x1, y1, x2, y2 = (int(v) for v in box)
    x1 = min(max(x1, 0), width)
    x2 = min(max(x2, 0), width)
    y1 = min(max(y1, 0), height)
    y2 = min(max(y2, 0), height)
    return x1, y1, x2, y2


def predict(img, translation_method, font):
    """Translate the speech bubbles of one page and return the result.

    The page is processed in three passes: (1) OCR and mask extraction on
    an untouched copy of the page, (2) one global whitening of every
    masked pixel, (3) translation and text rendering per bubble.

    Args:
        img: The page, in any form ``np.array`` accepts (this is what the
            Gradio ``"image"`` input supplies).
        translation_method: Forwarded to ``manga_translator.translate`` as
            ``method``.
        font: Forwarded to ``add_text``; the UI passes a font file path.

    Returns:
        A ``PIL.Image.Image`` built from the rendered array.

    Raises:
        gr.Error: If no image was supplied.
    """
    if img is None:
        # Gradio passes None when the user submits without an image.
        raise gr.Error("Please provide an image to translate.")

    # Two separate buffers: one is only ever read (OCR / mask extraction),
    # the other receives the cleaning and the rendered text.
    original_image = np.array(img)
    render_image = original_image.copy()
    height, width = original_image.shape[:2]

    results = detect_bubbles(MODEL, original_image)

    page_mask = np.zeros((height, width), dtype=np.uint8)
    bubbles_data = []

    # -----------------------------------------------------------------
    # 1) OCR + mask extraction (render_image is not touched here)
    # -----------------------------------------------------------------
    for bx1, by1, bx2, by2, _score, _class_id in results:
        x1, y1, x2, y2 = _clamp_box((bx1, by1, bx2, by2), width, height)
        if x2 <= x1 or y2 <= y1:
            # Degenerate or fully out-of-bounds detection: nothing to read.
            continue

        crop_original = original_image[y1:y2, x1:x2]

        # OCR always runs on the original pixels.
        pil_crop = Image.fromarray(crop_original.astype(np.uint8))
        text = mocr(pil_crop)

        mask, contour = extract_bubble_mask(crop_original)
        if mask is None:
            continue

        # Accumulate this bubble's mask into the page-wide mask.
        page_mask[y1:y2, x1:x2][mask == 255] = 255

        bubbles_data.append({
            "box": (x1, y1, x2, y2),
            "text": text,
            "contour": contour,
        })

    # -----------------------------------------------------------------
    # 2) Global page cleaning (done ONCE)
    # -----------------------------------------------------------------
    # A scalar broadcasts over however many channels the page has, so this
    # equals (255, 255, 255) for RGB and also works for other layouts.
    render_image[page_mask == 255] = 255

    # -----------------------------------------------------------------
    # 3) Translation + text rendering
    # -----------------------------------------------------------------
    for bubble in bubbles_data:
        x1, y1, x2, y2 = bubble["box"]
        translated = manga_translator.translate(
            bubble["text"],
            method=translation_method,
        )
        render_image[y1:y2, x1:x2] = add_text(
            render_image[y1:y2, x1:x2],
            translated,
            font,
            bubble["contour"],
        )

    return Image.fromarray(render_image)


# =========================
# Gradio interface
# =========================
demo = gr.Interface(
    fn=predict,
    inputs=[
        "image",
        gr.Dropdown(
            [
                ("Google", "google"),
                ("HuggingFace", "hf"),
                ("Bing", "bing"),
                ("Sogou", "sogou"),
            ],
            value="google",
            label="Translation Method",
        ),
        gr.Dropdown(
            [
                ("Wild Words Roman", "fonts/WildWordsRoman.ttf"),
                ("Anime Ace Italic", "fonts/animeace_i.ttf"),
                ("Mangati", "fonts/mangati.ttf"),
                ("Komika Axis", "fonts/KOMIKAX_.ttf"),
            ],
            value="fonts/WildWordsRoman.ttf",
            label="Font",
        ),
    ],
    outputs=gr.Image(),
    title="Manga Translator",
    description="Translate manga speech bubbles safely and correctly",
)

demo.launch(debug=False, share=False)