# djscanlation / app.py
# maggidev's picture
# Update app.py
# add03f2 verified
from detect_bubbles import detect_bubbles
from process_bubble import extract_bubble_mask
from add_text import add_text
from translator import MangaTranslator
from manga_ocr import MangaOcr
from PIL import Image
import numpy as np
import cv2
import gradio as gr
# Model file handed to detect_bubbles() on every predict() call.
MODEL = "model.pt"
# Models are loaded a single time, at import, and reused by every request
manga_translator = MangaTranslator()
mocr = MangaOcr()
def predict(img, translation_method, font):
    """Translate the speech bubbles of a page and return the re-lettered page.

    The page is handled in three passes:

    1. For every detected bubble, OCR the crop and extract the bubble mask,
       both from an untouched copy of the page.
    2. Whiten every masked bubble pixel on the render copy in one operation.
    3. Translate each bubble's text and draw it back into its bounding box.

    Args:
        img: The uploaded page as delivered by the Gradio "image" input.
            The whitening pass writes a 3-component white value, so a
            3-channel image is expected.
        translation_method: Backend key forwarded to
            ``manga_translator.translate`` as ``method``.
        font: Font file path forwarded to ``add_text``.

    Returns:
        A ``PIL.Image.Image`` built from the rendered page.

    Raises:
        gr.Error: If no image was supplied.
    """
    # Submitting the form without an upload hands us None; fail with a
    # readable message instead of an opaque error further down.
    if img is None:
        raise gr.Error("Please upload an image first.")

    # =========================
    # Separate images per role
    # =========================
    original_image = np.array(img).copy()  # used ONLY for OCR / mask extraction
    render_image = original_image.copy()   # used for cleaning + lettering

    height, width = original_image.shape[:2]
    results = detect_bubbles(MODEL, original_image)
    page_mask = np.zeros((height, width), dtype=np.uint8)
    bubbles_data = []

    # =========================
    # 1) OCR + mask extraction (render image is not touched here)
    # =========================
    for x1, y1, x2, y2, _score, _class_id in results:
        x1, y1, x2, y2 = map(int, (x1, y1, x2, y2))

        # Clamp to the page: a negative bound would wrap around under NumPy
        # slicing and select the wrong region.
        x1, x2 = max(x1, 0), min(x2, width)
        y1, y2 = max(y1, 0), min(y2, height)
        if x2 <= x1 or y2 <= y1:
            # Degenerate box: nothing to read or to mask.
            continue

        crop_original = original_image[y1:y2, x1:x2]

        # OCR always reads the original pixels, before any other helper
        # sees the crop.
        pil_crop = Image.fromarray(crop_original.astype(np.uint8))
        text = mocr(pil_crop)

        mask, contour = extract_bubble_mask(crop_original)
        if mask is None:
            continue

        # The slice is a view, so this marks the bubble on the page-wide mask.
        page_mask[y1:y2, x1:x2][mask == 255] = 255
        bubbles_data.append({
            "box": (x1, y1, x2, y2),
            "text": text,
            "contour": contour,
        })

    # =========================
    # 2) Global page cleanup (done ONCE)
    # =========================
    render_image[page_mask == 255] = (255, 255, 255)

    # =========================
    # 3) Translation + lettering
    # =========================
    for bubble in bubbles_data:
        x1, y1, x2, y2 = bubble["box"]
        translated = manga_translator.translate(
            bubble["text"],
            method=translation_method,
        )
        render_image[y1:y2, x1:x2] = add_text(
            render_image[y1:y2, x1:x2],
            translated,
            font,
            bubble["contour"],
        )

    return Image.fromarray(render_image)
# =========================
# Gradio interface
# =========================
# Dropdown entries are (label shown to the user, value passed to predict).
_TRANSLATION_METHOD_CHOICES = [
    ("Google", "google"),
    ("HuggingFace", "hf"),
    ("Bing", "bing"),
    ("Sogou", "sogou"),
]
_FONT_CHOICES = [
    ("Wild Words Roman", "fonts/WildWordsRoman.ttf"),
    ("Anime Ace Italic", "fonts/animeace_i.ttf"),
    ("Mangati", "fonts/mangati.ttf"),
    ("Komika Axis", "fonts/KOMIKAX_.ttf"),
]

# Built in the same order as predict()'s parameters after the image.
_method_dropdown = gr.Dropdown(
    _TRANSLATION_METHOD_CHOICES,
    value="google",
    label="Translation Method",
)
_font_dropdown = gr.Dropdown(
    _FONT_CHOICES,
    value="fonts/WildWordsRoman.ttf",
    label="Font",
)

demo = gr.Interface(
    fn=predict,
    inputs=["image", _method_dropdown, _font_dropdown],
    outputs=gr.Image(),
    title="Manga Translator",
    description="Translate manga speech bubbles safely and correctly",
)

# Start the web server; no debug mode and no public share link.
demo.launch(debug=False, share=False)