djscanlation

Sleeping

File size: 3,191 Bytes

6add590
4d840e4
 
6add590
 
f16ef2b
6add590
 
4d840e4
 
6add590
 
f16ef2b
add03f2
f16ef2b
 
 
6add590
0940960
add03f2
 
 
 
 
35019e5
add03f2
6add590
add03f2
4d840e4
6add590
add03f2
 
 
4d840e4
 
6add590
add03f2
 
 
 
 
 
 
4d840e4
 
6add590
4d840e4
f16ef2b
add03f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6add590
4d840e4
add03f2
 
f16ef2b
6add590
add03f2
 
4d840e4
f16ef2b
4d840e4
f16ef2b
6add590
add03f2
 
6add590
add03f2
 
 
f16ef2b
 
 
 
 
 
add03f2
 
 
 
 
 
4d840e4
add03f2
f16ef2b
 
add03f2
 
 
 
 
 
4d840e4
 
f16ef2b
 
 
add03f2
 
f16ef2b
 
add03f2

from detect_bubbles import detect_bubbles
from process_bubble import extract_bubble_mask
from add_text import add_text
from translator import MangaTranslator
from manga_ocr import MangaOcr

from PIL import Image
import numpy as np
import cv2
import gradio as gr

# Model identifier passed to detect_bubbles() by predict()
# (presumably the path of a weights file - confirm against detect_bubbles).
MODEL = "model.pt"

# Models are instantiated only once, at import time, and are reused by every
# predict() call: manga_translator for translation, mocr for OCR.
manga_translator = MangaTranslator()
mocr = MangaOcr()


def predict(img, translation_method, font):
    """Translate the speech bubbles of a manga page and redraw them.

    The page is processed in three passes so that OCR always reads untouched
    pixels: (1) every detected bubble is OCR'd and its mask is collected,
    (2) all masked pixels are whitened in a single step, and (3) each
    bubble's text is translated and drawn onto the cleaned page.

    Args:
        img: Page image, converted with ``np.array`` (the Gradio "image"
            input of this app supplies it).
        translation_method: Backend identifier forwarded to
            ``manga_translator.translate`` as ``method``.
        font: Font value forwarded unchanged to ``add_text``.

    Returns:
        PIL.Image.Image: The page with bubbles cleaned and re-lettered.

    Raises:
        gr.Error: If no image was supplied.
    """
    if img is None:
        # Fail early with a message the UI can display instead of an opaque
        # error from deep inside the detection pipeline.
        raise gr.Error("Please provide an image to translate.")

    # Two independent buffers: one stays pristine for OCR, the other is the
    # canvas that gets cleaned and written on.
    original_image = np.array(img)        # used ONLY for detection + OCR
    render_image = original_image.copy()  # used for cleaning + lettering

    height, width = original_image.shape[:2]

    results = detect_bubbles(MODEL, original_image)

    page_mask = np.zeros((height, width), dtype=np.uint8)
    bubbles_data = []

    # =========================
    # 1) OCR + mask extraction (the image is NOT modified here)
    # =========================
    for x1, y1, x2, y2, _score, _class_id in results:
        x1, y1, x2, y2 = (int(v) for v in (x1, y1, x2, y2))

        # Clamp the box to the page: a negative coordinate would otherwise be
        # interpreted by NumPy as an offset from the opposite edge.
        x1, x2 = max(0, x1), min(width, x2)
        y1, y2 = max(0, y1), min(height, y2)
        if x2 <= x1 or y2 <= y1:
            # Degenerate / fully off-page box: nothing to OCR or draw.
            continue

        crop_original = original_image[y1:y2, x1:x2]

        # OCR ALWAYS runs on the original pixels.
        pil_crop = Image.fromarray(crop_original.astype(np.uint8))
        text = mocr(pil_crop)

        mask, contour = extract_bubble_mask(crop_original)
        if mask is None:
            continue

        # The slice is a view, so this writes straight into page_mask.
        page_mask[y1:y2, x1:x2][mask == 255] = 255

        bubbles_data.append(((x1, y1, x2, y2), text, contour))

    # =========================
    # 2) Global page cleaning (done ONCE)
    # =========================
    # A scalar broadcasts over every channel, so this yields the same white
    # fill on RGB input and does not break on other channel counts.
    render_image[page_mask == 255] = 255

    # =========================
    # 3) Translation + lettering
    # =========================
    for (x1, y1, x2, y2), text, contour in bubbles_data:
        translated = manga_translator.translate(
            text,
            method=translation_method
        )

        render_image[y1:y2, x1:x2] = add_text(
            render_image[y1:y2, x1:x2],
            translated,
            font,
            contour
        )

    return Image.fromarray(render_image)


# =========================
# Gradio interface
# =========================

# Dropdown entries are (label shown in the UI, value passed to predict) pairs.
_TRANSLATION_CHOICES = [
    ("Google", "google"),
    ("HuggingFace", "hf"),
    ("Bing", "bing"),
    ("Sogou", "sogou"),
]

_FONT_CHOICES = [
    ("Wild Words Roman", "fonts/WildWordsRoman.ttf"),
    ("Anime Ace Italic", "fonts/animeace_i.ttf"),
    ("Mangati", "fonts/mangati.ttf"),
    ("Komika Axis", "fonts/KOMIKAX_.ttf"),
]

# Components are created in the same order as the predict() parameters they
# feed: translation method first, then font.
_translation_dropdown = gr.Dropdown(
    _TRANSLATION_CHOICES,
    value="google",
    label="Translation Method",
)
_font_dropdown = gr.Dropdown(
    _FONT_CHOICES,
    value="fonts/WildWordsRoman.ttf",
    label="Font",
)

demo = gr.Interface(
    fn=predict,
    inputs=["image", _translation_dropdown, _font_dropdown],
    outputs=gr.Image(),
    title="Manga Translator",
    description="Translate manga speech bubbles safely and correctly",
)

demo.launch(debug=False, share=False)