# Manga Translator Gradio app (recovered from a Hugging Face Spaces file view; file size: 3,191 bytes).
from detect_bubbles import detect_bubbles
from process_bubble import extract_bubble_mask
from add_text import add_text
from translator import MangaTranslator
from manga_ocr import MangaOcr
from PIL import Image
import numpy as np
import cv2
import gradio as gr
# Passed as the first argument to detect_bubbles() on every predict() call
# (presumably the detector weights file — confirm against detect_bubbles).
MODEL = "model.pt"

# Models are instantiated a single time, when this module is executed, and the
# same instances are reused by every predict() call.
manga_translator = MangaTranslator()
mocr = MangaOcr()
def predict(img, translation_method, font):
    """Translate the speech bubbles of one manga page and return the redrawn page.

    The page is processed in three passes so that cleaning one bubble can never
    disturb the OCR of another:

    1. Every detected bubble is OCR'd on an untouched copy of the page and its
       mask is accumulated into a page-wide mask.
    2. All masked pixels are whitened in a single operation.
    3. Each bubble's text is translated and written onto the cleaned page.

    Args:
        img: Page image in any form ``np.array`` accepts (what the ``"image"``
            Gradio input delivers). ``None`` means no image was supplied.
        translation_method: Backend identifier forwarded to
            ``manga_translator.translate`` as ``method``.
        font: Font forwarded unchanged to ``add_text``.

    Returns:
        A ``PIL.Image.Image`` holding the translated page, or ``None`` when
        ``img`` is ``None`` (nothing to process).
    """
    # Submitting the form without an image hands us None; bail out early
    # instead of failing deep inside detection with an obscure error.
    if img is None:
        return None

    # =========================
    # Separate images per role
    # =========================
    original_image = np.array(img)         # read-only source for OCR / masks
    render_image = original_image.copy()   # gets cleaned and written on
    page_height, page_width = original_image.shape[:2]

    results = detect_bubbles(MODEL, original_image)

    page_mask = np.zeros((page_height, page_width), dtype=np.uint8)
    bubbles_data = []

    # =========================
    # 1) OCR + mask extraction (the image is NOT modified here)
    # =========================
    for x1, y1, x2, y2, _score, _class_id in results:
        x1, y1, x2, y2 = map(int, (x1, y1, x2, y2))

        # Clamp the box to the page: a negative coordinate would otherwise be
        # interpreted by NumPy as "count from the end" and select the wrong
        # region. Boxes that end up empty are skipped.
        x1, x2 = max(x1, 0), min(x2, page_width)
        y1, y2 = max(y1, 0), min(y2, page_height)
        if x2 <= x1 or y2 <= y1:
            continue

        crop_original = original_image[y1:y2, x1:x2]

        # OCR ALWAYS runs on the original image. It is kept ahead of the mask
        # extraction so it sees the crop before any other helper touches it.
        pil_crop = Image.fromarray(crop_original.astype(np.uint8))
        text = mocr(pil_crop)

        mask, contour = extract_bubble_mask(crop_original)
        if mask is None:
            continue

        # The slice is a view, so this writes straight into page_mask.
        page_mask[y1:y2, x1:x2][mask == 255] = 255
        bubbles_data.append({
            "box": (x1, y1, x2, y2),
            "text": text,
            "contour": contour
        })

    # =========================
    # 2) Global page cleanup (done ONCE)
    # =========================
    # A scalar fill gives the same result as (255, 255, 255) on RGB pages and,
    # unlike the tuple, also broadcasts for grayscale or RGBA arrays.
    render_image[page_mask == 255] = 255

    # =========================
    # 3) Translation + lettering
    # =========================
    for bubble in bubbles_data:
        x1, y1, x2, y2 = bubble["box"]
        translated = manga_translator.translate(
            bubble["text"],
            method=translation_method
        )
        render_image[y1:y2, x1:x2] = add_text(
            render_image[y1:y2, x1:x2],
            translated,
            font,
            bubble["contour"]
        )

    return Image.fromarray(render_image)
# =========================
# Gradio interface
# =========================
# Dropdown entries are (label shown in the UI, value passed to predict) pairs.
_TRANSLATION_METHOD_CHOICES = [
    ("Google", "google"),
    ("HuggingFace", "hf"),
    ("Bing", "bing"),
    ("Sogou", "sogou")
]
_FONT_CHOICES = [
    ("Wild Words Roman", "fonts/WildWordsRoman.ttf"),
    ("Anime Ace Italic", "fonts/animeace_i.ttf"),
    ("Mangati", "fonts/mangati.ttf"),
    ("Komika Axis", "fonts/KOMIKAX_.ttf")
]

# Second argument of predict(): which translation backend to use.
_translation_method_input = gr.Dropdown(
    _TRANSLATION_METHOD_CHOICES,
    value="google",
    label="Translation Method"
)

# Third argument of predict(): font file used for the lettering.
_font_input = gr.Dropdown(
    _FONT_CHOICES,
    value="fonts/WildWordsRoman.ttf",
    label="Font"
)

# Inputs are listed in the same order as predict()'s parameters.
demo = gr.Interface(
    fn=predict,
    inputs=["image", _translation_method_input, _font_input],
    outputs=gr.Image(),
    title="Manga Translator",
    description="Translate manga speech bubbles safely and correctly"
)

# Runs as soon as this module is executed.
demo.launch(debug=False, share=False)
|