djscanlation

Sleeping

App Files Files Community

maggidev committed on Feb 10

Commit

add03f2

verified ·

1 Parent(s): 4d840e4

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -23

app.py CHANGED Viewed

@@ -11,69 +11,107 @@ import gradio as gr
 MODEL = "model.pt"
 manga_translator = MangaTranslator()
 mocr = MangaOcr()
 def predict(img, translation_method, font):
-    image = np.array(img)
-    results = detect_bubbles(MODEL, image)
-    page_mask = np.zeros(image.shape[:2], dtype=np.uint8)
     bubbles_data = []
-    # 1️⃣ Extrai TODAS as máscaras primeiro
     for x1, y1, x2, y2, score, class_id in results:
         x1, y1, x2, y2 = map(int, (x1, y1, x2, y2))
-        crop = image[y1:y2, x1:x2]
-        mask, contour = extract_bubble_mask(crop)
         if mask is None:
             continue
         page_mask[y1:y2, x1:x2][mask == 255] = 255
-        bubbles_data.append((x1, y1, x2, y2, crop, contour))
-    # 2️⃣ Limpa a página UMA VEZ
-    image[page_mask == 255] = (255, 255, 255)
-    # 3️⃣ OCR + Tradução + Escrita
-    for x1, y1, x2, y2, crop, contour in bubbles_data:
-        pil_crop = Image.fromarray(crop.astype(np.uint8))
-        text = mocr(pil_crop)
         translated = manga_translator.translate(
-            text, method=translation_method
         )
-        image[y1:y2, x1:x2] = add_text(
-            image[y1:y2, x1:x2],
             translated,
             font,
             contour
         )
-    return Image.fromarray(image)
 demo = gr.Interface(
     fn=predict,
     inputs=[
         "image",
         gr.Dropdown(
-            [("Google", "google"), ("HF", "hf")],
             value="google",
-            label="Translation"
         ),
         gr.Dropdown(
-            [("WildWords", "fonts/WildWordsRoman.ttf")],
             value="fonts/WildWordsRoman.ttf",
             label="Font"
         )
     ],
     outputs=gr.Image(),
-    title="Manga Translator"
 )
-demo.launch()

 MODEL = "model.pt"
+# Modelos carregados uma única vez
 manga_translator = MangaTranslator()
 mocr = MangaOcr()
 def predict(img, translation_method, font):
+    # =========================
+    # Imagens separadas por função
+    # =========================
+    original_image = np.array(img).copy()   # usada SOMENTE para OCR
+    render_image = np.array(img).copy()     # usada para limpeza + escrita
+    results = detect_bubbles(MODEL, original_image)
+    page_mask = np.zeros(original_image.shape[:2], dtype=np.uint8)
     bubbles_data = []
+    # =========================
+    # 1) OCR + extração de máscara (SEM modificar imagem)
+    # =========================
     for x1, y1, x2, y2, score, class_id in results:
         x1, y1, x2, y2 = map(int, (x1, y1, x2, y2))
+        crop_original = original_image[y1:y2, x1:x2]
+        # OCR SEMPRE na imagem original
+        pil_crop = Image.fromarray(crop_original.astype(np.uint8))
+        text = mocr(pil_crop)
+        mask, contour = extract_bubble_mask(crop_original)
         if mask is None:
             continue
         page_mask[y1:y2, x1:x2][mask == 255] = 255
+        bubbles_data.append({
+            "box": (x1, y1, x2, y2),
+            "text": text,
+            "contour": contour
+        })
+    # =========================
+    # 2) Limpeza global da página (UMA VEZ)
+    # =========================
+    render_image[page_mask == 255] = (255, 255, 255)
+    # =========================
+    # 3) Tradução + escrita
+    # =========================
+    for bubble in bubbles_data:
+        x1, y1, x2, y2 = bubble["box"]
+        text = bubble["text"]
+        contour = bubble["contour"]
         translated = manga_translator.translate(
+            text,
+            method=translation_method
         )
+        render_image[y1:y2, x1:x2] = add_text(
+            render_image[y1:y2, x1:x2],
             translated,
             font,
             contour
         )
+    return Image.fromarray(render_image)
+# =========================
+# Interface Gradio
+# =========================
 demo = gr.Interface(
     fn=predict,
     inputs=[
         "image",
         gr.Dropdown(
+            [
+                ("Google", "google"),
+                ("HuggingFace", "hf"),
+                ("Bing", "bing"),
+                ("Sogou", "sogou")
+            ],
             value="google",
+            label="Translation Method"
         ),
         gr.Dropdown(
+            [
+                ("Wild Words Roman", "fonts/WildWordsRoman.ttf"),
+                ("Anime Ace Italic", "fonts/animeace_i.ttf"),
+                ("Mangati", "fonts/mangati.ttf"),
+                ("Komika Axis", "fonts/KOMIKAX_.ttf")
+            ],
             value="fonts/WildWordsRoman.ttf",
             label="Font"
         )
     ],
     outputs=gr.Image(),
+    title="Manga Translator",
+    description="Translate manga speech bubbles safely and correctly"
 )
+demo.launch(debug=False, share=False)