djscanlation

Sleeping

App Files Files Community

maggidev committed 16 days ago

Commit

f16ef2b

verified ·

1 Parent(s): c3fe4d5

Update app.py

Browse files

Files changed (1) hide show

app.py +86 -42

app.py CHANGED Viewed

@@ -2,73 +2,117 @@ from add_text import add_text
 from detect_bubbles import detect_bubbles
 from process_bubble import process_bubble
 from translator import MangaTranslator
-from ultralytics import YOLO
 from manga_ocr import MangaOcr
 from PIL import Image
 import gradio as gr
 import numpy as np
-import cv2
 MODEL = "model.pt"
-EXAMPLE_LIST = [["examples/0.png"],
-                 ["examples/ex0.png"]]
 TITLE = "Manga Translator"
 DESCRIPTION = "Translate text in manga bubbles!"
 def predict(img, translation_method, font):
-    if translation_method == None:
-        translation_method = "google"
-    if font == None:
-        font = "fonts/WildWordsRoman.ttf"
-    results = detect_bubbles(MODEL, img)
-    manga_translator = MangaTranslator()
-    mocr = MangaOcr()
     image = np.array(img)
     for result in results:
         x1, y1, x2, y2, score, class_id = result
-        detected_image = image[int(y1):int(y2), int(x1):int(x2)]
-        im = Image.fromarray(np.uint8((detected_image)*255))
-        text = mocr(im)
         detected_image, cont = process_bubble(detected_image)
-        text_translated = manga_translator.translate(text,
-                                                     method=translation_method)
-        image[int(y1):int(y2), int(x1):int(x2)] = add_text(detected_image, text_translated, font, cont)
     return Image.fromarray(image)
-demo = gr.Interface(fn=predict,
-                    inputs=["image",
-                            gr.Dropdown([("Google", "google"),
-                                         ("Helsinki-NLP's opus-mt-ja-en model",
-                                          "hf"),
-                                         ("Sogou", "sogou"),
-                                         ("Bing", "bing")],
-                                        label="Translation Method",
-                                        value="google"),
-                            gr.Dropdown([("Wild Words Roman", "fonts/WildWordsRoman.ttf"),
-                                         ("animeace_i", "fonts/animeace_i.ttf"),
-                                         ("mangati", "fonts/mangati.ttf"),
-                                         ("ariali", "fonts/ariali.ttf"),
-                                         ("ariali", "fonts/KOMIKAX_.ttf")],
-                                        label="Text Font",
-                                        value="fonts/WildWordsRoman.ttf")
-                            ],
-                    outputs=[gr.Image()],
-                    examples=EXAMPLE_LIST,
-                    title=TITLE,
-                    description=DESCRIPTION)
-demo.launch(debug=False,
-            share=False)

 from detect_bubbles import detect_bubbles
 from process_bubble import process_bubble
 from translator import MangaTranslator
 from manga_ocr import MangaOcr
 from PIL import Image
 import gradio as gr
 import numpy as np
+# =========================
+# Configuration
+# =========================
 MODEL = "model.pt"
+EXAMPLE_LIST = [
+    ["examples/0.png"],
+    ["examples/ex0.png"]
+]
 TITLE = "Manga Translator"
 DESCRIPTION = "Translate text in manga bubbles!"
+DEFAULT_TRANSLATION_METHOD = "google"
+DEFAULT_FONT = "fonts/WildWordsRoman.ttf"
+# =========================
+# One-time model loading (module level, reused by every predict call)
+# =========================
+manga_translator = MangaTranslator()
+mocr = MangaOcr()
+# =========================
+# Main function: detect bubbles, OCR each one, translate, and redraw the text
+# =========================
 def predict(img, translation_method, font):
+    translation_method = translation_method or DEFAULT_TRANSLATION_METHOD
+    font = font or DEFAULT_FONT
+    # Detect bubbles (assumes detect_bubbles does NOT reload the model internally - TODO confirm)
+    results = detect_bubbles(MODEL, img)
     image = np.array(img)
     for result in results:
         x1, y1, x2, y2, score, class_id = result
+        x1, y1, x2, y2 = map(int, (x1, y1, x2, y2))
+        detected_image = image[y1:y2, x1:x2]
+        # OCR: convert the cropped bubble to a uint8 PIL image and read its text
+        pil_crop = Image.fromarray(detected_image.astype(np.uint8))
+        text = mocr(pil_crop)
+        # Process the bubble; `cont` is handed to add_text below
         detected_image, cont = process_bubble(detected_image)
+        # Translate the OCR text with the selected method
+        text_translated = manga_translator.translate(
+            text,
+            method=translation_method
+        )
+        # Write the translated text and paste the result back into the page image
+        image[y1:y2, x1:x2] = add_text(
+            detected_image,
+            text_translated,
+            font,
+            cont
+        )
     return Image.fromarray(image)
+# =========================
+# Gradio interface
+# =========================
+demo = gr.Interface(
+    fn=predict,
+    inputs=[
+        "image",
+        gr.Dropdown(
+            [
+                ("Google", "google"),
+                ("Helsinki-NLP opus-mt-ja-en", "hf"),
+                ("Sogou", "sogou"),
+                ("Bing", "bing")
+            ],
+            label="Translation Method",
+            value=DEFAULT_TRANSLATION_METHOD
+        ),
+        gr.Dropdown(
+            [
+                ("Wild Words Roman", "fonts/WildWordsRoman.ttf"),
+                ("Anime Ace Italic", "fonts/animeace_i.ttf"),
+                ("Mangati", "fonts/mangati.ttf"),
+                ("Arial Italic", "fonts/ariali.ttf"),
+                ("Komika Axis", "fonts/KOMIKAX_.ttf")
+            ],
+            label="Text Font",
+            value=DEFAULT_FONT
+        )
+    ],
+    outputs=gr.Image(),
+    examples=EXAMPLE_LIST,
+    title=TITLE,
+    description=DESCRIPTION
+)
+demo.launch(
+    debug=False,
+    share=False
+)