maggidev committed on
Commit
f16ef2b
·
verified ·
1 Parent(s): c3fe4d5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -42
app.py CHANGED
@@ -2,73 +2,117 @@ from add_text import add_text
2
  from detect_bubbles import detect_bubbles
3
  from process_bubble import process_bubble
4
  from translator import MangaTranslator
5
- from ultralytics import YOLO
6
  from manga_ocr import MangaOcr
 
7
  from PIL import Image
8
  import gradio as gr
9
  import numpy as np
10
- import cv2
11
 
 
 
 
12
 
13
  MODEL = "model.pt"
14
- EXAMPLE_LIST = [["examples/0.png"],
15
- ["examples/ex0.png"]]
 
 
 
 
16
  TITLE = "Manga Translator"
17
  DESCRIPTION = "Translate text in manga bubbles!"
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  def predict(img, translation_method, font):
21
- if translation_method == None:
22
- translation_method = "google"
23
- if font == None:
24
- font = "fonts/WildWordsRoman.ttf"
25
 
26
- results = detect_bubbles(MODEL, img)
 
27
 
28
- manga_translator = MangaTranslator()
29
- mocr = MangaOcr()
30
 
31
  image = np.array(img)
32
 
33
  for result in results:
34
  x1, y1, x2, y2, score, class_id = result
35
 
36
- detected_image = image[int(y1):int(y2), int(x1):int(x2)]
 
 
37
 
38
- im = Image.fromarray(np.uint8((detected_image)*255))
39
- text = mocr(im)
 
40
 
 
41
  detected_image, cont = process_bubble(detected_image)
42
 
43
- text_translated = manga_translator.translate(text,
44
- method=translation_method)
 
 
 
45
 
46
- image[int(y1):int(y2), int(x1):int(x2)] = add_text(detected_image, text_translated, font, cont)
 
 
 
 
 
 
47
 
48
  return Image.fromarray(image)
49
 
50
- demo = gr.Interface(fn=predict,
51
- inputs=["image",
52
- gr.Dropdown([("Google", "google"),
53
- ("Helsinki-NLP's opus-mt-ja-en model",
54
- "hf"),
55
- ("Sogou", "sogou"),
56
- ("Bing", "bing")],
57
- label="Translation Method",
58
- value="google"),
59
- gr.Dropdown([("Wild Words Roman", "fonts/WildWordsRoman.ttf"),
60
- ("animeace_i", "fonts/animeace_i.ttf"),
61
- ("mangati", "fonts/mangati.ttf"),
62
- ("ariali", "fonts/ariali.ttf"),
63
- ("ariali", "fonts/KOMIKAX_.ttf")],
64
- label="Text Font",
65
- value="fonts/WildWordsRoman.ttf")
66
- ],
67
- outputs=[gr.Image()],
68
- examples=EXAMPLE_LIST,
69
- title=TITLE,
70
- description=DESCRIPTION)
71
-
72
-
73
- demo.launch(debug=False,
74
- share=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from detect_bubbles import detect_bubbles
3
  from process_bubble import process_bubble
4
  from translator import MangaTranslator
 
5
  from manga_ocr import MangaOcr
6
+
7
  from PIL import Image
8
  import gradio as gr
9
  import numpy as np
 
10
 
11
+ # =========================
12
+ # Configuration
13
+ # =========================
14
 
15
  MODEL = "model.pt"
16
+
17
+ EXAMPLE_LIST = [
18
+ ["examples/0.png"],
19
+ ["examples/ex0.png"]
20
+ ]
21
+
22
  TITLE = "Manga Translator"
23
  DESCRIPTION = "Translate text in manga bubbles!"
24
 
25
+ DEFAULT_TRANSLATION_METHOD = "google"
26
+ DEFAULT_FONT = "fonts/WildWordsRoman.ttf"
27
+
28
+ # =========================
29
+ # Carregamento único de modelos
30
+ # =========================
31
+
32
+ manga_translator = MangaTranslator()
33
+ mocr = MangaOcr()
34
+
35
+ # =========================
36
+ # Main function
37
+ # =========================
38
 
39
  def predict(img, translation_method, font):
 
 
 
 
40
 
41
+ translation_method = translation_method or DEFAULT_TRANSLATION_METHOD
42
+ font = font or DEFAULT_FONT
43
 
44
+ # Detecta bolhas (assume que detect_bubbles NÃO recarrega o modelo internamente)
45
+ results = detect_bubbles(MODEL, img)
46
 
47
  image = np.array(img)
48
 
49
  for result in results:
50
  x1, y1, x2, y2, score, class_id = result
51
 
52
+ x1, y1, x2, y2 = map(int, (x1, y1, x2, y2))
53
+
54
+ detected_image = image[y1:y2, x1:x2]
55
 
56
+ # OCR
57
+ pil_crop = Image.fromarray(detected_image.astype(np.uint8))
58
+ text = mocr(pil_crop)
59
 
60
+ # Processa a bolha
61
  detected_image, cont = process_bubble(detected_image)
62
 
63
+ # Tradução
64
+ text_translated = manga_translator.translate(
65
+ text,
66
+ method=translation_method
67
+ )
68
 
69
+ # Escreve o texto traduzido
70
+ image[y1:y2, x1:x2] = add_text(
71
+ detected_image,
72
+ text_translated,
73
+ font,
74
+ cont
75
+ )
76
 
77
  return Image.fromarray(image)
78
 
79
+ # =========================
80
+ # Gradio interface
81
+ # =========================
82
+
83
# Dropdown entries are (label shown in the UI, value passed to predict) pairs.
_translation_dropdown = gr.Dropdown(
    choices=[
        ("Google", "google"),
        ("Helsinki-NLP opus-mt-ja-en", "hf"),
        ("Sogou", "sogou"),
        ("Bing", "bing"),
    ],
    label="Translation Method",
    value=DEFAULT_TRANSLATION_METHOD,
)

_font_dropdown = gr.Dropdown(
    choices=[
        ("Wild Words Roman", "fonts/WildWordsRoman.ttf"),
        ("Anime Ace Italic", "fonts/animeace_i.ttf"),
        ("Mangati", "fonts/mangati.ttf"),
        ("Arial Italic", "fonts/ariali.ttf"),
        ("Komika Axis", "fonts/KOMIKAX_.ttf"),
    ],
    label="Text Font",
    value=DEFAULT_FONT,
)

# Inputs are listed in the order of predict's parameters: image, method, font.
demo = gr.Interface(
    fn=predict,
    inputs=["image", _translation_dropdown, _font_dropdown],
    outputs=gr.Image(),
    examples=EXAMPLE_LIST,
    title=TITLE,
    description=DESCRIPTION,
)

demo.launch(debug=False, share=False)