tonyliu404 committed on
Commit
35672c7
·
2 Parent(s): 3f59fed e315f91

Merge branch 'main' of https://github.com/TonyLiu2004/Multimodal-Manga-Translator

Browse files
__pycache__/helpers.cpython-310.pyc DELETED
Binary file (1.46 kB)
 
main.py CHANGED
@@ -1,6 +1,7 @@
1
  from services.OCR_glm_service import OCR_Glm_Service
2
  from services.translate_tencentHY_service import Translate_Tencent_Service
3
  from services.bubble_detector_kitsumed_service import Bubble_Detector_Kitsumed_Service
 
4
  from services.bubble_detector_kiuyha_service import Bubble_Detector_Kiuyha_Service
5
  from services.OCR_japanese_service import OCR_Japanese_Service
6
  from services.translate_qwen_service import Translate_Qwen_Service
@@ -12,6 +13,7 @@ import torch
12
  from pathlib import Path
13
  from helpers import get_project_root, setup_fonts
14
  from fastapi import FastAPI
 
15
 
16
  ###
17
  ###
@@ -84,18 +86,18 @@ def show_boxes(image_path):
84
  # Get coordinates as a list of floats
85
  coords = box.xyxy[0].tolist() # [x1, y1, x2, y2]
86
  draw.rectangle(coords, outline="red", width=1)
87
-
88
  # label
89
  conf = box.conf[0].item()
90
  box_cropped = img.crop(coords)
91
  # box_cropped = upscale_for_ocr(box_cropped, scale=3)
92
  with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as f:
93
- box_cropped.save(f.name)
94
  temp_path = f.name
95
  draw.text(
96
- (coords[0], coords[1] - 10),
97
- "b",
98
- fill="red",
99
  font=font
100
  )
101
  img.show()
@@ -114,34 +116,34 @@ def get_wrapped_text(text, font, max_width):
114
  else:
115
  lines.append(' '.join(current_line))
116
  current_line = [word]
117
-
118
  lines.append(' '.join(current_line))
119
  return lines
120
 
121
  def fit_text_to_box(draw, text, box_coords, font_path, padding=5, initial_size=40):
122
  x1, y1, x2, y2 = box_coords
123
-
124
  padding = padding
125
  target_width = (x2 - x1) - (padding * 2)
126
  target_height = (y2 - y1) - (padding * 2)
127
-
128
  current_size = initial_size
129
  lines = []
130
-
131
  while current_size > 8:
132
  # index=0 for Japanese, 1 for Korean in NotoSansCJK
133
  font = ImageFont.truetype(font_path, size=current_size)
134
  lines = get_wrapped_text(text, font, target_width)
135
-
136
  # Use a more reliable line height measurement
137
  # getbbox can be inconsistent; use font.size * constant for better leading
138
- line_height = int(current_size * 1.2)
139
  total_height = line_height * len(lines)
140
-
141
  if total_height <= target_height:
142
  break
143
  current_size -= 2 # Step down by 2 for speed
144
-
145
  return lines, font, current_size, line_height
146
 
147
  def upscale_for_ocr(img, scale=2):
@@ -152,7 +154,7 @@ def process_image(image_path, language):
152
  bubble_results = bubble_detector_model.predict(image_path)
153
  img = Image.open(image_path)
154
  draw = ImageDraw.Draw(img)
155
-
156
  texts = []
157
  coordinates={}
158
  i=0
@@ -164,7 +166,7 @@ def process_image(image_path, language):
164
  # box_cropped.show()
165
 
166
  with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as f:
167
- box_cropped.save(f.name)
168
  temp_path = f.name
169
 
170
  text = ""
@@ -184,13 +186,26 @@ def process_image(image_path, language):
184
  print("translating...")
185
  translated = translate_model.translate(texts)
186
  print(translated)
187
- for id, translated_text in translated.items():
188
- coords = coordinates[int(id)]
189
- original_text = texts[int(id)]['text']
190
- print(f"{id}: {original_text}")
 
 
 
 
 
 
191
  print(translated_text)
192
  print("==================================")
193
 
 
 
 
 
 
 
 
194
  #wipe the space
195
  draw.rectangle(coords, fill="white", outline="white")
196
 
@@ -207,23 +222,23 @@ def process_image(image_path, language):
207
  for line in lines:
208
  line = line.strip()
209
  if not line: continue
210
-
211
  # Horizontal Centering
212
  line_w = draw.textlength(line, font=best_font)
213
  start_x = coords[0] + ((coords[2] - coords[0]) - line_w) / 2
214
-
215
  draw.text((start_x, start_y), line, font=best_font, fill="black")
216
  start_y += line_h
217
 
218
- return img
219
 
220
  def translate_text(text, language):
221
  # translated_text = ""
222
  # if language == "japanese":
223
- # translated_text =
224
 
225
  translated_text = translate_model.translate(text)
226
-
227
  return translated_text
228
 
229
  def runOCRTests():
@@ -238,10 +253,60 @@ def runOCRTests():
238
  print(f"failed on {i}")
239
  break
240
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  def main():
242
  img_path = ROOT / "test_images" / "test_2.png"
243
- img = process_image(img_path, "japanese")
244
  img.show()
 
 
 
245
 
246
  @app.get("/")
247
  def home():
 
1
  from services.OCR_glm_service import OCR_Glm_Service
2
  from services.translate_tencentHY_service import Translate_Tencent_Service
3
  from services.bubble_detector_kitsumed_service import Bubble_Detector_Kitsumed_Service
4
+
5
  from services.bubble_detector_kiuyha_service import Bubble_Detector_Kiuyha_Service
6
  from services.OCR_japanese_service import OCR_Japanese_Service
7
  from services.translate_qwen_service import Translate_Qwen_Service
 
13
  from pathlib import Path
14
  from helpers import get_project_root, setup_fonts
15
  from fastapi import FastAPI
16
+ import db as manga_db
17
 
18
  ###
19
  ###
 
86
  # Get coordinates as a list of floats
87
  coords = box.xyxy[0].tolist() # [x1, y1, x2, y2]
88
  draw.rectangle(coords, outline="red", width=1)
89
+
90
  # label
91
  conf = box.conf[0].item()
92
  box_cropped = img.crop(coords)
93
  # box_cropped = upscale_for_ocr(box_cropped, scale=3)
94
  with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as f:
95
+ box_cropped.save(f.name)
96
  temp_path = f.name
97
  draw.text(
98
+ (coords[0], coords[1] - 10),
99
+ "b",
100
+ fill="red",
101
  font=font
102
  )
103
  img.show()
 
116
  else:
117
  lines.append(' '.join(current_line))
118
  current_line = [word]
119
+
120
  lines.append(' '.join(current_line))
121
  return lines
122
 
123
  def fit_text_to_box(draw, text, box_coords, font_path, padding=5, initial_size=40):
124
  x1, y1, x2, y2 = box_coords
125
+
126
  padding = padding
127
  target_width = (x2 - x1) - (padding * 2)
128
  target_height = (y2 - y1) - (padding * 2)
129
+
130
  current_size = initial_size
131
  lines = []
132
+
133
  while current_size > 8:
134
  # index=0 for Japanese, 1 for Korean in NotoSansCJK
135
  font = ImageFont.truetype(font_path, size=current_size)
136
  lines = get_wrapped_text(text, font, target_width)
137
+
138
  # Use a more reliable line height measurement
139
  # getbbox can be inconsistent; use font.size * constant for better leading
140
+ line_height = int(current_size * 1.2)
141
  total_height = line_height * len(lines)
142
+
143
  if total_height <= target_height:
144
  break
145
  current_size -= 2 # Step down by 2 for speed
146
+
147
  return lines, font, current_size, line_height
148
 
149
  def upscale_for_ocr(img, scale=2):
 
154
  bubble_results = bubble_detector_model.predict(image_path)
155
  img = Image.open(image_path)
156
  draw = ImageDraw.Draw(img)
157
+
158
  texts = []
159
  coordinates={}
160
  i=0
 
166
  # box_cropped.show()
167
 
168
  with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as f:
169
+ box_cropped.save(f.name)
170
  temp_path = f.name
171
 
172
  text = ""
 
186
  print("translating...")
187
  translated = translate_model.translate(texts)
188
  print(translated)
189
+
190
+ bubble_data = []
191
+ for i in range(len(texts)):
192
+ coords = coordinates[i]
193
+ x1, y1, x2, y2 = coords
194
+ original_text = texts[i]["text"]
195
+ translated_text = translated.get(str(i), translated.get(i, ""))
196
+ if not isinstance(translated_text, str):
197
+ translated_text = str(translated_text)
198
+ print(f"{i}: {original_text}")
199
  print(translated_text)
200
  print("==================================")
201
 
202
+ bubble_data.append({
203
+ "bubble_index": i,
204
+ "x1": float(x1), "y1": float(y1), "x2": float(x2), "y2": float(y2),
205
+ "original_text": original_text,
206
+ "translated_text": translated_text,
207
+ })
208
+
209
  #wipe the space
210
  draw.rectangle(coords, fill="white", outline="white")
211
 
 
222
  for line in lines:
223
  line = line.strip()
224
  if not line: continue
225
+
226
  # Horizontal Centering
227
  line_w = draw.textlength(line, font=best_font)
228
  start_x = coords[0] + ((coords[2] - coords[0]) - line_w) / 2
229
+
230
  draw.text((start_x, start_y), line, font=best_font, fill="black")
231
  start_y += line_h
232
 
233
+ return img, bubble_data
234
 
235
  def translate_text(text, language):
236
  # translated_text = ""
237
  # if language == "japanese":
238
+ # translated_text =
239
 
240
  translated_text = translate_model.translate(text)
241
+
242
  return translated_text
243
 
244
  def runOCRTests():
 
253
  print(f"failed on {i}")
254
  break
255
 
256
+ def _language_to_code(language: str) -> str:
257
+ """Map language name to ISO 639-1 style code for DB."""
258
+ m = {"japanese": "ja", "english": "en", "korean": "ko", "chinese": "zh"}
259
+ return m.get(language.lower(), language[:2] if len(language) >= 2 else "ja")
260
+
261
+
262
def process_chapter(
    manga_title: str,
    chapter_number: float,
    page_paths: list,
    language: str = "japanese",
    provider_id: str = "local",
    db_url: str = None,
):
    """Translate every page of a chapter and record the text of each bubble.

    Each existing page is run through ``process_image`` and its bubble data
    is handed to ``manga_db.save_page_translation``; only text and
    coordinates are passed on, not the rendered image. Pages whose file is
    missing are reported and skipped, but still consume their page number.

    Args:
        manga_title: Title recorded with every page.
        chapter_number: Chapter recorded with every page.
        page_paths: Page image paths in reading order; numbering starts at 1.
        language: Source language name, converted with ``_language_to_code``.
        provider_id: Source/provider identifier (e.g. 'mangadex', 'local').
        db_url: Forwarded to ``manga_db``; the original note describes it as
            a PostgreSQL URL, with DATABASE_URL as the alternative
            (presumably used when this is None; confirm in the db module).

    Returns:
        A list with one ``(img, bubble_data)`` tuple per processed page.
    """
    manga_db.init_db(db_url)

    # Arguments that are identical for every page of the chapter.
    chapter_fields = {
        "provider_id": provider_id,
        "manga_title": manga_title,
        "chapter_number": chapter_number,
        "language_code": _language_to_code(language),
        "db_url": db_url,
    }

    processed_pages = []
    for page_no, raw_path in enumerate(page_paths, start=1):
        page_file = Path(raw_path)
        if not page_file.exists():
            print(f"Skip missing page {page_no}: {page_file}")
            continue

        print(f"Processing chapter {chapter_number} page {page_no}/{len(page_paths)}: {page_file.name}")
        page_img, page_bubbles = process_image(str(page_file), language)
        manga_db.save_page_translation(
            page_number=page_no,
            bubbles=page_bubbles,
            **chapter_fields,
        )
        processed_pages.append((page_img, page_bubbles))

    print(f"Chapter '{manga_title}' ch.{chapter_number} saved to DB ({len(processed_pages)} pages).")
    return processed_pages
301
+
302
+
303
  def main():
304
  img_path = ROOT / "test_images" / "test_2.png"
305
+ img, bubble_data = process_image(img_path, "japanese")
306
  img.show()
307
+ # manga_db.save_page_translation(provider_id="local", manga_title="Test", chapter_number=0,
308
+ # page_number=1, bubbles=bubble_data, language_code="ja")
309
+
310
 
311
  @app.get("/")
312
  def home():
services/bubble_detector_kitsumed_service.py CHANGED
@@ -18,5 +18,4 @@ class Bubble_Detector_Kitsumed_Service:
18
  show_conf=show_conf,
19
  imgsz=imgsz,
20
  )
21
- return results[0]
22
-
 
18
  show_conf=show_conf,
19
  imgsz=imgsz,
20
  )
21
+ return results[0]