tonyliu404 committed on
Commit
47dc635
·
1 Parent(s): c955d4f

Overhauled backend API structure

Browse files
Files changed (4) hide show
  1. Dockerfile +3 -1
  2. api.py +26 -30
  3. main.py +34 -244
  4. services/image_processor.py +284 -0
Dockerfile CHANGED
@@ -4,6 +4,8 @@ FROM python:3.12-slim
4
  # 2. Set environment variables
5
  ENV PYTHONDONTWRITEBYTECODE=1
6
  ENV PYTHONUNBUFFERED=1
 
 
7
 
8
  # 3. Install system dependencies
9
  RUN apt-get update && apt-get install -y libgl1 libglib2.0-0
@@ -24,5 +26,5 @@ RUN env PYTHONPATH=. python -c "from helpers import setup_fonts; setup_fonts()"
24
  RUN env PYTHONPATH=. python -c "from manga_ocr import MangaOcr; MangaOcr()"
25
 
26
  # 8. Expose and Start
27
- EXPOSE 8000
28
  CMD ["python", "main.py"]
 
4
  # 2. Set environment variables
5
  ENV PYTHONDONTWRITEBYTECODE=1
6
  ENV PYTHONUNBUFFERED=1
7
+ #huggingface port
8
+ ENV PORT=7860
9
 
10
  # 3. Install system dependencies
11
  RUN apt-get update && apt-get install -y libgl1 libglib2.0-0
 
26
  RUN env PYTHONPATH=. python -c "from manga_ocr import MangaOcr; MangaOcr()"
27
 
28
  # 8. Expose and Start
29
+ EXPOSE 7860
30
  CMD ["python", "main.py"]
api.py CHANGED
@@ -3,33 +3,35 @@ Read-only API for the frontend. Wraps db list_entries, get_segments, get_chapter
3
  Run from backend: uvicorn api:app --reload --host 0.0.0.0 --port 8000
4
  """
5
 
6
- from fastapi import FastAPI, Query
7
  from fastapi.middleware.cors import CORSMiddleware
8
  from fastapi.responses import JSONResponse
9
  import proxy
10
  from services import mangadex_service
 
11
  import httpx
12
  import db
13
  from sqlmodel import Session, text
14
  from db.models import Manga
15
  from db.schemas import ChapterListOut, SegmentListOut
16
 
17
- app = FastAPI(
18
- title="Manga Translator API",
19
- description="Read endpoints for chapters and segments",
20
- version="1.0.0",
21
- )
22
 
23
- app.add_middleware(
24
- CORSMiddleware,
25
- allow_origins=["*"], # Currently allow all origins, should be restricted to specific origins in production
26
- allow_credentials=True,
27
- allow_methods=["*"],
28
- allow_headers=["*"],
29
- )
30
 
 
31
 
32
- @app.get("/")
33
  def root():
34
  """API root - confirms the API is running."""
35
  return {
@@ -39,7 +41,7 @@ def root():
39
  "health": "/health",
40
  }
41
 
42
- @app.get("/health/db")
43
  def health_db():
44
  engine = db.get_engine()
45
  try:
@@ -50,7 +52,7 @@ def health_db():
50
  return {"status": "error", "detail": str(e)}
51
 
52
 
53
- @app.get("/mangas", response_model=list[Manga])
54
  def list_mangas(
55
  order_by: str = Query("created_at", description="manga_title | created_at | updated_at"),
56
  order_desc: bool = Query(True, description="Sort descending"),
@@ -60,7 +62,7 @@ def list_mangas(
60
  """List mangas (manga_title, created_at, updated_at). Supports pagination."""
61
  return db.list_mangas(order_by=order_by, order_desc=order_desc, limit=limit, offset=offset)
62
 
63
- @app.get("/chapters", response_model=list[ChapterListOut])
64
  def list_chapters(
65
  manga_title: str = Query(...),
66
  provider_id: str | None = Query(None, description="e.g. local, mangadex"),
@@ -71,7 +73,7 @@ def list_chapters(
71
  return db.list_chapters(manga_title, provider_id, limit=limit, offset=offset)
72
 
73
 
74
- @app.get("/segments", response_model=list[SegmentListOut])
75
  def get_segments(
76
  provider_id: str | None = Query(None, description="e.g. local, mangadex"),
77
  manga_title: str | None = Query(None),
@@ -91,7 +93,7 @@ def get_segments(
91
  )
92
 
93
 
94
- @app.get("/chapters/segments", response_model=list[SegmentListOut])
95
  def get_chapter_segments(
96
  provider_id: str = Query(..., description="e.g. local, mangadex"),
97
  manga_title: str = Query(...),
@@ -104,22 +106,16 @@ def get_chapter_segments(
104
 
105
 
106
  # to make sure api is running and responding
107
- @app.get("/health")
108
  def health():
109
  """Health check."""
110
  return {"status": "ok"}
111
 
112
-
113
- @app.exception_handler(ValueError)
114
- def value_error_handler(request, exc):
115
- """Return 400 for invalid provider_id etc."""
116
- return JSONResponse(status_code=400, content={"detail": str(exc)})
117
-
118
  ###########
119
  ###########
120
  ###########
121
 
122
- @app.get("/api/manga/chapter/{chapter_id}/page/{page_index}")
123
  async def proxy_manga_page(chapter_id: str, page_index: int):
124
  urls = mangadex_service.get_chapter_panel_urls(chapter_id)
125
  if not urls or page_index >= len(urls):
@@ -127,12 +123,12 @@ async def proxy_manga_page(chapter_id: str, page_index: int):
127
 
128
  return await proxy.get_manga_page_stream(urls[page_index])
129
 
130
- @app.get("/api/manga/cover_art")
131
  async def proxy_manga_cover_art(manga_id: str, cover_url: str, size: int = 256):
132
  url = f"https://uploads.mangadex.org/covers/{manga_id}/{cover_url}.{size}.jpg"
133
  return await proxy.get_manga_page_stream(url)
134
 
135
- @app.get("/api/manga/search")
136
  async def get_popular_manga(
137
  title: str = "",
138
  limit: int = 15,
@@ -152,7 +148,7 @@ async def get_popular_manga(
152
  return results
153
 
154
 
155
- @app.get("/api/manga/{manga_id}/chapters")
156
  async def get_chapters(
157
  manga_id: str,
158
  limit: int = 100,
 
3
  Run from backend: uvicorn main:app --reload --host 0.0.0.0 --port 8000
4
  """
5
 
6
+ from fastapi import APIRouter, Query
7
  from fastapi.middleware.cors import CORSMiddleware
8
  from fastapi.responses import JSONResponse
9
  import proxy
10
  from services import mangadex_service
11
+ from services.image_processor import ImageProcessor
12
  import httpx
13
  import db
14
  from sqlmodel import Session, text
15
  from db.models import Manga
16
  from db.schemas import ChapterListOut, SegmentListOut
17
 
18
+ # app = FastAPI(
19
+ # title="Manga Translator API",
20
+ # description="Read endpoints for chapters and segments",
21
+ # version="1.0.0",
22
+ # )
23
 
24
+ # app.add_middleware(
25
+ # CORSMiddleware,
26
+ # allow_origins=["*"], # Currently allow all origins, should be restricted to specific origins in production
27
+ # allow_credentials=True,
28
+ # allow_methods=["*"],
29
+ # allow_headers=["*"],
30
+ # )
31
 
32
+ router = APIRouter()
33
 
34
+ @router.get("/")
35
  def root():
36
  """API root - confirms the API is running."""
37
  return {
 
41
  "health": "/health",
42
  }
43
 
44
+ @router.get("/health/db")
45
  def health_db():
46
  engine = db.get_engine()
47
  try:
 
52
  return {"status": "error", "detail": str(e)}
53
 
54
 
55
+ @router.get("/mangas", response_model=list[Manga])
56
  def list_mangas(
57
  order_by: str = Query("created_at", description="manga_title | created_at | updated_at"),
58
  order_desc: bool = Query(True, description="Sort descending"),
 
62
  """List mangas (manga_title, created_at, updated_at). Supports pagination."""
63
  return db.list_mangas(order_by=order_by, order_desc=order_desc, limit=limit, offset=offset)
64
 
65
+ @router.get("/chapters", response_model=list[ChapterListOut])
66
  def list_chapters(
67
  manga_title: str = Query(...),
68
  provider_id: str | None = Query(None, description="e.g. local, mangadex"),
 
73
  return db.list_chapters(manga_title, provider_id, limit=limit, offset=offset)
74
 
75
 
76
+ @router.get("/segments", response_model=list[SegmentListOut])
77
  def get_segments(
78
  provider_id: str | None = Query(None, description="e.g. local, mangadex"),
79
  manga_title: str | None = Query(None),
 
93
  )
94
 
95
 
96
+ @router.get("/chapters/segments", response_model=list[SegmentListOut])
97
  def get_chapter_segments(
98
  provider_id: str = Query(..., description="e.g. local, mangadex"),
99
  manga_title: str = Query(...),
 
106
 
107
 
108
  # to make sure api is running and responding
109
+ @router.get("/health")
110
  def health():
111
  """Health check."""
112
  return {"status": "ok"}
113
 
 
 
 
 
 
 
114
  ###########
115
  ###########
116
  ###########
117
 
118
+ @router.get("/api/manga/chapter/{chapter_id}/page/{page_index}")
119
  async def proxy_manga_page(chapter_id: str, page_index: int):
120
  urls = mangadex_service.get_chapter_panel_urls(chapter_id)
121
  if not urls or page_index >= len(urls):
 
123
 
124
  return await proxy.get_manga_page_stream(urls[page_index])
125
 
126
+ @router.get("/api/manga/cover_art")
127
  async def proxy_manga_cover_art(manga_id: str, cover_url: str, size: int = 256):
128
  url = f"https://uploads.mangadex.org/covers/{manga_id}/{cover_url}.{size}.jpg"
129
  return await proxy.get_manga_page_stream(url)
130
 
131
+ @router.get("/api/manga/search")
132
  async def get_popular_manga(
133
  title: str = "",
134
  limit: int = 15,
 
148
  return results
149
 
150
 
151
+ @router.get("/api/manga/{manga_id}/chapters")
152
  async def get_chapters(
153
  manga_id: str,
154
  limit: int = 100,
main.py CHANGED
@@ -15,7 +15,31 @@ from fastapi import FastAPI
15
  from typing import Optional
16
  import db as manga_db
17
  import uvicorn
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
 
19
 
20
  ###
21
  ###
@@ -44,9 +68,7 @@ device = torch.device(device_name)
44
 
45
  print(f"Loading models from {MODEL_PATH} and fonts from {FONT_PATH}")
46
 
47
- app = FastAPI()
48
-
49
- #####################
50
 
51
  # GLMOCR_MODEL_DIR = MODEL_PATH / "GlmOcr"
52
  # ocr_model = OCR_Glm_Service(GLMOCR_MODEL_DIR)
@@ -61,6 +83,7 @@ bubble_detector_model = Bubble_Detector_Kiuyha_Service(BUBBLE_DETECTOR_MODEL_DIR
61
 
62
  translate_model = Translate_Qwen_Service()
63
 
 
64
 
65
  if not FONT_PATH.exists():
66
  print(f"Font NotoSansCJK not found at {FONT_PATH}. Attempting to download.")
@@ -75,251 +98,20 @@ if FONT_PATH.exists():
75
  else:
76
  raise FileNotFoundError(f"Font NotoSansCJK not found at {FONT_PATH}")
77
 
 
78
  print("Finished loading all models and fonts")
79
 
80
  ###
81
  ###
82
  ###
83
 
84
- def get_ocr():
85
- from manga_ocr import MangaOcr
86
- return MangaOcr()
87
-
88
- def show_boxes(image_path):
89
- result = bubble_detector_model.predict(image_path)
90
- img = Image.open(image_path).convert("RGB")
91
- draw = ImageDraw.Draw(img)
92
- for box in result.boxes:
93
- # Get coordinates as a list of floats
94
- coords = box.xyxy[0].tolist() # [x1, y1, x2, y2]
95
- draw.rectangle(coords, outline="red", width=1)
96
-
97
- # label
98
- conf = box.conf[0].item()
99
- box_cropped = img.crop(coords)
100
- # box_cropped = upscale_for_ocr(box_cropped, scale=3)
101
- with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as f:
102
- box_cropped.save(f.name)
103
- temp_path = f.name
104
- draw.text(
105
- (coords[0], coords[1] - 10),
106
- "b",
107
- fill="red",
108
- font=font
109
- )
110
- img.show()
111
-
112
- def get_wrapped_text(text, font, max_width):
113
- lines = []
114
- words = text.split(' ') # Split by words for English
115
- current_line = []
116
-
117
- for word in words:
118
- # Check if adding the next word exceeds the width
119
- test_line = ' '.join(current_line + [word])
120
- # getlength() is more accurate than getbbox for text width
121
- if font.getlength(test_line) <= max_width:
122
- current_line.append(word)
123
- else:
124
- lines.append(' '.join(current_line))
125
- current_line = [word]
126
-
127
- lines.append(' '.join(current_line))
128
- return lines
129
-
130
- def fit_text_to_box(draw, text, box_coords, font_path, padding=5, initial_size=40):
131
- x1, y1, x2, y2 = box_coords
132
-
133
- padding = padding
134
- target_width = (x2 - x1) - (padding * 2)
135
- target_height = (y2 - y1) - (padding * 2)
136
-
137
- current_size = initial_size
138
- lines = []
139
-
140
- while current_size > 8:
141
- # index=0 for Japanese, 1 for Korean in NotoSansCJK
142
- font = ImageFont.truetype(font_path, size=current_size)
143
- lines = get_wrapped_text(text, font, target_width)
144
-
145
- # Use a more reliable line height measurement
146
- # getbbox can be inconsistent; use font.size * constant for better leading
147
- line_height = int(current_size * 1.2)
148
- total_height = line_height * len(lines)
149
-
150
- if total_height <= target_height:
151
- break
152
- current_size -= 2 # Step down by 2 for speed
153
-
154
- return lines, font, current_size, line_height
155
-
156
- def upscale_for_ocr(img, scale=2):
157
- w, h = img.size
158
- return img.resize((w*scale, h*scale), Image.BICUBIC)
159
-
160
- def process_image(image_path, language):
161
- bubble_results = bubble_detector_model.predict(image_path)
162
- print(f"bubble results: {bubble_results}")
163
- img = Image.open(image_path)
164
- draw = ImageDraw.Draw(img)
165
-
166
- ocr_model = get_ocr()
167
- texts = []
168
- coordinates={}
169
- i=0
170
- for box_data in bubble_results:
171
- print(f"box_data {i}")
172
- coords = box_data['coords']
173
- draw.rectangle(coords, outline="red", width=1)
174
- box_cropped = img.crop(coords)
175
- # box_cropped = upscale_for_ocr(box_cropped, scale=3)
176
- # box_cropped.show()
177
-
178
- with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as f:
179
- box_cropped.save(f.name)
180
- temp_path = f.name
181
-
182
- text = ""
183
- # if language == "japanese":
184
- # # text = ocr_japanese_model.runOCR(temp_path)
185
- # text = ocr_model(temp_path)
186
- # else:
187
- # text = ocr_model.runOCR(temp_path)
188
-
189
- try:
190
- # MangaOcr is callable: mocr(image)
191
- text = ocr_model(box_cropped)
192
- except Exception as e:
193
- print(f"OCR Error on bubble {i}: {e}")
194
-
195
- text = re.sub(r'[\n\r\u2028\u2029]+', ' ', text) #remove new lines
196
- texts.append({"id": i, "text": text})
197
- coordinates[i] = coords
198
- i+=1
199
- print(f'OCR Complete, total {len(texts)} bubbles.')
200
-
201
- #add translated text to manga image
202
- try:
203
- print("Translating with cloud Qwen model...")
204
- translated = translate_model.translate_cloud(texts)
205
- except Exception as e:
206
- print("API translation failed with Qwen, falling back to local model...")
207
- translated = translate_model.translate(texts)
208
-
209
- print(translated)
210
-
211
- bubble_data = []
212
- for i in range(len(texts)):
213
- coords = coordinates[i]
214
- x1, y1, x2, y2 = coords
215
- original_text = texts[i]["text"]
216
- translated_text = translated.get(str(i), translated.get(i, ""))
217
- if not isinstance(translated_text, str):
218
- translated_text = str(translated_text)
219
- print(f"{i}: {original_text}")
220
- print(translated_text)
221
- print("==================================")
222
-
223
- bubble_data.append({
224
- "bubble_index": i,
225
- "x1": float(x1), "y1": float(y1), "x2": float(x2), "y2": float(y2),
226
- "original_text": original_text,
227
- "translated_text": translated_text,
228
- })
229
-
230
- #wipe the space
231
- draw.rectangle(coords, fill="white", outline="white")
232
-
233
- # 1. Calculate the best fit
234
- lines, best_font, final_size, line_h = fit_text_to_box(draw, translated_text, coords, FONT_PATH)
235
-
236
- # Calculate total height of the block
237
- total_h = line_h * len(lines)
238
-
239
- # Start_y adjusted for the block height relative to the box center
240
- start_y = coords[1] + ((coords[3] - coords[1]) - total_h) / 2
241
-
242
- # 3. Draw each line centered horizontally
243
- for line in lines:
244
- line = line.strip()
245
- if not line: continue
246
-
247
- # Horizontal Centering
248
- line_w = draw.textlength(line, font=best_font)
249
- start_x = coords[0] + ((coords[2] - coords[0]) - line_w) / 2
250
-
251
- draw.text((start_x, start_y), line, font=best_font, fill="black")
252
- start_y += line_h
253
-
254
- return img, bubble_data
255
-
256
- def translate_text(text, language):
257
- # translated_text = ""
258
- # if language == "japanese":
259
- # translated_text =
260
-
261
- translated_text = translate_model.translate(text)
262
-
263
- return translated_text
264
-
265
- def _language_to_code(language: str) -> str:
266
- """Map language name to ISO 639-1 style code for DB."""
267
- m = {"japanese": "ja", "english": "en", "korean": "ko", "chinese": "zh"}
268
- return m.get(language.lower(), language[:2] if len(language) >= 2 else "ja")
269
-
270
-
271
- def process_chapter(
272
- manga_title: str,
273
- chapter_number: float,
274
- page_paths: list,
275
- language: str = "japanese",
276
- provider_id: str = "local",
277
- external_manga_id: Optional[str] = None,
278
- db_url: str = None,
279
- ):
280
- """
281
- Process each page of a chapter, draw translated text on images, and save
282
- to the PostgreSQL text repository (provider_id, manga_title, chapter/page,
283
- segment coordinates, original/translated text, language code). No images stored.
284
- page_paths: list of paths to page images in order.
285
- provider_id: source/provider identifier (e.g. 'mangadex', 'local').
286
- db_url: PostgreSQL URL or set DATABASE_URL.
287
- Returns (list of (img, bubble_data) per page).
288
- """
289
- manga_db.init_db(db_url)
290
- language_code = _language_to_code(language)
291
- results = []
292
- for page_number, image_path in enumerate(page_paths, start=1):
293
- path = Path(image_path)
294
- if not path.exists():
295
- print(f"Skip missing page {page_number}: {path}")
296
- continue
297
- print(f"Processing chapter {chapter_number} page {page_number}/{len(page_paths)}: {path.name}")
298
- img, bubble_data = process_image(str(path), language)
299
- manga_db.save_page_translation(
300
- provider_id=provider_id,
301
- manga_title=manga_title,
302
- chapter_number=chapter_number,
303
- page_number=page_number,
304
- bubbles=bubble_data,
305
- language_code=language_code,
306
- external_manga_id=external_manga_id,
307
- db_url=db_url,
308
- )
309
- results.append((img, bubble_data))
310
- print(f"Chapter '{manga_title}' ch.{chapter_number} saved to DB ({len(results)} pages).")
311
- return results
312
-
313
 
314
- def main():
315
- img_path = "./test_2.png"
316
- img, bubble_data = process_image(img_path, "japanese")
317
- print(bubble_data)
318
- img.show()
319
- # manga_db.save_page_translation(provider_id="local", manga_title="Test", chapter_number=0,
320
- # page_number=1, bubbles=bubble_data, language_code="ja")
321
 
322
- @app.post("/")
323
  def test(img_path: Optional[str] = None):
324
  print("test called")
325
  if not img_path:
@@ -327,15 +119,13 @@ def test(img_path: Optional[str] = None):
327
  img_path = Path(img_path)
328
  print(f"image path: {img_path}")
329
  if img_path.exists():
330
- img, bubble_data = process_image(img_path, "japanese")
331
  print(bubble_data)
332
  return {"result": bubble_data}
333
  else:
334
  print(f"{img_path} does not exist")
335
 
336
  if __name__ == "__main__":
337
- # main()
338
  port = int(os.environ.get("PORT", 8000))
339
  print(f"--- Starting Production Server on Port {port} ---")
340
- uvicorn.run("api:app", host="0.0.0.0", port=port, reload=False) #uses api.py
341
- # uvicorn.run("main:app", host="0.0.0.0", port=port, reload=False)
 
15
  from typing import Optional
16
  import db as manga_db
17
  import uvicorn
18
+ from manga_ocr import MangaOcr
19
+ from fastapi.middleware.cors import CORSMiddleware
20
+ from api import router as manga_router
21
+ from fastapi.responses import JSONResponse
22
+ from services.image_processor import ImageProcessor
23
+
24
+ ######################
25
+
26
+ app = FastAPI(
27
+ title="Manga Translator API",
28
+ description="Read endpoints for chapters and segments",
29
+ version="1.0.0",
30
+ )
31
+
32
+ app.add_middleware(
33
+ CORSMiddleware,
34
+ allow_origins=["*"], # Currently allow all origins, should be restricted to specific origins in production
35
+ allow_credentials=True,
36
+ allow_methods=["*"],
37
+ allow_headers=["*"],
38
+ )
39
+
40
+ app.include_router(manga_router)
41
 
42
+ #####################
43
 
44
  ###
45
  ###
 
68
 
69
  print(f"Loading models from {MODEL_PATH} and fonts from {FONT_PATH}")
70
 
71
+ ################################################
 
 
72
 
73
  # GLMOCR_MODEL_DIR = MODEL_PATH / "GlmOcr"
74
  # ocr_model = OCR_Glm_Service(GLMOCR_MODEL_DIR)
 
83
 
84
  translate_model = Translate_Qwen_Service()
85
 
86
+ ocr_model = MangaOcr()
87
 
88
  if not FONT_PATH.exists():
89
  print(f"Font NotoSansCJK not found at {FONT_PATH}. Attempting to download.")
 
98
  else:
99
  raise FileNotFoundError(f"Font NotoSansCJK not found at {FONT_PATH}")
100
 
101
+ processor = ImageProcessor(bubble_detector_model, ocr_model, translate_model)
102
  print("Finished loading all models and fonts")
103
 
104
  ###
105
  ###
106
  ###
107
 
108
+ @app.exception_handler(ValueError)
109
+ def value_error_handler(request, exc):
110
+ """Return 400 for invalid provider_id etc."""
111
+ return JSONResponse(status_code=400, content={"detail": str(exc)})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
 
 
 
 
 
 
 
113
 
114
+ @app.post("/test")
115
  def test(img_path: Optional[str] = None):
116
  print("test called")
117
  if not img_path:
 
119
  img_path = Path(img_path)
120
  print(f"image path: {img_path}")
121
  if img_path.exists():
122
+ bubble_data = processor.process_image(img_path, "japanese")
123
  print(bubble_data)
124
  return {"result": bubble_data}
125
  else:
126
  print(f"{img_path} does not exist")
127
 
128
  if __name__ == "__main__":
 
129
  port = int(os.environ.get("PORT", 8000))
130
  print(f"--- Starting Production Server on Port {port} ---")
131
+ uvicorn.run("main:app", host="0.0.0.0", port=port, reload=False)
 
services/image_processor.py ADDED
@@ -0,0 +1,284 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from services.bubble_detector_kiuyha_service import Bubble_Detector_Kiuyha_Service
2
+ from services.translate_qwen_service import Translate_Qwen_Service
3
+ from PIL import Image, ImageDraw, ImageFont
4
+ import tempfile
5
+ import os
6
+ import re
7
+ import torch
8
+ from pathlib import Path
9
+ from helpers import get_project_root, setup_fonts
10
+ from manga_ocr import MangaOcr
11
+
12
class ImageProcessor:
    """Detect speech bubbles on a manga page, OCR them, and translate the text.

    The detector, OCR model, and translator are supplied by the caller rather
    than constructed here, so this class only orchestrates the pipeline.
    """

    # Runs of line-break characters (including the Unicode line/paragraph
    # separators) in OCR output are collapsed into a single space.
    _LINE_BREAKS = re.compile(r'[\n\r\u2028\u2029]+')

    def __init__(self, bubble_detector, ocr_model, translate_model):
        """Store the collaborators used by :meth:`process_image`.

        Args:
            bubble_detector: Object whose ``predict(image_path)`` yields one
                item per bubble, each exposing ``item['coords']``.
            ocr_model: Callable that takes a cropped PIL image and returns
                the recognised text.
            translate_model: Object providing ``translate_cloud(texts)`` and
                ``translate(texts)``.
        """
        self.bubble_detector_model = bubble_detector
        self.ocr_model = ocr_model
        self.translate_model = translate_model

    def process_image(self, image_path, language):
        """Return OCR and translation data for every bubble on one page.

        Args:
            image_path: Path of the page image; passed to the detector and
                to ``Image.open``.
            language: Source language name. Currently unused; kept so the
                call signature stays stable for existing callers.

        Returns:
            A list with one dict per detected bubble, in detection order,
            with keys ``bubble_index``, ``x1``, ``y1``, ``x2``, ``y2``,
            ``original_text`` and ``translated_text``. Empty when the
            detector finds no bubbles.
        """
        bubble_results = self.bubble_detector_model.predict(image_path)
        print(f"bubble results: {bubble_results}")

        bubbles = list(bubble_results)
        if not bubbles:
            # Nothing to OCR, so skip opening the image and the translation
            # round-trip entirely.
            print('OCR Complete, total 0 bubbles.')
            return []

        texts = []
        coordinates = []
        # The context manager releases the underlying file handle as soon as
        # all crops have been OCR'd.
        with Image.open(image_path) as img:
            for index, box_data in enumerate(bubbles):
                coords = box_data['coords']
                texts.append({"id": index, "text": self._read_bubble(img, coords, index)})
                coordinates.append(coords)
        print(f'OCR Complete, total {len(texts)} bubbles.')

        translated = self._translate(texts)
        print(translated)

        bubble_data = []
        for entry, coords in zip(texts, coordinates):
            index = entry["id"]
            x1, y1, x2, y2 = coords
            original_text = entry["text"]
            # The translator's result is looked up by string key first, then
            # by integer key; a missing entry becomes an empty string.
            translated_text = translated.get(str(index), translated.get(index, ""))
            if not isinstance(translated_text, str):
                translated_text = str(translated_text)
            print(f"{index}: {original_text}")
            print(translated_text)
            print("==================================")

            bubble_data.append({
                "bubble_index": index,
                "x1": float(x1), "y1": float(y1), "x2": float(x2), "y2": float(y2),
                "original_text": original_text,
                "translated_text": translated_text,
            })

        # NOTE: rendering the translated text back onto the page is disabled;
        # only the per-bubble data is returned.
        return bubble_data

    def _read_bubble(self, img, coords, index):
        """OCR the region ``coords`` of ``img`` and return single-line text.

        The page is cropped untouched: no debug outline is drawn first, so
        the OCR model never sees artificial marks. An OCR failure is reported
        and yields an empty string so the remaining bubbles are still
        processed.
        """
        box_cropped = img.crop(coords)
        text = ""
        try:
            text = self.ocr_model(box_cropped)
        except Exception as e:
            # Best-effort per bubble, but surface the cause instead of
            # silently discarding it.
            print(f"text OCR failed for {index}: {e}")
        return self._LINE_BREAKS.sub(' ', text)

    def _translate(self, texts):
        """Translate ``texts`` via the cloud model, falling back to the local one."""
        try:
            print("Translating with cloud Qwen model...")
            return self.translate_model.translate_cloud(texts)
        except Exception as e:
            print(f"API translation failed with Qwen, falling back to local model... ({e})")
            return self.translate_model.translate(texts)
106
+
107
+
108
+ ########Test code, keeping it here as reference. Remove later################
109
+ # def show_boxes(image_path):
110
+ # result = bubble_detector_model.predict(image_path)
111
+ # img = Image.open(image_path).convert("RGB")
112
+ # draw = ImageDraw.Draw(img)
113
+ # for box in result.boxes:
114
+ # # Get coordinates as a list of floats
115
+ # coords = box.xyxy[0].tolist() # [x1, y1, x2, y2]
116
+ # draw.rectangle(coords, outline="red", width=1)
117
+
118
+ # # label
119
+ # conf = box.conf[0].item()
120
+ # box_cropped = img.crop(coords)
121
+ # # box_cropped = upscale_for_ocr(box_cropped, scale=3)
122
+ # with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as f:
123
+ # box_cropped.save(f.name)
124
+ # temp_path = f.name
125
+ # draw.text(
126
+ # (coords[0], coords[1] - 10),
127
+ # "b",
128
+ # fill="red",
129
+ # font=font
130
+ # )
131
+ # img.show()
132
+
133
+ # def get_wrapped_text(text, font, max_width):
134
+ # lines = []
135
+ # words = text.split(' ') # Split by words for English
136
+ # current_line = []
137
+
138
+ # for word in words:
139
+ # # Check if adding the next word exceeds the width
140
+ # test_line = ' '.join(current_line + [word])
141
+ # # getlength() is more accurate than getbbox for text width
142
+ # if font.getlength(test_line) <= max_width:
143
+ # current_line.append(word)
144
+ # else:
145
+ # lines.append(' '.join(current_line))
146
+ # current_line = [word]
147
+
148
+ # lines.append(' '.join(current_line))
149
+ # return lines
150
+
151
+ # def fit_text_to_box(draw, text, box_coords, font_path, padding=5, initial_size=40):
152
+ # x1, y1, x2, y2 = box_coords
153
+
154
+ # padding = padding
155
+ # target_width = (x2 - x1) - (padding * 2)
156
+ # target_height = (y2 - y1) - (padding * 2)
157
+
158
+ # current_size = initial_size
159
+ # lines = []
160
+
161
+ # while current_size > 8:
162
+ # # index=0 for Japanese, 1 for Korean in NotoSansCJK
163
+ # font = ImageFont.truetype(font_path, size=current_size)
164
+ # lines = get_wrapped_text(text, font, target_width)
165
+
166
+ # # Use a more reliable line height measurement
167
+ # # getbbox can be inconsistent; use font.size * constant for better leading
168
+ # line_height = int(current_size * 1.2)
169
+ # total_height = line_height * len(lines)
170
+
171
+ # if total_height <= target_height:
172
+ # break
173
+ # current_size -= 2 # Step down by 2 for speed
174
+
175
+ # return lines, font, current_size, line_height
176
+
177
+ # def upscale_for_ocr(img, scale=2):
178
+ # w, h = img.size
179
+ # return img.resize((w*scale, h*scale), Image.BICUBIC)
180
+
181
+ # def process_image(image_path, language):
182
+ # bubble_results = bubble_detector_model.predict(image_path)
183
+ # print(f"bubble results: {bubble_results}")
184
+ # img = Image.open(image_path)
185
+ # draw = ImageDraw.Draw(img)
186
+
187
+ # texts = []
188
+ # coordinates={}
189
+ # i=0
190
+ # for box_data in bubble_results:
191
+ # coords = box_data['coords']
192
+ # draw.rectangle(coords, outline="red", width=1)
193
+ # box_cropped = img.crop(coords)
194
+ # # box_cropped = upscale_for_ocr(box_cropped, scale=3)
195
+ # # box_cropped.show()
196
+
197
+ # with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as f:
198
+ # box_cropped.save(f.name)
199
+ # temp_path = f.name
200
+
201
+ # text = ""
202
+ # # if language == "japanese":
203
+ # # # text = ocr_japanese_model.runOCR(temp_path)
204
+ # # text = ocr_model(temp_path)
205
+ # # else:
206
+ # # text = ocr_model.runOCR(temp_path)
207
+
208
+ # text = ocr_model(box_cropped)
209
+
210
+ # text = re.sub(r'[\n\r\u2028\u2029]+', ' ', text) #remove new lines
211
+ # texts.append({"id": i, "text": text})
212
+ # coordinates[i] = coords
213
+ # i+=1
214
+ # print(f'OCR Complete, total {len(texts)} bubbles.')
215
+
216
+ # #add translated text to manga image
217
+ # try:
218
+ # print("Translating with cloud Qwen model...")
219
+ # translated = translate_model.translate_cloud(texts)
220
+ # except Exception as e:
221
+ # print("API translation failed with Qwen, falling back to local model...")
222
+ # translated = translate_model.translate(texts)
223
+
224
+ # print(translated)
225
+
226
+ # bubble_data = []
227
+ # for i in range(len(texts)):
228
+ # coords = coordinates[i]
229
+ # x1, y1, x2, y2 = coords
230
+ # original_text = texts[i]["text"]
231
+ # translated_text = translated.get(str(i), translated.get(i, ""))
232
+ # if not isinstance(translated_text, str):
233
+ # translated_text = str(translated_text)
234
+ # print(f"{i}: {original_text}")
235
+ # print(translated_text)
236
+ # print("==================================")
237
+
238
+ # bubble_data.append({
239
+ # "bubble_index": i,
240
+ # "x1": float(x1), "y1": float(y1), "x2": float(x2), "y2": float(y2),
241
+ # "original_text": original_text,
242
+ # "translated_text": translated_text,
243
+ # })
244
+
245
+ # #wipe the space
246
+ # draw.rectangle(coords, fill="white", outline="white")
247
+
248
+ # # 1. Calculate the best fit
249
+ # lines, best_font, final_size, line_h = fit_text_to_box(draw, translated_text, coords, FONT_PATH)
250
+
251
+ # # Calculate total height of the block
252
+ # total_h = line_h * len(lines)
253
+
254
+ # # Start_y adjusted for the block height relative to the box center
255
+ # start_y = coords[1] + ((coords[3] - coords[1]) - total_h) / 2
256
+
257
+ # # 3. Draw each line centered horizontally
258
+ # for line in lines:
259
+ # line = line.strip()
260
+ # if not line: continue
261
+
262
+ # # Horizontal Centering
263
+ # line_w = draw.textlength(line, font=best_font)
264
+ # start_x = coords[0] + ((coords[2] - coords[0]) - line_w) / 2
265
+
266
+ # draw.text((start_x, start_y), line, font=best_font, fill="black")
267
+ # start_y += line_h
268
+
269
+ # return img, bubble_data
270
+
271
+ # def translate_text(text, language):
272
+ # # translated_text = ""
273
+ # # if language == "japanese":
274
+ # # translated_text =
275
+
276
+ # translated_text = translate_model.translate(text)
277
+
278
+ # return translated_text
279
+
280
+ # def _language_to_code(language: str) -> str:
281
+ # """Map language name to ISO 639-1 style code for DB."""
282
+ # m = {"japanese": "ja", "english": "en", "korean": "ko", "chinese": "zh"}
283
+ # return m.get(language.lower(), language[:2] if len(language) >= 2 else "ja")
284
+