Spaces:
Running
Running
Commit ·
47dc635
1
Parent(s): c955d4f
Overhauled backend API structure
Browse files
- Dockerfile +3 -1
- api.py +26 -30
- main.py +34 -244
- services/image_processor.py +284 -0
Dockerfile
CHANGED
|
@@ -4,6 +4,8 @@ FROM python:3.12-slim
|
|
| 4 |
# 2. Set environment variables
|
| 5 |
ENV PYTHONDONTWRITEBYTECODE=1
|
| 6 |
ENV PYTHONUNBUFFERED=1
|
|
|
|
|
|
|
| 7 |
|
| 8 |
# 3. Install system dependencies
|
| 9 |
RUN apt-get update && apt-get install -y libgl1 libglib2.0-0
|
|
@@ -24,5 +26,5 @@ RUN env PYTHONPATH=. python -c "from helpers import setup_fonts; setup_fonts()"
|
|
| 24 |
RUN env PYTHONPATH=. python -c "from manga_ocr import MangaOcr; MangaOcr()"
|
| 25 |
|
| 26 |
# 8. Expose and Start
|
| 27 |
-
EXPOSE
|
| 28 |
CMD ["python", "main.py"]
|
|
|
|
| 4 |
# 2. Set environment variables
|
| 5 |
ENV PYTHONDONTWRITEBYTECODE=1
|
| 6 |
ENV PYTHONUNBUFFERED=1
|
| 7 |
+
# Port used for the Hugging Face deployment
|
| 8 |
+
ENV PORT=7860
|
| 9 |
|
| 10 |
# 3. Install system dependencies
|
| 11 |
RUN apt-get update && apt-get install -y libgl1 libglib2.0-0
|
|
|
|
| 26 |
RUN env PYTHONPATH=. python -c "from manga_ocr import MangaOcr; MangaOcr()"
|
| 27 |
|
| 28 |
# 8. Expose and Start
|
| 29 |
+
EXPOSE 7860
|
| 30 |
CMD ["python", "main.py"]
|
api.py
CHANGED
|
@@ -3,33 +3,35 @@ Read-only API for the frontend. Wraps db list_entries, get_segments, get_chapter
|
|
| 3 |
Run from backend: uvicorn api:app --reload --host 0.0.0.0 --port 8000
|
| 4 |
"""
|
| 5 |
|
| 6 |
-
from fastapi import
|
| 7 |
from fastapi.middleware.cors import CORSMiddleware
|
| 8 |
from fastapi.responses import JSONResponse
|
| 9 |
import proxy
|
| 10 |
from services import mangadex_service
|
|
|
|
| 11 |
import httpx
|
| 12 |
import db
|
| 13 |
from sqlmodel import Session, text
|
| 14 |
from db.models import Manga
|
| 15 |
from db.schemas import ChapterListOut, SegmentListOut
|
| 16 |
|
| 17 |
-
app = FastAPI(
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
)
|
| 22 |
|
| 23 |
-
app.add_middleware(
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
)
|
| 30 |
|
|
|
|
| 31 |
|
| 32 |
-
@
|
| 33 |
def root():
|
| 34 |
"""API root - confirms the API is running."""
|
| 35 |
return {
|
|
@@ -39,7 +41,7 @@ def root():
|
|
| 39 |
"health": "/health",
|
| 40 |
}
|
| 41 |
|
| 42 |
-
@
|
| 43 |
def health_db():
|
| 44 |
engine = db.get_engine()
|
| 45 |
try:
|
|
@@ -50,7 +52,7 @@ def health_db():
|
|
| 50 |
return {"status": "error", "detail": str(e)}
|
| 51 |
|
| 52 |
|
| 53 |
-
@
|
| 54 |
def list_mangas(
|
| 55 |
order_by: str = Query("created_at", description="manga_title | created_at | updated_at"),
|
| 56 |
order_desc: bool = Query(True, description="Sort descending"),
|
|
@@ -60,7 +62,7 @@ def list_mangas(
|
|
| 60 |
"""List mangas (manga_title, created_at, updated_at). Supports pagination."""
|
| 61 |
return db.list_mangas(order_by=order_by, order_desc=order_desc, limit=limit, offset=offset)
|
| 62 |
|
| 63 |
-
@
|
| 64 |
def list_chapters(
|
| 65 |
manga_title: str = Query(...),
|
| 66 |
provider_id: str | None = Query(None, description="e.g. local, mangadex"),
|
|
@@ -71,7 +73,7 @@ def list_chapters(
|
|
| 71 |
return db.list_chapters(manga_title, provider_id, limit=limit, offset=offset)
|
| 72 |
|
| 73 |
|
| 74 |
-
@
|
| 75 |
def get_segments(
|
| 76 |
provider_id: str | None = Query(None, description="e.g. local, mangadex"),
|
| 77 |
manga_title: str | None = Query(None),
|
|
@@ -91,7 +93,7 @@ def get_segments(
|
|
| 91 |
)
|
| 92 |
|
| 93 |
|
| 94 |
-
@
|
| 95 |
def get_chapter_segments(
|
| 96 |
provider_id: str = Query(..., description="e.g. local, mangadex"),
|
| 97 |
manga_title: str = Query(...),
|
|
@@ -104,22 +106,16 @@ def get_chapter_segments(
|
|
| 104 |
|
| 105 |
|
| 106 |
# to make sure api is running and responding
|
| 107 |
-
@
|
| 108 |
def health():
|
| 109 |
"""Health check."""
|
| 110 |
return {"status": "ok"}
|
| 111 |
|
| 112 |
-
|
| 113 |
-
@app.exception_handler(ValueError)
|
| 114 |
-
def value_error_handler(request, exc):
|
| 115 |
-
"""Return 400 for invalid provider_id etc."""
|
| 116 |
-
return JSONResponse(status_code=400, content={"detail": str(exc)})
|
| 117 |
-
|
| 118 |
###########
|
| 119 |
###########
|
| 120 |
###########
|
| 121 |
|
| 122 |
-
@
|
| 123 |
async def proxy_manga_page(chapter_id: str, page_index: int):
|
| 124 |
urls = mangadex_service.get_chapter_panel_urls(chapter_id)
|
| 125 |
if not urls or page_index >= len(urls):
|
|
@@ -127,12 +123,12 @@ async def proxy_manga_page(chapter_id: str, page_index: int):
|
|
| 127 |
|
| 128 |
return await proxy.get_manga_page_stream(urls[page_index])
|
| 129 |
|
| 130 |
-
@
|
| 131 |
async def proxy_manga_cover_art(manga_id: str, cover_url: str, size: int = 256):
|
| 132 |
url = f"https://uploads.mangadex.org/covers/{manga_id}/{cover_url}.{size}.jpg"
|
| 133 |
return await proxy.get_manga_page_stream(url)
|
| 134 |
|
| 135 |
-
@
|
| 136 |
async def get_popular_manga(
|
| 137 |
title: str = "",
|
| 138 |
limit: int = 15,
|
|
@@ -152,7 +148,7 @@ async def get_popular_manga(
|
|
| 152 |
return results
|
| 153 |
|
| 154 |
|
| 155 |
-
@
|
| 156 |
async def get_chapters(
|
| 157 |
manga_id: str,
|
| 158 |
limit: int = 100,
|
|
|
|
| 3 |
Run from backend: uvicorn api:app --reload --host 0.0.0.0 --port 8000
|
| 4 |
"""
|
| 5 |
|
| 6 |
+
from fastapi import APIRouter, Query
|
| 7 |
from fastapi.middleware.cors import CORSMiddleware
|
| 8 |
from fastapi.responses import JSONResponse
|
| 9 |
import proxy
|
| 10 |
from services import mangadex_service
|
| 11 |
+
from services.image_processor import ImageProcessor
|
| 12 |
import httpx
|
| 13 |
import db
|
| 14 |
from sqlmodel import Session, text
|
| 15 |
from db.models import Manga
|
| 16 |
from db.schemas import ChapterListOut, SegmentListOut
|
| 17 |
|
| 18 |
+
# app = FastAPI(
|
| 19 |
+
# title="Manga Translator API",
|
| 20 |
+
# description="Read endpoints for chapters and segments",
|
| 21 |
+
# version="1.0.0",
|
| 22 |
+
# )
|
| 23 |
|
| 24 |
+
# app.add_middleware(
|
| 25 |
+
# CORSMiddleware,
|
| 26 |
+
# allow_origins=["*"], # Currently allow all origins, should be restricted to specific origins in production
|
| 27 |
+
# allow_credentials=True,
|
| 28 |
+
# allow_methods=["*"],
|
| 29 |
+
# allow_headers=["*"],
|
| 30 |
+
# )
|
| 31 |
|
| 32 |
+
router = APIRouter()
|
| 33 |
|
| 34 |
+
@router.get("/")
|
| 35 |
def root():
|
| 36 |
"""API root - confirms the API is running."""
|
| 37 |
return {
|
|
|
|
| 41 |
"health": "/health",
|
| 42 |
}
|
| 43 |
|
| 44 |
+
@router.get("/health/db")
|
| 45 |
def health_db():
|
| 46 |
engine = db.get_engine()
|
| 47 |
try:
|
|
|
|
| 52 |
return {"status": "error", "detail": str(e)}
|
| 53 |
|
| 54 |
|
| 55 |
+
@router.get("/mangas", response_model=list[Manga])
|
| 56 |
def list_mangas(
|
| 57 |
order_by: str = Query("created_at", description="manga_title | created_at | updated_at"),
|
| 58 |
order_desc: bool = Query(True, description="Sort descending"),
|
|
|
|
| 62 |
"""List mangas (manga_title, created_at, updated_at). Supports pagination."""
|
| 63 |
return db.list_mangas(order_by=order_by, order_desc=order_desc, limit=limit, offset=offset)
|
| 64 |
|
| 65 |
+
@router.get("/chapters", response_model=list[ChapterListOut])
|
| 66 |
def list_chapters(
|
| 67 |
manga_title: str = Query(...),
|
| 68 |
provider_id: str | None = Query(None, description="e.g. local, mangadex"),
|
|
|
|
| 73 |
return db.list_chapters(manga_title, provider_id, limit=limit, offset=offset)
|
| 74 |
|
| 75 |
|
| 76 |
+
@router.get("/segments", response_model=list[SegmentListOut])
|
| 77 |
def get_segments(
|
| 78 |
provider_id: str | None = Query(None, description="e.g. local, mangadex"),
|
| 79 |
manga_title: str | None = Query(None),
|
|
|
|
| 93 |
)
|
| 94 |
|
| 95 |
|
| 96 |
+
@router.get("/chapters/segments", response_model=list[SegmentListOut])
|
| 97 |
def get_chapter_segments(
|
| 98 |
provider_id: str = Query(..., description="e.g. local, mangadex"),
|
| 99 |
manga_title: str = Query(...),
|
|
|
|
| 106 |
|
| 107 |
|
| 108 |
# to make sure api is running and responding
|
| 109 |
+
@router.get("/health")
|
| 110 |
def health():
|
| 111 |
"""Health check."""
|
| 112 |
return {"status": "ok"}
|
| 113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
###########
|
| 115 |
###########
|
| 116 |
###########
|
| 117 |
|
| 118 |
+
@router.get("/api/manga/chapter/{chapter_id}/page/{page_index}")
|
| 119 |
async def proxy_manga_page(chapter_id: str, page_index: int):
|
| 120 |
urls = mangadex_service.get_chapter_panel_urls(chapter_id)
|
| 121 |
if not urls or page_index >= len(urls):
|
|
|
|
| 123 |
|
| 124 |
return await proxy.get_manga_page_stream(urls[page_index])
|
| 125 |
|
| 126 |
+
@router.get("/api/manga/cover_art")
|
| 127 |
async def proxy_manga_cover_art(manga_id: str, cover_url: str, size: int = 256):
|
| 128 |
url = f"https://uploads.mangadex.org/covers/{manga_id}/{cover_url}.{size}.jpg"
|
| 129 |
return await proxy.get_manga_page_stream(url)
|
| 130 |
|
| 131 |
+
@router.get("/api/manga/search")
|
| 132 |
async def get_popular_manga(
|
| 133 |
title: str = "",
|
| 134 |
limit: int = 15,
|
|
|
|
| 148 |
return results
|
| 149 |
|
| 150 |
|
| 151 |
+
@router.get("/api/manga/{manga_id}/chapters")
|
| 152 |
async def get_chapters(
|
| 153 |
manga_id: str,
|
| 154 |
limit: int = 100,
|
main.py
CHANGED
|
@@ -15,7 +15,31 @@ from fastapi import FastAPI
|
|
| 15 |
from typing import Optional
|
| 16 |
import db as manga_db
|
| 17 |
import uvicorn
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
|
|
|
| 19 |
|
| 20 |
###
|
| 21 |
###
|
|
@@ -44,9 +68,7 @@ device = torch.device(device_name)
|
|
| 44 |
|
| 45 |
print(f"Loading models from {MODEL_PATH} and fonts from {FONT_PATH}")
|
| 46 |
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
#####################
|
| 50 |
|
| 51 |
# GLMOCR_MODEL_DIR = MODEL_PATH / "GlmOcr"
|
| 52 |
# ocr_model = OCR_Glm_Service(GLMOCR_MODEL_DIR)
|
|
@@ -61,6 +83,7 @@ bubble_detector_model = Bubble_Detector_Kiuyha_Service(BUBBLE_DETECTOR_MODEL_DIR
|
|
| 61 |
|
| 62 |
translate_model = Translate_Qwen_Service()
|
| 63 |
|
|
|
|
| 64 |
|
| 65 |
if not FONT_PATH.exists():
|
| 66 |
print(f"Font NotoSansCJK not found at {FONT_PATH}. Attempting to download.")
|
|
@@ -75,251 +98,20 @@ if FONT_PATH.exists():
|
|
| 75 |
else:
|
| 76 |
raise FileNotFoundError(f"Font NotoSansCJK not found at {FONT_PATH}")
|
| 77 |
|
|
|
|
| 78 |
print("Finished loading all models and fonts")
|
| 79 |
|
| 80 |
###
|
| 81 |
###
|
| 82 |
###
|
| 83 |
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
def show_boxes(image_path):
|
| 89 |
-
result = bubble_detector_model.predict(image_path)
|
| 90 |
-
img = Image.open(image_path).convert("RGB")
|
| 91 |
-
draw = ImageDraw.Draw(img)
|
| 92 |
-
for box in result.boxes:
|
| 93 |
-
# Get coordinates as a list of floats
|
| 94 |
-
coords = box.xyxy[0].tolist() # [x1, y1, x2, y2]
|
| 95 |
-
draw.rectangle(coords, outline="red", width=1)
|
| 96 |
-
|
| 97 |
-
# label
|
| 98 |
-
conf = box.conf[0].item()
|
| 99 |
-
box_cropped = img.crop(coords)
|
| 100 |
-
# box_cropped = upscale_for_ocr(box_cropped, scale=3)
|
| 101 |
-
with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as f:
|
| 102 |
-
box_cropped.save(f.name)
|
| 103 |
-
temp_path = f.name
|
| 104 |
-
draw.text(
|
| 105 |
-
(coords[0], coords[1] - 10),
|
| 106 |
-
"b",
|
| 107 |
-
fill="red",
|
| 108 |
-
font=font
|
| 109 |
-
)
|
| 110 |
-
img.show()
|
| 111 |
-
|
| 112 |
-
def get_wrapped_text(text, font, max_width):
|
| 113 |
-
lines = []
|
| 114 |
-
words = text.split(' ') # Split by words for English
|
| 115 |
-
current_line = []
|
| 116 |
-
|
| 117 |
-
for word in words:
|
| 118 |
-
# Check if adding the next word exceeds the width
|
| 119 |
-
test_line = ' '.join(current_line + [word])
|
| 120 |
-
# getlength() is more accurate than getbbox for text width
|
| 121 |
-
if font.getlength(test_line) <= max_width:
|
| 122 |
-
current_line.append(word)
|
| 123 |
-
else:
|
| 124 |
-
lines.append(' '.join(current_line))
|
| 125 |
-
current_line = [word]
|
| 126 |
-
|
| 127 |
-
lines.append(' '.join(current_line))
|
| 128 |
-
return lines
|
| 129 |
-
|
| 130 |
-
def fit_text_to_box(draw, text, box_coords, font_path, padding=5, initial_size=40):
|
| 131 |
-
x1, y1, x2, y2 = box_coords
|
| 132 |
-
|
| 133 |
-
padding = padding
|
| 134 |
-
target_width = (x2 - x1) - (padding * 2)
|
| 135 |
-
target_height = (y2 - y1) - (padding * 2)
|
| 136 |
-
|
| 137 |
-
current_size = initial_size
|
| 138 |
-
lines = []
|
| 139 |
-
|
| 140 |
-
while current_size > 8:
|
| 141 |
-
# index=0 for Japanese, 1 for Korean in NotoSansCJK
|
| 142 |
-
font = ImageFont.truetype(font_path, size=current_size)
|
| 143 |
-
lines = get_wrapped_text(text, font, target_width)
|
| 144 |
-
|
| 145 |
-
# Use a more reliable line height measurement
|
| 146 |
-
# getbbox can be inconsistent; use font.size * constant for better leading
|
| 147 |
-
line_height = int(current_size * 1.2)
|
| 148 |
-
total_height = line_height * len(lines)
|
| 149 |
-
|
| 150 |
-
if total_height <= target_height:
|
| 151 |
-
break
|
| 152 |
-
current_size -= 2 # Step down by 2 for speed
|
| 153 |
-
|
| 154 |
-
return lines, font, current_size, line_height
|
| 155 |
-
|
| 156 |
-
def upscale_for_ocr(img, scale=2):
|
| 157 |
-
w, h = img.size
|
| 158 |
-
return img.resize((w*scale, h*scale), Image.BICUBIC)
|
| 159 |
-
|
| 160 |
-
def process_image(image_path, language):
|
| 161 |
-
bubble_results = bubble_detector_model.predict(image_path)
|
| 162 |
-
print(f"bubble results: {bubble_results}")
|
| 163 |
-
img = Image.open(image_path)
|
| 164 |
-
draw = ImageDraw.Draw(img)
|
| 165 |
-
|
| 166 |
-
ocr_model = get_ocr()
|
| 167 |
-
texts = []
|
| 168 |
-
coordinates={}
|
| 169 |
-
i=0
|
| 170 |
-
for box_data in bubble_results:
|
| 171 |
-
print(f"box_data {i}")
|
| 172 |
-
coords = box_data['coords']
|
| 173 |
-
draw.rectangle(coords, outline="red", width=1)
|
| 174 |
-
box_cropped = img.crop(coords)
|
| 175 |
-
# box_cropped = upscale_for_ocr(box_cropped, scale=3)
|
| 176 |
-
# box_cropped.show()
|
| 177 |
-
|
| 178 |
-
with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as f:
|
| 179 |
-
box_cropped.save(f.name)
|
| 180 |
-
temp_path = f.name
|
| 181 |
-
|
| 182 |
-
text = ""
|
| 183 |
-
# if language == "japanese":
|
| 184 |
-
# # text = ocr_japanese_model.runOCR(temp_path)
|
| 185 |
-
# text = ocr_model(temp_path)
|
| 186 |
-
# else:
|
| 187 |
-
# text = ocr_model.runOCR(temp_path)
|
| 188 |
-
|
| 189 |
-
try:
|
| 190 |
-
# MangaOcr is callable: mocr(image)
|
| 191 |
-
text = ocr_model(box_cropped)
|
| 192 |
-
except Exception as e:
|
| 193 |
-
print(f"OCR Error on bubble {i}: {e}")
|
| 194 |
-
|
| 195 |
-
text = re.sub(r'[\n\r\u2028\u2029]+', ' ', text) #remove new lines
|
| 196 |
-
texts.append({"id": i, "text": text})
|
| 197 |
-
coordinates[i] = coords
|
| 198 |
-
i+=1
|
| 199 |
-
print(f'OCR Complete, total {len(texts)} bubbles.')
|
| 200 |
-
|
| 201 |
-
#add translated text to manga image
|
| 202 |
-
try:
|
| 203 |
-
print("Translating with cloud Qwen model...")
|
| 204 |
-
translated = translate_model.translate_cloud(texts)
|
| 205 |
-
except Exception as e:
|
| 206 |
-
print("API translation failed with Qwen, falling back to local model...")
|
| 207 |
-
translated = translate_model.translate(texts)
|
| 208 |
-
|
| 209 |
-
print(translated)
|
| 210 |
-
|
| 211 |
-
bubble_data = []
|
| 212 |
-
for i in range(len(texts)):
|
| 213 |
-
coords = coordinates[i]
|
| 214 |
-
x1, y1, x2, y2 = coords
|
| 215 |
-
original_text = texts[i]["text"]
|
| 216 |
-
translated_text = translated.get(str(i), translated.get(i, ""))
|
| 217 |
-
if not isinstance(translated_text, str):
|
| 218 |
-
translated_text = str(translated_text)
|
| 219 |
-
print(f"{i}: {original_text}")
|
| 220 |
-
print(translated_text)
|
| 221 |
-
print("==================================")
|
| 222 |
-
|
| 223 |
-
bubble_data.append({
|
| 224 |
-
"bubble_index": i,
|
| 225 |
-
"x1": float(x1), "y1": float(y1), "x2": float(x2), "y2": float(y2),
|
| 226 |
-
"original_text": original_text,
|
| 227 |
-
"translated_text": translated_text,
|
| 228 |
-
})
|
| 229 |
-
|
| 230 |
-
#wipe the space
|
| 231 |
-
draw.rectangle(coords, fill="white", outline="white")
|
| 232 |
-
|
| 233 |
-
# 1. Calculate the best fit
|
| 234 |
-
lines, best_font, final_size, line_h = fit_text_to_box(draw, translated_text, coords, FONT_PATH)
|
| 235 |
-
|
| 236 |
-
# Calculate total height of the block
|
| 237 |
-
total_h = line_h * len(lines)
|
| 238 |
-
|
| 239 |
-
# Start_y adjusted for the block height relative to the box center
|
| 240 |
-
start_y = coords[1] + ((coords[3] - coords[1]) - total_h) / 2
|
| 241 |
-
|
| 242 |
-
# 3. Draw each line centered horizontally
|
| 243 |
-
for line in lines:
|
| 244 |
-
line = line.strip()
|
| 245 |
-
if not line: continue
|
| 246 |
-
|
| 247 |
-
# Horizontal Centering
|
| 248 |
-
line_w = draw.textlength(line, font=best_font)
|
| 249 |
-
start_x = coords[0] + ((coords[2] - coords[0]) - line_w) / 2
|
| 250 |
-
|
| 251 |
-
draw.text((start_x, start_y), line, font=best_font, fill="black")
|
| 252 |
-
start_y += line_h
|
| 253 |
-
|
| 254 |
-
return img, bubble_data
|
| 255 |
-
|
| 256 |
-
def translate_text(text, language):
|
| 257 |
-
# translated_text = ""
|
| 258 |
-
# if language == "japanese":
|
| 259 |
-
# translated_text =
|
| 260 |
-
|
| 261 |
-
translated_text = translate_model.translate(text)
|
| 262 |
-
|
| 263 |
-
return translated_text
|
| 264 |
-
|
| 265 |
-
def _language_to_code(language: str) -> str:
|
| 266 |
-
"""Map language name to ISO 639-1 style code for DB."""
|
| 267 |
-
m = {"japanese": "ja", "english": "en", "korean": "ko", "chinese": "zh"}
|
| 268 |
-
return m.get(language.lower(), language[:2] if len(language) >= 2 else "ja")
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
def process_chapter(
|
| 272 |
-
manga_title: str,
|
| 273 |
-
chapter_number: float,
|
| 274 |
-
page_paths: list,
|
| 275 |
-
language: str = "japanese",
|
| 276 |
-
provider_id: str = "local",
|
| 277 |
-
external_manga_id: Optional[str] = None,
|
| 278 |
-
db_url: str = None,
|
| 279 |
-
):
|
| 280 |
-
"""
|
| 281 |
-
Process each page of a chapter, draw translated text on images, and save
|
| 282 |
-
to the PostgreSQL text repository (provider_id, manga_title, chapter/page,
|
| 283 |
-
segment coordinates, original/translated text, language code). No images stored.
|
| 284 |
-
page_paths: list of paths to page images in order.
|
| 285 |
-
provider_id: source/provider identifier (e.g. 'mangadex', 'local').
|
| 286 |
-
db_url: PostgreSQL URL or set DATABASE_URL.
|
| 287 |
-
Returns (list of (img, bubble_data) per page).
|
| 288 |
-
"""
|
| 289 |
-
manga_db.init_db(db_url)
|
| 290 |
-
language_code = _language_to_code(language)
|
| 291 |
-
results = []
|
| 292 |
-
for page_number, image_path in enumerate(page_paths, start=1):
|
| 293 |
-
path = Path(image_path)
|
| 294 |
-
if not path.exists():
|
| 295 |
-
print(f"Skip missing page {page_number}: {path}")
|
| 296 |
-
continue
|
| 297 |
-
print(f"Processing chapter {chapter_number} page {page_number}/{len(page_paths)}: {path.name}")
|
| 298 |
-
img, bubble_data = process_image(str(path), language)
|
| 299 |
-
manga_db.save_page_translation(
|
| 300 |
-
provider_id=provider_id,
|
| 301 |
-
manga_title=manga_title,
|
| 302 |
-
chapter_number=chapter_number,
|
| 303 |
-
page_number=page_number,
|
| 304 |
-
bubbles=bubble_data,
|
| 305 |
-
language_code=language_code,
|
| 306 |
-
external_manga_id=external_manga_id,
|
| 307 |
-
db_url=db_url,
|
| 308 |
-
)
|
| 309 |
-
results.append((img, bubble_data))
|
| 310 |
-
print(f"Chapter '{manga_title}' ch.{chapter_number} saved to DB ({len(results)} pages).")
|
| 311 |
-
return results
|
| 312 |
-
|
| 313 |
|
| 314 |
-
def main():
|
| 315 |
-
img_path = "./test_2.png"
|
| 316 |
-
img, bubble_data = process_image(img_path, "japanese")
|
| 317 |
-
print(bubble_data)
|
| 318 |
-
img.show()
|
| 319 |
-
# manga_db.save_page_translation(provider_id="local", manga_title="Test", chapter_number=0,
|
| 320 |
-
# page_number=1, bubbles=bubble_data, language_code="ja")
|
| 321 |
|
| 322 |
-
@app.post("/")
|
| 323 |
def test(img_path: Optional[str] = None):
|
| 324 |
print("test called")
|
| 325 |
if not img_path:
|
|
@@ -327,15 +119,13 @@ def test(img_path: Optional[str] = None):
|
|
| 327 |
img_path = Path(img_path)
|
| 328 |
print(f"image path: {img_path}")
|
| 329 |
if img_path.exists():
|
| 330 |
-
|
| 331 |
print(bubble_data)
|
| 332 |
return {"result": bubble_data}
|
| 333 |
else:
|
| 334 |
print(f"{img_path} does not exist")
|
| 335 |
|
| 336 |
if __name__ == "__main__":
|
| 337 |
-
# main()
|
| 338 |
port = int(os.environ.get("PORT", 8000))
|
| 339 |
print(f"--- Starting Production Server on Port {port} ---")
|
| 340 |
-
uvicorn.run("
|
| 341 |
-
# uvicorn.run("main:app", host="0.0.0.0", port=port, reload=False)
|
|
|
|
| 15 |
from typing import Optional
|
| 16 |
import db as manga_db
|
| 17 |
import uvicorn
|
| 18 |
+
from manga_ocr import MangaOcr
|
| 19 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 20 |
+
from api import router as manga_router
|
| 21 |
+
from fastapi.responses import JSONResponse
|
| 22 |
+
from services.image_processor import ImageProcessor
|
| 23 |
+
|
| 24 |
+
######################
|
| 25 |
+
|
| 26 |
+
app = FastAPI(
|
| 27 |
+
title="Manga Translator API",
|
| 28 |
+
description="Read endpoints for chapters and segments",
|
| 29 |
+
version="1.0.0",
|
| 30 |
+
)
|
| 31 |
+
|
| 32 |
+
app.add_middleware(
|
| 33 |
+
CORSMiddleware,
|
| 34 |
+
allow_origins=["*"], # Currently allow all origins, should be restricted to specific origins in production
|
| 35 |
+
allow_credentials=True,
|
| 36 |
+
allow_methods=["*"],
|
| 37 |
+
allow_headers=["*"],
|
| 38 |
+
)
|
| 39 |
+
|
| 40 |
+
app.include_router(manga_router)
|
| 41 |
|
| 42 |
+
#####################
|
| 43 |
|
| 44 |
###
|
| 45 |
###
|
|
|
|
| 68 |
|
| 69 |
print(f"Loading models from {MODEL_PATH} and fonts from {FONT_PATH}")
|
| 70 |
|
| 71 |
+
################################################
|
|
|
|
|
|
|
| 72 |
|
| 73 |
# GLMOCR_MODEL_DIR = MODEL_PATH / "GlmOcr"
|
| 74 |
# ocr_model = OCR_Glm_Service(GLMOCR_MODEL_DIR)
|
|
|
|
| 83 |
|
| 84 |
translate_model = Translate_Qwen_Service()
|
| 85 |
|
| 86 |
+
ocr_model = MangaOcr()
|
| 87 |
|
| 88 |
if not FONT_PATH.exists():
|
| 89 |
print(f"Font NotoSansCJK not found at {FONT_PATH}. Attempting to download.")
|
|
|
|
| 98 |
else:
|
| 99 |
raise FileNotFoundError(f"Font NotoSansCJK not found at {FONT_PATH}")
|
| 100 |
|
| 101 |
+
processor = ImageProcessor(bubble_detector_model, ocr_model, translate_model)
|
| 102 |
print("Finished loading all models and fonts")
|
| 103 |
|
| 104 |
###
|
| 105 |
###
|
| 106 |
###
|
| 107 |
|
| 108 |
+
@app.exception_handler(ValueError)
|
| 109 |
+
def value_error_handler(request, exc):
|
| 110 |
+
"""Return 400 for invalid provider_id etc."""
|
| 111 |
+
return JSONResponse(status_code=400, content={"detail": str(exc)})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
+
@app.post("/test")
|
| 115 |
def test(img_path: Optional[str] = None):
|
| 116 |
print("test called")
|
| 117 |
if not img_path:
|
|
|
|
| 119 |
img_path = Path(img_path)
|
| 120 |
print(f"image path: {img_path}")
|
| 121 |
if img_path.exists():
|
| 122 |
+
bubble_data = processor.process_image(img_path, "japanese")
|
| 123 |
print(bubble_data)
|
| 124 |
return {"result": bubble_data}
|
| 125 |
else:
|
| 126 |
print(f"{img_path} does not exist")
|
| 127 |
|
| 128 |
if __name__ == "__main__":
|
|
|
|
| 129 |
port = int(os.environ.get("PORT", 8000))
|
| 130 |
print(f"--- Starting Production Server on Port {port} ---")
|
| 131 |
+
uvicorn.run("main:app", host="0.0.0.0", port=port, reload=False)
|
|
|
services/image_processor.py
ADDED
|
@@ -0,0 +1,284 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from services.bubble_detector_kiuyha_service import Bubble_Detector_Kiuyha_Service
|
| 2 |
+
from services.translate_qwen_service import Translate_Qwen_Service
|
| 3 |
+
from PIL import Image, ImageDraw, ImageFont
|
| 4 |
+
import tempfile
|
| 5 |
+
import os
|
| 6 |
+
import re
|
| 7 |
+
import torch
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from helpers import get_project_root, setup_fonts
|
| 10 |
+
from manga_ocr import MangaOcr
|
| 11 |
+
|
| 12 |
+
class ImageProcessor:
    """Detect speech bubbles on a page image, OCR each one, and translate the text.

    The detector, OCR model and translator are injected rather than constructed
    here; this class only orchestrates the calls between them.
    """

    # Line/paragraph separators that OCR output may contain; collapsed to spaces.
    _LINE_BREAKS = re.compile(r'[\n\r\u2028\u2029]+')

    def __init__(self, bubble_detector, ocr_model, translate_model):
        """Store the injected collaborators.

        Args:
            bubble_detector: Object exposing ``predict(image_path)``; each item
                it yields is indexed with ``'coords'`` to get ``(x1, y1, x2, y2)``.
            ocr_model: Callable that takes a cropped PIL image and returns text.
            translate_model: Object exposing ``translate_cloud(texts)`` and
                ``translate(texts)``; the result is read with ``.get`` using the
                bubble index (as ``str`` or ``int``) as key.
        """
        self.bubble_detector_model = bubble_detector
        self.ocr_model = ocr_model
        self.translate_model = translate_model

    def process_image(self, image_path, language):
        """Run detection, OCR and translation for a single page image.

        Args:
            image_path: Path of the image, passed to the detector and to
                ``PIL.Image.open``.
            language: Currently unused; kept so existing callers keep working.

        Returns:
            A list with one dict per detected bubble, in detection order, with
            keys ``bubble_index``, ``x1``, ``y1``, ``x2``, ``y2`` (floats),
            ``original_text`` and ``translated_text``. Empty when nothing was
            detected.

        Raises:
            Exception: Whatever the local ``translate`` fallback raises when
                the cloud translation has already failed.
        """
        bubble_results = self.bubble_detector_model.predict(image_path)
        print(f"bubble results: {bubble_results}")

        # Materialize once so that None, generators and lists are all handled,
        # and skip image decoding and the translation call when there is
        # nothing to translate.
        bubbles = list(bubble_results) if bubble_results is not None else []
        if not bubbles:
            return []

        texts = []
        coordinates = []
        # The context manager releases the underlying file handle. Nothing is
        # drawn on the page before cropping, so the OCR input stays clean, and
        # the crop is handed to the OCR model in memory (no temp files).
        with Image.open(image_path) as img:
            for i, box_data in enumerate(bubbles):
                coords = box_data['coords']
                box_cropped = img.crop(coords)

                text = ""
                try:
                    text = self.ocr_model(box_cropped)
                except Exception as e:
                    # Best effort: one unreadable bubble must not abort the
                    # page, but the reason is reported instead of being dropped.
                    print(f"text OCR failed for {i}: {e}")

                text = self._LINE_BREAKS.sub(' ', text)  # remove new lines
                texts.append({"id": i, "text": text})
                coordinates.append(coords)
        print(f'OCR Complete, total {len(texts)} bubbles.')

        try:
            print("Translating with cloud Qwen model...")
            translated = self.translate_model.translate_cloud(texts)
        except Exception as e:
            print("API translation failed with Qwen, falling back to local model...")
            print(f"cloud translation error: {e}")
            translated = self.translate_model.translate(texts)

        print(translated)

        bubble_data = []
        for i, (entry, coords) in enumerate(zip(texts, coordinates)):
            x1, y1, x2, y2 = coords
            original_text = entry["text"]
            # The translator may key its result by str or by int index.
            translated_text = translated.get(str(i), translated.get(i, ""))
            if not isinstance(translated_text, str):
                translated_text = str(translated_text)
            print(f"{i}: {original_text}")
            print(translated_text)
            print("==================================")

            bubble_data.append({
                "bubble_index": i,
                "x1": float(x1), "y1": float(y1), "x2": float(x2), "y2": float(y2),
                "original_text": original_text,
                "translated_text": translated_text,
            })

        # NOTE: drawing the translated text back onto the page is disabled;
        # only the per-bubble data is returned (previously: img, bubble_data).
        return bubble_data
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
# ======== Legacy test code, kept below for reference only; remove later ========
|
| 109 |
+
# def show_boxes(image_path):
|
| 110 |
+
# result = bubble_detector_model.predict(image_path)
|
| 111 |
+
# img = Image.open(image_path).convert("RGB")
|
| 112 |
+
# draw = ImageDraw.Draw(img)
|
| 113 |
+
# for box in result.boxes:
|
| 114 |
+
# # Get coordinates as a list of floats
|
| 115 |
+
# coords = box.xyxy[0].tolist() # [x1, y1, x2, y2]
|
| 116 |
+
# draw.rectangle(coords, outline="red", width=1)
|
| 117 |
+
|
| 118 |
+
# # label
|
| 119 |
+
# conf = box.conf[0].item()
|
| 120 |
+
# box_cropped = img.crop(coords)
|
| 121 |
+
# # box_cropped = upscale_for_ocr(box_cropped, scale=3)
|
| 122 |
+
# with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as f:
|
| 123 |
+
# box_cropped.save(f.name)
|
| 124 |
+
# temp_path = f.name
|
| 125 |
+
# draw.text(
|
| 126 |
+
# (coords[0], coords[1] - 10),
|
| 127 |
+
# "b",
|
| 128 |
+
# fill="red",
|
| 129 |
+
# font=font
|
| 130 |
+
# )
|
| 131 |
+
# img.show()
|
| 132 |
+
|
| 133 |
+
# def get_wrapped_text(text, font, max_width):
|
| 134 |
+
# lines = []
|
| 135 |
+
# words = text.split(' ') # Split by words for English
|
| 136 |
+
# current_line = []
|
| 137 |
+
|
| 138 |
+
# for word in words:
|
| 139 |
+
# # Check if adding the next word exceeds the width
|
| 140 |
+
# test_line = ' '.join(current_line + [word])
|
| 141 |
+
# # getlength() is more accurate than getbbox for text width
|
| 142 |
+
# if font.getlength(test_line) <= max_width:
|
| 143 |
+
# current_line.append(word)
|
| 144 |
+
# else:
|
| 145 |
+
# lines.append(' '.join(current_line))
|
| 146 |
+
# current_line = [word]
|
| 147 |
+
|
| 148 |
+
# lines.append(' '.join(current_line))
|
| 149 |
+
# return lines
|
| 150 |
+
|
| 151 |
+
# def fit_text_to_box(draw, text, box_coords, font_path, padding=5, initial_size=40):
|
| 152 |
+
# x1, y1, x2, y2 = box_coords
|
| 153 |
+
|
| 154 |
+
# padding = padding
|
| 155 |
+
# target_width = (x2 - x1) - (padding * 2)
|
| 156 |
+
# target_height = (y2 - y1) - (padding * 2)
|
| 157 |
+
|
| 158 |
+
# current_size = initial_size
|
| 159 |
+
# lines = []
|
| 160 |
+
|
| 161 |
+
# while current_size > 8:
|
| 162 |
+
# # index=0 for Japanese, 1 for Korean in NotoSansCJK
|
| 163 |
+
# font = ImageFont.truetype(font_path, size=current_size)
|
| 164 |
+
# lines = get_wrapped_text(text, font, target_width)
|
| 165 |
+
|
| 166 |
+
# # Use a more reliable line height measurement
|
| 167 |
+
# # getbbox can be inconsistent; use font.size * constant for better leading
|
| 168 |
+
# line_height = int(current_size * 1.2)
|
| 169 |
+
# total_height = line_height * len(lines)
|
| 170 |
+
|
| 171 |
+
# if total_height <= target_height:
|
| 172 |
+
# break
|
| 173 |
+
# current_size -= 2 # Step down by 2 for speed
|
| 174 |
+
|
| 175 |
+
# return lines, font, current_size, line_height
|
| 176 |
+
|
| 177 |
+
# def upscale_for_ocr(img, scale=2):
|
| 178 |
+
# w, h = img.size
|
| 179 |
+
# return img.resize((w*scale, h*scale), Image.BICUBIC)
|
| 180 |
+
|
| 181 |
+
# def process_image(image_path, language):
|
| 182 |
+
# bubble_results = bubble_detector_model.predict(image_path)
|
| 183 |
+
# print(f"bubble results: {bubble_results}")
|
| 184 |
+
# img = Image.open(image_path)
|
| 185 |
+
# draw = ImageDraw.Draw(img)
|
| 186 |
+
|
| 187 |
+
# texts = []
|
| 188 |
+
# coordinates={}
|
| 189 |
+
# i=0
|
| 190 |
+
# for box_data in bubble_results:
|
| 191 |
+
# coords = box_data['coords']
|
| 192 |
+
# draw.rectangle(coords, outline="red", width=1)
|
| 193 |
+
# box_cropped = img.crop(coords)
|
| 194 |
+
# # box_cropped = upscale_for_ocr(box_cropped, scale=3)
|
| 195 |
+
# # box_cropped.show()
|
| 196 |
+
|
| 197 |
+
# with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as f:
|
| 198 |
+
# box_cropped.save(f.name)
|
| 199 |
+
# temp_path = f.name
|
| 200 |
+
|
| 201 |
+
# text = ""
|
| 202 |
+
# # if language == "japanese":
|
| 203 |
+
# # # text = ocr_japanese_model.runOCR(temp_path)
|
| 204 |
+
# # text = ocr_model(temp_path)
|
| 205 |
+
# # else:
|
| 206 |
+
# # text = ocr_model.runOCR(temp_path)
|
| 207 |
+
|
| 208 |
+
# text = ocr_model(box_cropped)
|
| 209 |
+
|
| 210 |
+
# text = re.sub(r'[\n\r\u2028\u2029]+', ' ', text) #remove new lines
|
| 211 |
+
# texts.append({"id": i, "text": text})
|
| 212 |
+
# coordinates[i] = coords
|
| 213 |
+
# i+=1
|
| 214 |
+
# print(f'OCR Complete, total {len(texts)} bubbles.')
|
| 215 |
+
|
| 216 |
+
# #add translated text to manga image
|
| 217 |
+
# try:
|
| 218 |
+
# print("Translating with cloud Qwen model...")
|
| 219 |
+
# translated = translate_model.translate_cloud(texts)
|
| 220 |
+
# except Exception as e:
|
| 221 |
+
# print("API translation failed with Qwen, falling back to local model...")
|
| 222 |
+
# translated = translate_model.translate(texts)
|
| 223 |
+
|
| 224 |
+
# print(translated)
|
| 225 |
+
|
| 226 |
+
# bubble_data = []
|
| 227 |
+
# for i in range(len(texts)):
|
| 228 |
+
# coords = coordinates[i]
|
| 229 |
+
# x1, y1, x2, y2 = coords
|
| 230 |
+
# original_text = texts[i]["text"]
|
| 231 |
+
# translated_text = translated.get(str(i), translated.get(i, ""))
|
| 232 |
+
# if not isinstance(translated_text, str):
|
| 233 |
+
# translated_text = str(translated_text)
|
| 234 |
+
# print(f"{i}: {original_text}")
|
| 235 |
+
# print(translated_text)
|
| 236 |
+
# print("==================================")
|
| 237 |
+
|
| 238 |
+
# bubble_data.append({
|
| 239 |
+
# "bubble_index": i,
|
| 240 |
+
# "x1": float(x1), "y1": float(y1), "x2": float(x2), "y2": float(y2),
|
| 241 |
+
# "original_text": original_text,
|
| 242 |
+
# "translated_text": translated_text,
|
| 243 |
+
# })
|
| 244 |
+
|
| 245 |
+
# #wipe the space
|
| 246 |
+
# draw.rectangle(coords, fill="white", outline="white")
|
| 247 |
+
|
| 248 |
+
# # 1. Calculate the best fit
|
| 249 |
+
# lines, best_font, final_size, line_h = fit_text_to_box(draw, translated_text, coords, FONT_PATH)
|
| 250 |
+
|
| 251 |
+
# # Calculate total height of the block
|
| 252 |
+
# total_h = line_h * len(lines)
|
| 253 |
+
|
| 254 |
+
# # Start_y adjusted for the block height relative to the box center
|
| 255 |
+
# start_y = coords[1] + ((coords[3] - coords[1]) - total_h) / 2
|
| 256 |
+
|
| 257 |
+
# # 3. Draw each line centered horizontally
|
| 258 |
+
# for line in lines:
|
| 259 |
+
# line = line.strip()
|
| 260 |
+
# if not line: continue
|
| 261 |
+
|
| 262 |
+
# # Horizontal Centering
|
| 263 |
+
# line_w = draw.textlength(line, font=best_font)
|
| 264 |
+
# start_x = coords[0] + ((coords[2] - coords[0]) - line_w) / 2
|
| 265 |
+
|
| 266 |
+
# draw.text((start_x, start_y), line, font=best_font, fill="black")
|
| 267 |
+
# start_y += line_h
|
| 268 |
+
|
| 269 |
+
# return img, bubble_data
|
| 270 |
+
|
| 271 |
+
# def translate_text(text, language):
|
| 272 |
+
# # translated_text = ""
|
| 273 |
+
# # if language == "japanese":
|
| 274 |
+
# # translated_text =
|
| 275 |
+
|
| 276 |
+
# translated_text = translate_model.translate(text)
|
| 277 |
+
|
| 278 |
+
# return translated_text
|
| 279 |
+
|
| 280 |
+
# def _language_to_code(language: str) -> str:
|
| 281 |
+
# """Map language name to ISO 639-1 style code for DB."""
|
| 282 |
+
# m = {"japanese": "ja", "english": "en", "korean": "ko", "chinese": "zh"}
|
| 283 |
+
# return m.get(language.lower(), language[:2] if len(language) >= 2 else "ja")
|
| 284 |
+
|