#!/usr/bin/env python3
"""
Multi-Language OCR System for Comic Translation
===============================================
A comprehensive OCR system that automatically selects the best OCR engine
based on the source language:
- manga-ocr: Specialized for Japanese manga text
- PaddleOCR: Optimized for Chinese manhua text
- EasyOCR: Good for Korean manhwa and multilingual text
- TrOCR: General purpose fallback OCR
Author: MangaTranslator Team
License: MIT
"""
# Third-party imports (OpenCV, NumPy, Pillow, PyTorch)
import cv2
import numpy as np
from PIL import Image
import torch
# OCR engine imports
import easyocr
# PaddleOCR is optional. Prefer the project's auto-installer + runtime fixer;
# if those helper modules are missing, fall back to importing paddleocr
# directly. PADDLE_AVAILABLE records whether either path succeeded.
try:
    # Try auto-installer first
    from auto_install_paddle import ensure_paddle_ready
    ensure_paddle_ready()
    # Then try runtime fixer
    from paddle_runtime_fixer import create_paddle_ocr_instance, ensure_paddle_available
    PADDLE_AVAILABLE = True
    print("✅ PaddleOCR with auto-installer and runtime fixer ready")
except ImportError:
    # Fallback to direct import
    try:
        from paddleocr import PaddleOCR
        PADDLE_AVAILABLE = True
        print("✅ PaddleOCR direct import successful")
    except ImportError:
        PADDLE_AVAILABLE = False
        print("⚠️ PaddleOCR not available - Chinese text recognition will be disabled")
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from manga_ocr import MangaOcr
class MultiLanguageOCR:
    """
    Multi-language OCR front end that routes an image to an OCR engine
    chosen from the source language.

    Engines are created lazily on first use and cached on the instance:
    manga-ocr (Japanese), PaddleOCR (Chinese), EasyOCR (Korean/English and a
    separate Japanese/English reader) and TrOCR (general purpose).
    """

    # Engine picked by extract_text() when method == "auto".
    _AUTO_METHOD_BY_LANG = {
        "ja": "manga_ocr",
        "zh": "paddle",
        "ko": "easy",
        "en": "easy",
    }

    # Ordered fallback engines per language. The first entry that differs
    # from the engine that just failed is tried.
    _FALLBACK_ENGINES = {
        "ja": ("easy_ja", "manga_ocr"),
        "zh": ("easy", "trocr"),
        "ko": ("trocr", "manga_ocr"),
    }
    _DEFAULT_FALLBACK_ENGINES = ("easy", "trocr")

    # Recognition results at or below this confidence are discarded.
    _MIN_CONFIDENCE = 0.5

    _TROCR_CHECKPOINT = "microsoft/trocr-base-printed"

    # Keyword arguments used for the primary PaddleOCR construction attempts.
    _PADDLE_OPTIONS = {
        "use_doc_orientation_classify": False,
        "use_doc_unwarping": False,
        "use_textline_orientation": True,
        "lang": "ch",
    }

    def __init__(self):
        """Set every engine slot to None; engines are loaded on first use."""
        print("🔧 Initializing Multi-Language OCR engines...")
        # OCR engines - initialized on demand for better memory usage
        self.manga_ocr = None        # Japanese OCR (manga-ocr)
        self.paddle_ocr = None       # Chinese OCR (PaddleOCR)
        self.easy_ocr = None         # Korean + English EasyOCR reader
        self.easy_ocr_ja = None      # Japanese + English EasyOCR reader
        self.trocr_processor = None  # TrOCR processor (general fallback)
        self.trocr_model = None      # TrOCR model (general fallback)
        print("✅ OCR engines ready for initialization")

    # ------------------------------------------------------------------
    # Lazy engine initialisation
    # ------------------------------------------------------------------
    def _init_manga_ocr(self):
        """Create the manga-ocr engine if it has not been created yet."""
        if self.manga_ocr is not None:
            return
        print("🎌 Loading manga-ocr for Japanese...")
        self.manga_ocr = MangaOcr()
        print("✅ manga-ocr ready for Japanese text")

    def _init_paddle_ocr(self):
        """
        Create the PaddleOCR engine if it has not been created yet.

        Tries, in order: the project's runtime fixer factory (when it was
        imported at module load), a direct ``PaddleOCR`` construction with
        the full option set, and a construction with a reduced option set.
        On total failure ``self.paddle_ocr`` stays ``None``; no exception
        escapes from the construction attempts.
        """
        if self.paddle_ocr is not None:
            return
        print("🐼 Loading PaddleOCR for Chinese...")
        if not PADDLE_AVAILABLE:
            print("❌ PaddleOCR not available - skipping Chinese OCR initialization")
            return

        # The factory only exists when the runtime-fixer import succeeded.
        runtime_factory = globals().get("create_paddle_ocr_instance")
        if runtime_factory is not None:
            try:
                print("🔧 Using PaddleOCR runtime fixer...")
                self.paddle_ocr = runtime_factory(**self._PADDLE_OPTIONS)
                print("✅ PaddleOCR ready for Chinese text (with auto-fixer)")
                return
            except Exception as e:
                print(f"🔧 Runtime fixer failed: {e}")
                print("💡 Trying direct PaddleOCR initialization...")

        try:
            # Fallback to direct PaddleOCR initialization
            from paddleocr import PaddleOCR
            self.paddle_ocr = PaddleOCR(**self._PADDLE_OPTIONS)
            print("✅ PaddleOCR ready for Chinese text")
            return
        except Exception as e:
            print(f"❌ PaddleOCR initialization failed: {e}")
            print("💡 Trying fallback initialization...")

        try:
            # Retry with a reduced argument set; presumably some installed
            # PaddleOCR versions reject the document-preprocessing options.
            from paddleocr import PaddleOCR
            self.paddle_ocr = PaddleOCR(use_textline_orientation=True, lang='ch')
            print("✅ PaddleOCR ready (fallback mode)")
        except Exception as e2:
            print(f"❌ PaddleOCR fallback failed: {e2}")
            print("❌ PaddleOCR not initialized")
            self.paddle_ocr = None

    def _init_easy_ocr(self):
        """Create the Korean + English EasyOCR reader if needed."""
        if self.easy_ocr is not None:
            return
        print("🌐 Loading EasyOCR for multi-language...")
        # Use only Korean and English to avoid compatibility issues
        # Japanese conflicts with other Asian languages in EasyOCR
        self.easy_ocr = easyocr.Reader(['ko', 'en'], gpu=False)
        print("✅ EasyOCR ready for Korean + English")

    def _init_easy_ocr_ja(self):
        """Create the Japanese + English EasyOCR reader if needed."""
        if self.easy_ocr_ja is not None:
            return
        print("🎌 Loading EasyOCR for Japanese...")
        # Japanese only works with English in EasyOCR
        self.easy_ocr_ja = easyocr.Reader(['ja', 'en'], gpu=False)
        print("✅ EasyOCR ready for Japanese + English")

    def _init_trocr(self):
        """Load the TrOCR processor and model if needed."""
        if self.trocr_processor is not None:
            return
        print("🤖 Loading TrOCR for general text...")
        self.trocr_processor = TrOCRProcessor.from_pretrained(self._TROCR_CHECKPOINT)
        self.trocr_model = VisionEncoderDecoderModel.from_pretrained(self._TROCR_CHECKPOINT)
        print("✅ TrOCR ready for general text")

    # ------------------------------------------------------------------
    # Public entry point
    # ------------------------------------------------------------------
    def extract_text(self, image, source_lang="auto", method="auto"):
        """
        Extract text from a comic bubble image.

        Args:
            image: PIL Image or numpy array.
            source_lang: "ja", "zh", "ko", "en" or "auto".
            method: "manga_ocr", "paddle", "easy", "trocr" or "auto".
                Unrecognised values are treated like "easy".

        Returns:
            The recognised text. An empty string means the engine ran but
            produced nothing (or hit a recognition error it handled itself).
            "OCR_ERROR" means both the selected engine and its fallback
            raised.
        """
        # Convert to PIL if numpy array
        if isinstance(image, np.ndarray):
            if image.dtype != np.uint8:
                # assumes non-uint8 arrays hold values in [0, 1] - TODO confirm
                image = (image * 255).astype(np.uint8)
            image = Image.fromarray(image)

        # Auto-select OCR based on language (EasyOCR for anything unknown)
        if method == "auto":
            method = self._AUTO_METHOD_BY_LANG.get(source_lang, "easy")

        engine = self._resolve_engine(method, source_lang)
        try:
            return self._run_engine(engine, image)
        except Exception as e:
            print(f"❌ OCR failed with {method}: {e}")

        # Fall back to the first engine for this language that is not the
        # one that just failed; retrying the same engine cannot help.
        candidates = self._FALLBACK_ENGINES.get(source_lang, self._DEFAULT_FALLBACK_ENGINES)
        fallback = next((name for name in candidates if name != engine), None)
        if fallback is None:
            return "OCR_ERROR"
        try:
            return self._run_engine(fallback, image)
        except Exception as fallback_error:
            print(f"❌ Fallback OCR failed with {fallback}: {fallback_error}")
            return "OCR_ERROR"

    @staticmethod
    def _resolve_engine(method, source_lang):
        """Map a requested method plus language to a concrete engine name."""
        if method in ("manga_ocr", "paddle", "trocr"):
            return method
        # "easy" and any unrecognised method use EasyOCR; Japanese needs its
        # own reader.
        return "easy_ja" if source_lang == "ja" else "easy"

    def _run_engine(self, engine, image):
        """Run the extractor registered under the given engine name."""
        extractors = {
            "manga_ocr": self._extract_with_manga_ocr,
            "paddle": self._extract_with_paddle_ocr,
            "easy": self._extract_with_easy_ocr,
            "easy_ja": self._extract_with_easy_ocr_ja,
            "trocr": self._extract_with_trocr,
        }
        return extractors[engine](image)

    # ------------------------------------------------------------------
    # Per-engine extraction
    # ------------------------------------------------------------------
    def _extract_with_manga_ocr(self, image):
        """Extract Japanese text using manga-ocr; returns "" on engine error."""
        self._init_manga_ocr()
        try:
            return self.manga_ocr(image).strip()
        except Exception as e:
            print(f"❌ manga-ocr error: {e}")
            return ""

    def _extract_with_paddle_ocr(self, image):
        """
        Extract Chinese text using PaddleOCR.

        Returns the space-joined recognised lines whose score exceeds the
        confidence threshold, or "" when the engine is unavailable, finds
        nothing, or raises.
        """
        self._init_paddle_ocr()
        if self.paddle_ocr is None:
            print("❌ PaddleOCR not initialized")
            return ""
        try:
            # Convert PIL to numpy for PaddleOCR
            img_array = np.array(image)
            # Use new PaddleOCR API (predict)
            results = self.paddle_ocr.predict(img_array)
            if not results:
                return ""
            texts = []
            for result in results:
                try:
                    pairs = zip(result['rec_texts'], result['rec_scores'])
                    for text, score in pairs:
                        cleaned = text.strip()
                        # Filter by confidence and non-empty
                        if cleaned and score > self._MIN_CONFIDENCE:
                            texts.append(cleaned)
                except (KeyError, TypeError) as e:
                    print(f"❌ PaddleOCR result parsing error: {e}")
                    continue
            return " ".join(texts)
        except Exception as e:
            print(f"❌ PaddleOCR error: {e}")
            return ""

    def _read_with_easyocr(self, reader, image, label):
        """
        Run an EasyOCR reader and join the accepted text fragments.

        Args:
            reader: an initialised EasyOCR reader.
            image: PIL Image (converted to a numpy array before reading).
            label: engine name used in the error message.
        """
        try:
            # Convert PIL to numpy for EasyOCR
            img_array = np.array(image)
            # Entries are (box, text, confidence) or (box, text); entries
            # without a confidence are always accepted.
            results = reader.readtext(img_array, paragraph=True)
            if not results:
                return ""
            texts = []
            for entry in results:
                if len(entry) < 2:
                    continue
                conf = entry[2] if len(entry) > 2 else 1.0
                if conf > self._MIN_CONFIDENCE:
                    texts.append(entry[1])
            return " ".join(texts)
        except Exception as e:
            print(f"❌ {label} error: {e}")
            return ""

    def _extract_with_easy_ocr(self, image):
        """Extract text using EasyOCR (Korean + English)."""
        self._init_easy_ocr()
        return self._read_with_easyocr(self.easy_ocr, image, "EasyOCR")

    def _extract_with_easy_ocr_ja(self, image):
        """Extract Japanese text using EasyOCR (Japanese + English only)."""
        self._init_easy_ocr_ja()
        return self._read_with_easyocr(self.easy_ocr_ja, image, "EasyOCR Japanese")

    def _extract_with_trocr(self, image):
        """Extract text using TrOCR; returns "" on engine error."""
        self._init_trocr()
        try:
            # Preprocess image
            pixel_values = self.trocr_processor(image, return_tensors="pt").pixel_values
            # Generate text
            generated_ids = self.trocr_model.generate(pixel_values)
            decoded = self.trocr_processor.batch_decode(generated_ids, skip_special_tokens=True)
            return decoded[0].strip()
        except Exception as e:
            print(f"❌ TrOCR error: {e}")
            return ""

    # ------------------------------------------------------------------
    # Introspection helpers
    # ------------------------------------------------------------------
    def get_best_ocr_for_language(self, source_lang):
        """
        Get the recommended OCR method for a language.

        Returns:
            A ``(method, description)`` tuple; unknown languages map to
            EasyOCR.
        """
        recommendations = {
            "ja": ("manga_ocr", "🇯🇵 manga-ocr → EasyOCR-JA (Specialized for Japanese)"),
            "zh": ("paddle", "🇨🇳 PaddleOCR → EasyOCR (Optimized for Chinese)"),
            "ko": ("easy", "🇰🇷 EasyOCR → TrOCR (Good for Korean manhwa)"),
            "en": ("easy", "🇺🇸 EasyOCR (Multi-language support)"),
            "auto": ("easy", "🌐 EasyOCR → Smart fallback (Auto-detect)"),
        }
        return recommendations.get(source_lang, ("easy", "🌐 EasyOCR (Fallback)"))

    def benchmark_ocr_methods(self, image, source_lang="auto"):
        """
        Run every OCR method on the same image and report text and timing.

        Returns:
            Dict keyed by method name with ``text``, ``time`` (seconds) and
            ``success`` entries. ``success`` is False for empty output, for
            the "OCR_ERROR" sentinel, and when the call raised.
        """
        import time

        print(f"\n🧪 OCR Benchmark for language: {source_lang}")
        print("=" * 60)
        methods = [
            ("manga_ocr", "🇯🇵 manga-ocr"),
            ("paddle", "🇨🇳 PaddleOCR"),
            ("easy", "🇰🇷 EasyOCR"),
            ("trocr", "🤖 TrOCR"),
        ]
        results = {}
        for method, name in methods:
            try:
                start_time = time.time()
                text = self.extract_text(image, source_lang, method)
                elapsed = time.time() - start_time
                stripped = text.strip()
                results[method] = {
                    'text': text,
                    'time': elapsed,
                    # The error sentinel is non-empty but is not a result.
                    'success': bool(stripped) and stripped != "OCR_ERROR",
                }
                print(f"{name:20} | {elapsed:5.2f}s | {text[:50]}")
            except Exception as e:
                results[method] = {
                    'text': f"ERROR: {e}",
                    'time': 0,
                    'success': False,
                }
                print(f"{name:20} | ERROR | {str(e)[:50]}")
        return results
if __name__ == "__main__":
    # Smoke test: build the OCR front end (engines stay unloaded) and print
    # the recommended engine description for each supported language code.
    print("🧪 Testing Multi-Language OCR")
    ocr = MultiLanguageOCR()
    # Test recommendations
    for lang in ["ja", "zh", "ko", "en", "auto"]:
        _, desc = ocr.get_best_ocr_for_language(lang)
        print(f"Language '{lang}': {desc}")