|
|
|
|
|
"""
|
|
|
Multi-Language OCR System for Comic Translation
===============================================

A comprehensive OCR system that automatically selects the best OCR engine
based on the source language:

- manga-ocr: Specialized for Japanese manga text
- PaddleOCR: Optimized for Chinese manhua text
- EasyOCR: Good for Korean manhwa and multilingual text
- TrOCR: General purpose fallback OCR

Author: MangaTranslator Team
License: MIT
|
|
|
"""
|
|
|
|
|
|
|
|
|
import cv2
|
|
|
import numpy as np
|
|
|
from PIL import Image
|
|
|
import torch
|
|
|
|
|
|
|
|
|
import easyocr
|
|
|
|
|
|
|
|
|
# PaddleOCR is an optional dependency. PADDLE_AVAILABLE records whether any
# import path succeeded; MultiLanguageOCR checks it before building the
# Chinese engine.
try:
    # Preferred path: project-local helpers (defined outside this file).
    # NOTE(review): presumably these install/repair PaddleOCR on demand --
    # confirm against auto_install_paddle / paddle_runtime_fixer.
    from auto_install_paddle import ensure_paddle_ready
    ensure_paddle_ready()

    from paddle_runtime_fixer import create_paddle_ocr_instance, ensure_paddle_available
    PADDLE_AVAILABLE = True
    print("✅ PaddleOCR with auto-installer and runtime fixer ready")
except ImportError:
    # The helpers are missing: try the plain paddleocr package instead.
    try:
        from paddleocr import PaddleOCR
        PADDLE_AVAILABLE = True
        print("✅ PaddleOCR direct import successful")
    except ImportError:
        PADDLE_AVAILABLE = False
        print("⚠️ PaddleOCR not available - Chinese text recognition will be disabled")
|
|
|
|
|
|
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
|
|
|
from manga_ocr import MangaOcr
|
|
|
|
|
|
|
|
|
class MultiLanguageOCR:
    """Multi-engine OCR front end for comic speech-bubble images.

    The engine is chosen from the *source* language of the text:
    manga-ocr for Japanese, PaddleOCR for Chinese and EasyOCR for Korean,
    English and anything else; TrOCR is available on request and as a
    fallback. Every engine is created lazily the first time it is needed.

    Attributes:
        manga_ocr: ``MangaOcr`` instance, or None until first use.
        paddle_ocr: PaddleOCR instance, or None if unused/unavailable.
        easy_ocr: EasyOCR reader for Korean + English, or None.
        easy_ocr_ja: EasyOCR reader for Japanese + English, or None.
        trocr_processor: TrOCR processor, or None until first use.
        trocr_model: TrOCR encoder/decoder model, or None until first use.
    """

    # Sentinel returned by extract_text when the primary engine and its
    # fallback both raise.
    OCR_ERROR = "OCR_ERROR"

    # Recognised fragments must score strictly above this to be kept.
    MIN_CONFIDENCE = 0.5

    # method="auto" resolution; languages not listed here use "easy".
    _AUTO_METHODS = {"ja": "manga_ocr", "zh": "paddle"}

    # Internal engine name -> name of the method that runs it. Looked up
    # with getattr at call time so subclasses can override single engines.
    _ENGINE_METHODS = {
        "manga_ocr": "_extract_with_manga_ocr",
        "paddle": "_extract_with_paddle_ocr",
        "easy": "_extract_with_easy_ocr",
        "easy_ja": "_extract_with_easy_ocr_ja",
        "trocr": "_extract_with_trocr",
    }

    def __init__(self):
        """Create the wrapper without loading any OCR model."""
        print("🔧 Initializing Multi-Language OCR engines...")

        self.manga_ocr = None
        self.paddle_ocr = None
        self.easy_ocr = None
        self.easy_ocr_ja = None
        self.trocr_processor = None
        self.trocr_model = None

        print("✅ OCR engines ready for initialization")

    # ------------------------------------------------------------------
    # Lazy engine initialisation
    # ------------------------------------------------------------------

    def _init_manga_ocr(self):
        """Load manga-ocr (Japanese) on first use."""
        if self.manga_ocr is None:
            print("🎌 Loading manga-ocr for Japanese...")
            self.manga_ocr = MangaOcr()
            print("✅ manga-ocr ready for Japanese text")

    def _init_paddle_ocr(self):
        """Load PaddleOCR (Chinese) on first use.

        Tries, in order: the project's runtime-fixer factory (when it was
        imported at module level), a direct ``PaddleOCR`` construction with
        the full option set, and finally a construction with a reduced
        option set. Never raises; ``self.paddle_ocr`` stays None when every
        attempt fails or PaddleOCR is not installed.
        """
        if self.paddle_ocr is not None:
            return

        print("🐼 Loading PaddleOCR for Chinese...")

        if not PADDLE_AVAILABLE:
            print("❌ PaddleOCR not available - skipping Chinese OCR initialization")
            return

        full_options = {
            "use_doc_orientation_classify": False,
            "use_doc_unwarping": False,
            "use_textline_orientation": True,
            "lang": 'ch',
        }

        # The factory only exists when the module-level helper import
        # succeeded, hence the globals() probe instead of a direct call.
        fixer = globals().get('create_paddle_ocr_instance')
        if fixer is not None:
            try:
                print("🔧 Using PaddleOCR runtime fixer...")
                self.paddle_ocr = fixer(**full_options)
            except Exception as e:
                print(f"🔧 Runtime fixer failed: {e}")
                print("💡 Trying direct PaddleOCR initialization...")
            else:
                print("✅ PaddleOCR ready for Chinese text (with auto-fixer)")
                return

        try:
            from paddleocr import PaddleOCR
        except Exception as e:
            # Without the class there is nothing left to retry with.
            print(f"❌ PaddleOCR initialization failed: {e}")
            print("❌ PaddleOCR not initialized")
            self.paddle_ocr = None
            return

        try:
            self.paddle_ocr = PaddleOCR(**full_options)
            print("✅ PaddleOCR ready for Chinese text")
            return
        except Exception as e:
            print(f"❌ PaddleOCR initialization failed: {e}")
            print("💡 Trying fallback initialization...")

        try:
            # Reduced option set.
            # NOTE(review): presumably for PaddleOCR builds that reject the
            # use_doc_* keywords -- confirm.
            self.paddle_ocr = PaddleOCR(use_textline_orientation=True, lang='ch')
            print("✅ PaddleOCR ready (fallback mode)")
        except Exception as e2:
            print(f"❌ PaddleOCR fallback failed: {e2}")
            print("❌ PaddleOCR not initialized")
            self.paddle_ocr = None

    def _init_easy_ocr(self):
        """Load the Korean + English EasyOCR reader (CPU) on first use."""
        if self.easy_ocr is None:
            print("🌐 Loading EasyOCR for multi-language...")
            self.easy_ocr = easyocr.Reader(['ko', 'en'], gpu=False)
            print("✅ EasyOCR ready for Korean + English")

    def _init_easy_ocr_ja(self):
        """Load the Japanese + English EasyOCR reader (CPU) on first use.

        Kept separate from the Korean reader; each reader is built with its
        own language list.
        """
        if self.easy_ocr_ja is None:
            print("🌐 Loading EasyOCR for Japanese...")
            self.easy_ocr_ja = easyocr.Reader(['ja', 'en'], gpu=False)
            print("✅ EasyOCR ready for Japanese + English")

    def _init_trocr(self):
        """Load the TrOCR processor and model on first use."""
        if self.trocr_processor is None:
            print("🤖 Loading TrOCR for general text...")
            self.trocr_processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
            self.trocr_model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed")
            print("✅ TrOCR ready for general text")

    # ------------------------------------------------------------------
    # Public entry point and its helpers
    # ------------------------------------------------------------------

    def extract_text(self, image, source_lang="auto", method="auto"):
        """Extract text from a comic bubble image.

        Args:
            image: PIL Image or numpy array. Arrays are converted to a PIL
                image; non-uint8 arrays are first brought into 0..255.
            source_lang: "ja", "zh", "ko", "en" or "auto".
            method: "manga_ocr", "paddle", "easy", "trocr" or "auto".
                "auto" picks by ``source_lang``; any unrecognised value is
                treated like "easy".

        Returns:
            The recognised text. An empty string means the engine ran but
            produced nothing (or failed internally); ``"OCR_ERROR"`` means
            the chosen engine and its fallback both raised.
        """
        image = self._to_pil_image(image)

        if method == "auto":
            method = self._AUTO_METHODS.get(source_lang, "easy")
        engine = self._select_engine(method, source_lang)

        try:
            return self._run_engine(engine, image)
        except Exception as e:
            print(f"❌ OCR failed with {method}: {e}")

        fallback = self._fallback_engine(engine, source_lang)
        try:
            return self._run_engine(fallback, image)
        except Exception as fallback_error:
            print(f"❌ Fallback OCR failed with {fallback}: {fallback_error}")
            return self.OCR_ERROR

    @staticmethod
    def _to_uint8(array):
        """Return ``array`` as uint8 in the 0..255 range.

        Data whose maximum is at most 1 (normalised floats, boolean masks)
        is scaled by 255; anything else is assumed to be in 0..255 already
        and is only clipped, so it no longer wraps around on conversion.
        """
        if array.dtype == np.uint8:
            return array
        if array.size and array.max() <= 1:
            array = array * 255
        return np.clip(array, 0, 255).astype(np.uint8)

    @classmethod
    def _to_pil_image(cls, image):
        """Convert numpy input to a PIL image; pass anything else through."""
        if isinstance(image, np.ndarray):
            return Image.fromarray(cls._to_uint8(image))
        return image

    @staticmethod
    def _select_engine(method, source_lang):
        """Map a resolved ``method`` to an internal engine name.

        "easy" and every unrecognised method use EasyOCR, with the
        Japanese reader when the source language is Japanese.
        """
        if method in ("manga_ocr", "paddle", "trocr"):
            return method
        return "easy_ja" if source_lang == "ja" else "easy"

    @staticmethod
    def _fallback_engine(engine, source_lang):
        """Pick the engine to try after ``engine`` raised.

        The result always differs from ``engine`` so a failure is never
        retried on the engine that just failed.
        """
        if source_lang == "ja":
            return "manga_ocr" if engine == "easy_ja" else "easy_ja"
        if source_lang == "ko":
            # EasyOCR holds the Korean reader; it is the fallback when
            # TrOCR was the engine that failed.
            return "easy" if engine == "trocr" else "trocr"
        return "trocr" if engine == "easy" else "easy"

    def _run_engine(self, engine, image):
        """Run the extraction method registered for ``engine``."""
        return getattr(self, self._ENGINE_METHODS[engine])(image)

    # ------------------------------------------------------------------
    # Per-engine extraction
    # ------------------------------------------------------------------

    def _extract_with_manga_ocr(self, image):
        """Extract Japanese text using manga-ocr.

        Initialisation errors propagate (so ``extract_text`` can fall
        back); recognition errors are logged and yield "".
        """
        self._init_manga_ocr()
        try:
            return self.manga_ocr(image).strip()
        except Exception as e:
            print(f"❌ manga-ocr error: {e}")
            return ""

    def _extract_with_paddle_ocr(self, image):
        """Extract Chinese text using PaddleOCR.

        Returns the space-joined fragments scoring above MIN_CONFIDENCE,
        or "" when PaddleOCR is unavailable, finds nothing, or fails.
        """
        self._init_paddle_ocr()

        if self.paddle_ocr is None:
            print("❌ PaddleOCR not initialized")
            return ""

        try:
            results = self.paddle_ocr.predict(np.array(image))
            if not results:
                return ""

            texts = []
            for result in results:
                # A malformed entry is reported and skipped so it cannot
                # discard the fragments parsed from the other entries.
                try:
                    pairs = zip(result['rec_texts'], result['rec_scores'])
                    for text, score in pairs:
                        if text.strip() and score > self.MIN_CONFIDENCE:
                            texts.append(text.strip())
                except (KeyError, TypeError) as e:
                    print(f"❌ PaddleOCR result parsing error: {e}")
                    continue

            return " ".join(texts)
        except Exception as e:
            print(f"❌ PaddleOCR error: {e}")
            return ""

    @classmethod
    def _join_easyocr_results(cls, results):
        """Join EasyOCR ``readtext`` entries into one string.

        Each entry is ``(bbox, text)`` or ``(bbox, text, confidence)``.
        Entries without a confidence are treated as fully confident;
        entries at or below MIN_CONFIDENCE are dropped.
        """
        texts = []
        for result in results or []:
            if len(result) < 2:
                continue
            confidence = result[2] if len(result) > 2 else 1.0
            if confidence > cls.MIN_CONFIDENCE:
                texts.append(result[1])
        return " ".join(texts)

    def _extract_with_easy_ocr(self, image):
        """Extract text using EasyOCR (Korean + English)."""
        self._init_easy_ocr()
        try:
            results = self.easy_ocr.readtext(np.array(image), paragraph=True)
            return self._join_easyocr_results(results)
        except Exception as e:
            print(f"❌ EasyOCR error: {e}")
            return ""

    def _extract_with_easy_ocr_ja(self, image):
        """Extract Japanese text using EasyOCR (Japanese + English only)."""
        self._init_easy_ocr_ja()
        try:
            results = self.easy_ocr_ja.readtext(np.array(image), paragraph=True)
            return self._join_easyocr_results(results)
        except Exception as e:
            print(f"❌ EasyOCR Japanese error: {e}")
            return ""

    def _extract_with_trocr(self, image):
        """Extract text using TrOCR (general purpose)."""
        self._init_trocr()
        try:
            # The TrOCR usage examples feed the processor RGB images, so
            # grayscale or RGBA bubble crops are converted first.
            if isinstance(image, Image.Image) and image.mode != "RGB":
                image = image.convert("RGB")

            pixel_values = self.trocr_processor(image, return_tensors="pt").pixel_values
            generated_ids = self.trocr_model.generate(pixel_values)
            generated_text = self.trocr_processor.batch_decode(
                generated_ids, skip_special_tokens=True
            )[0]
            return generated_text.strip()
        except Exception as e:
            print(f"❌ TrOCR error: {e}")
            return ""

    # ------------------------------------------------------------------
    # Introspection helpers
    # ------------------------------------------------------------------

    def get_best_ocr_for_language(self, source_lang):
        """Return ``(method, description)`` recommended for ``source_lang``.

        Unknown languages get the EasyOCR fallback entry.
        """
        recommendations = {
            "ja": ("manga_ocr", "🇯🇵 manga-ocr → EasyOCR-JA (Specialized for Japanese)"),
            "zh": ("paddle", "🇨🇳 PaddleOCR → EasyOCR (Optimized for Chinese)"),
            "ko": ("easy", "🇰🇷 EasyOCR → TrOCR (Good for Korean manhwa)"),
            "en": ("easy", "🇺🇸 EasyOCR (Multi-language support)"),
            "auto": ("easy", "🌐 EasyOCR → Smart fallback (Auto-detect)"),
        }
        return recommendations.get(source_lang, ("easy", "🌐 EasyOCR (Fallback)"))

    def benchmark_ocr_methods(self, image, source_lang="auto"):
        """Run every OCR method on ``image`` and report text and timing.

        Returns:
            dict mapping method name to ``{'text', 'time', 'success'}``.
            ``success`` is True only for non-blank text that is not the
            ``"OCR_ERROR"`` sentinel.
        """
        import time

        print(f"\n🧪 OCR Benchmark for language: {source_lang}")
        print("=" * 60)

        methods = [
            ("manga_ocr", "🇯🇵 manga-ocr"),
            ("paddle", "🇨🇳 PaddleOCR"),
            ("easy", "🇰🇷 EasyOCR"),
            ("trocr", "🤖 TrOCR"),
        ]

        results = {}
        for method, name in methods:
            try:
                # perf_counter is monotonic, unlike wall-clock time.time().
                started = time.perf_counter()
                text = self.extract_text(image, source_lang, method)
                elapsed = time.perf_counter() - started

                results[method] = {
                    'text': text,
                    'time': elapsed,
                    'success': bool(text.strip()) and text != self.OCR_ERROR,
                }
                print(f"{name:20} | {elapsed:5.2f}s | {text[:50]}")
            except Exception as e:
                results[method] = {
                    'text': f"ERROR: {e}",
                    'time': 0,
                    'success': False,
                }
                print(f"{name:20} | ERROR | {str(e)[:50]}")

        return results
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Smoke test: build the wrapper (no model is loaded at this point) and
    # print the recommended engine description for each supported language.
    print("🧪 Testing Multi-Language OCR")

    ocr = MultiLanguageOCR()

    for lang in ["ja", "zh", "ko", "en", "auto"]:
        _, desc = ocr.get_best_ocr_for_language(lang)
        print(f"Language '{lang}': {desc}")
|
|
|
|