testmanga / multi_ocr.py
haaaaus's picture
Upload 24 files
66b63ad verified
#!/usr/bin/env python3
"""
Multi-Language OCR System for Comic Translation
===============================================
A comprehensive OCR system that automatically selects the best OCR engine
based on the source language:
- manga-ocr: Specialized for Japanese manga text
- PaddleOCR: Optimized for Chinese manhua text
- EasyOCR: Good for Korean manhwa and multilingual text
- TrOCR: General purpose fallback OCR
Author: MangaTranslator Team
License: MIT
"""
# Standard library imports
import cv2
import numpy as np
from PIL import Image
import torch
# OCR engine imports
import easyocr
# PaddleOCR with auto-installer and runtime fixer
try:
# Try auto-installer first
from auto_install_paddle import ensure_paddle_ready
ensure_paddle_ready()
# Then try runtime fixer
from paddle_runtime_fixer import create_paddle_ocr_instance, ensure_paddle_available
PADDLE_AVAILABLE = True
print("βœ… PaddleOCR with auto-installer and runtime fixer ready")
except ImportError:
# Fallback to direct import
try:
from paddleocr import PaddleOCR
PADDLE_AVAILABLE = True
print("βœ… PaddleOCR direct import successful")
except ImportError:
PADDLE_AVAILABLE = False
print("⚠️ PaddleOCR not available - Chinese text recognition will be disabled")
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from manga_ocr import MangaOcr
class MultiLanguageOCR:
"""
Multi-language OCR system that automatically selects the best OCR engine
based on the target language for optimal text recognition.
"""
def __init__(self):
"""Initialize multi-language OCR engines lazily for better performance"""
print("πŸ”§ Initializing Multi-Language OCR engines...")
# OCR engines - initialized on demand for better memory usage
self.manga_ocr = None # Japanese OCR (manga-ocr) - Best for manga
self.paddle_ocr = None # Chinese OCR (PaddleOCR) - Best for manhua
self.easy_ocr = None # Multi-language OCR (EasyOCR) - Good for manhwa
self.easy_ocr_ja = None # Japanese EasyOCR (separate instance)
self.trocr_processor = None # General OCR (TrOCR) - Fallback
self.trocr_model = None
print("βœ… OCR engines ready for initialization")
def _init_manga_ocr(self):
"""Initialize Japanese manga OCR engine"""
if self.manga_ocr is None:
print("πŸ“š Loading manga-ocr for Japanese...")
self.manga_ocr = MangaOcr()
print("βœ… manga-ocr ready for Japanese text")
def _init_paddle_ocr(self):
"""Initialize PaddleOCR for Chinese text with auto-fixing"""
if self.paddle_ocr is None:
print("🐼 Loading PaddleOCR for Chinese...")
if not PADDLE_AVAILABLE:
print("❌ PaddleOCR not available - skipping Chinese OCR initialization")
return
try:
# Try using the runtime fixer first
if 'create_paddle_ocr_instance' in globals():
print("πŸ”§ Using PaddleOCR runtime fixer...")
self.paddle_ocr = create_paddle_ocr_instance(
use_doc_orientation_classify=False,
use_doc_unwarping=False,
use_textline_orientation=True,
lang='ch'
)
print("βœ… PaddleOCR ready for Chinese text (with auto-fixer)")
return
except Exception as e:
print(f"πŸ”§ Runtime fixer failed: {e}")
print("πŸ’‘ Trying direct PaddleOCR initialization...")
try:
# Fallback to direct PaddleOCR initialization
from paddleocr import PaddleOCR
self.paddle_ocr = PaddleOCR(
use_doc_orientation_classify=False,
use_doc_unwarping=False,
use_textline_orientation=True,
lang='ch'
)
print("βœ… PaddleOCR ready for Chinese text")
except Exception as e:
print(f"❌ PaddleOCR initialization failed: {e}")
print("πŸ’‘ Trying fallback initialization...")
try:
# Fallback to older API
self.paddle_ocr = PaddleOCR(use_textline_orientation=True, lang='ch')
print("βœ… PaddleOCR ready (fallback mode)")
except Exception as e2:
print(f"❌ PaddleOCR fallback failed: {e2}")
print("❌ PaddleOCR not initialized")
self.paddle_ocr = None
def _init_easy_ocr(self):
"""Initialize Korean manhwa OCR"""
if self.easy_ocr is None:
print("πŸ‘€ Loading EasyOCR for multi-language...")
# Use only Korean and English to avoid compatibility issues
# Japanese conflicts with other Asian languages in EasyOCR
self.easy_ocr = easyocr.Reader(['ko', 'en'], gpu=False)
print("βœ… EasyOCR ready for Korean + English")
def _init_easy_ocr_ja(self):
"""Initialize Japanese EasyOCR (separate from Korean OCR)"""
if self.easy_ocr_ja is None:
print("πŸ‘€ Loading EasyOCR for Japanese...")
# Japanese only works with English in EasyOCR
self.easy_ocr_ja = easyocr.Reader(['ja', 'en'], gpu=False)
print("βœ… EasyOCR ready for Japanese + English")
def _init_trocr(self):
"""Initialize TrOCR for general text"""
if self.trocr_processor is None:
print("πŸ€– Loading TrOCR for general text...")
self.trocr_processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
self.trocr_model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed")
print("βœ… TrOCR ready for general text")
def extract_text(self, image, source_lang="auto", method="auto"):
"""
Extract text from comic bubble image
Args:
image: PIL Image or numpy array
source_lang: "ja", "zh", "ko", "en", "auto"
method: "manga_ocr", "paddle", "easy", "trocr", "auto"
"""
# Convert to PIL if numpy array
if isinstance(image, np.ndarray):
if image.dtype != np.uint8:
image = (image * 255).astype(np.uint8)
image = Image.fromarray(image)
# Auto-select OCR based on language
if method == "auto":
if source_lang == "ja":
method = "manga_ocr" # Best for Japanese manga
elif source_lang == "zh":
method = "paddle" # Best for Chinese manhua
elif source_lang == "ko":
method = "easy" # Good for Korean manhwa
elif source_lang == "en":
method = "easy" # Good for English comics
else: # auto or unknown
method = "easy" # EasyOCR as general fallback
try:
if method == "manga_ocr":
return self._extract_with_manga_ocr(image)
elif method == "paddle":
return self._extract_with_paddle_ocr(image)
elif method == "easy":
# Use appropriate EasyOCR based on source language
if source_lang == "ja":
return self._extract_with_easy_ocr_ja(image)
else:
return self._extract_with_easy_ocr(image)
elif method == "trocr":
return self._extract_with_trocr(image)
else:
# Fallback to appropriate EasyOCR
if source_lang == "ja":
return self._extract_with_easy_ocr_ja(image)
else:
return self._extract_with_easy_ocr(image)
except Exception as e:
print(f"❌ OCR failed with {method}: {e}")
# Smart fallback based on language
try:
if source_lang == "ja":
# For Japanese: try EasyOCR-JA -> manga-ocr
if method != "easy_ja":
return self._extract_with_easy_ocr_ja(image)
elif method != "manga_ocr":
return self._extract_with_manga_ocr(image)
elif source_lang == "zh":
# For Chinese: try EasyOCR -> TrOCR
if method != "easy":
return self._extract_with_easy_ocr(image)
else:
return self._extract_with_trocr(image)
elif source_lang == "ko":
# For Korean: try TrOCR -> manga-ocr
if method != "trocr":
return self._extract_with_trocr(image)
else:
return self._extract_with_manga_ocr(image)
else:
# For others: general fallback
return self._extract_with_easy_ocr(image)
except:
return "OCR_ERROR"
def _extract_with_manga_ocr(self, image):
"""Extract Japanese text using manga-ocr"""
self._init_manga_ocr()
try:
text = self.manga_ocr(image)
return text.strip()
except Exception as e:
print(f"❌ manga-ocr error: {e}")
return ""
def _extract_with_paddle_ocr(self, image):
"""Extract Chinese text using PaddleOCR"""
self._init_paddle_ocr()
if self.paddle_ocr is None:
print("❌ PaddleOCR not initialized")
return ""
try:
# Convert PIL to numpy for PaddleOCR
img_array = np.array(image)
# Use new PaddleOCR API (predict)
results = self.paddle_ocr.predict(img_array)
if results:
texts = []
# Parse new PaddleOCR format - OCRResult object
for result in results:
try:
rec_texts = result['rec_texts']
rec_scores = result['rec_scores']
for text, score in zip(rec_texts, rec_scores):
if text.strip() and score > 0.5: # Filter by confidence and non-empty
texts.append(text.strip())
except (KeyError, TypeError) as e:
print(f"❌ PaddleOCR result parsing error: {e}")
continue
return " ".join(texts) if texts else ""
return ""
except Exception as e:
print(f"❌ PaddleOCR error: {e}")
return ""
def _extract_with_easy_ocr(self, image):
"""Extract text using EasyOCR (Korean + English)"""
self._init_easy_ocr()
try:
# Convert PIL to numpy for EasyOCR
img_array = np.array(image)
# EasyOCR returns [(box, text, confidence)] or [(box, text)]
results = self.easy_ocr.readtext(img_array, paragraph=True)
if results:
texts = []
for result in results:
if len(result) >= 2: # Handle both formats
bbox, text = result[0], result[1]
conf = result[2] if len(result) > 2 else 1.0
if conf > 0.5: # confidence threshold
texts.append(text)
return " ".join(texts)
return ""
except Exception as e:
print(f"❌ EasyOCR error: {e}")
return ""
def _extract_with_easy_ocr_ja(self, image):
"""Extract Japanese text using EasyOCR (Japanese + English only)"""
self._init_easy_ocr_ja()
try:
# Convert PIL to numpy for EasyOCR
img_array = np.array(image)
# EasyOCR returns [(box, text, confidence)] or [(box, text)]
results = self.easy_ocr_ja.readtext(img_array, paragraph=True)
if results:
texts = []
for result in results:
if len(result) >= 2: # Handle both formats
bbox, text = result[0], result[1]
conf = result[2] if len(result) > 2 else 1.0
if conf > 0.5: # confidence threshold
texts.append(text)
return " ".join(texts)
return ""
except Exception as e:
print(f"❌ EasyOCR Japanese error: {e}")
return ""
def _extract_with_trocr(self, image):
"""Extract text using TrOCR (general purpose)"""
self._init_trocr()
try:
# Preprocess image
pixel_values = self.trocr_processor(image, return_tensors="pt").pixel_values
# Generate text
generated_ids = self.trocr_model.generate(pixel_values)
generated_text = self.trocr_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
return generated_text.strip()
except Exception as e:
print(f"❌ TrOCR error: {e}")
return ""
def get_best_ocr_for_language(self, source_lang):
"""Get recommended OCR method for language"""
recommendations = {
"ja": ("manga_ocr", "πŸ‡―πŸ‡΅ manga-ocr β†’ EasyOCR-JA (Specialized for Japanese)"),
"zh": ("paddle", "πŸ‡¨πŸ‡³ PaddleOCR β†’ EasyOCR (Optimized for Chinese)"),
"ko": ("easy", "πŸ‡°πŸ‡· EasyOCR β†’ TrOCR (Good for Korean manhwa)"),
"en": ("easy", "πŸ‡ΊπŸ‡Έ EasyOCR (Multi-language support)"),
"auto": ("easy", "🌍 EasyOCR β†’ Smart fallback (Auto-detect)")
}
return recommendations.get(source_lang, ("easy", "🌍 EasyOCR (Fallback)"))
def benchmark_ocr_methods(self, image, source_lang="auto"):
"""Compare all OCR methods on the same image"""
print(f"\nπŸ§ͺ OCR Benchmark for language: {source_lang}")
print("=" * 60)
methods = [
("manga_ocr", "πŸ‡―πŸ‡΅ manga-ocr"),
("paddle", "πŸ‡¨πŸ‡³ PaddleOCR"),
("easy", "πŸ‡°πŸ‡· EasyOCR"),
("trocr", "πŸ€– TrOCR")
]
results = {}
for method, name in methods:
try:
import time
start_time = time.time()
text = self.extract_text(image, source_lang, method)
elapsed = time.time() - start_time
results[method] = {
'text': text,
'time': elapsed,
'success': len(text.strip()) > 0
}
print(f"{name:20} | {elapsed:5.2f}s | {text[:50]}")
except Exception as e:
results[method] = {
'text': f"ERROR: {e}",
'time': 0,
'success': False
}
print(f"{name:20} | ERROR | {str(e)[:50]}")
return results
if __name__ == "__main__":
# Test script
print("πŸ§ͺ Testing Multi-Language OCR")
ocr = MultiLanguageOCR()
# Test recommendations
for lang in ["ja", "zh", "ko", "en", "auto"]:
method, desc = ocr.get_best_ocr_for_language(lang)
print(f"Language '{lang}': {desc}")