File size: 16,430 Bytes
66b63ad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
#!/usr/bin/env python3
"""
Multi-Language OCR System for Comic Translation
===============================================

A comprehensive OCR system that automatically selects the best OCR engine
based on the source language:

- manga-ocr: Specialized for Japanese manga text
- PaddleOCR: Optimized for Chinese manhua text
- EasyOCR: Good for Korean manhwa and multilingual text
- TrOCR: General purpose fallback OCR

Author: MangaTranslator Team
License: MIT
"""

# Third-party imaging and tensor libraries
import cv2
import numpy as np
from PIL import Image
import torch

# OCR engine imports
import easyocr

# PaddleOCR with auto-installer and runtime fixer
# Probe for PaddleOCR once at import time and record the outcome in
# PADDLE_AVAILABLE. Preference order:
#   1. the project's auto-installer followed by its runtime fixer helpers,
#   2. a plain ``from paddleocr import PaddleOCR``,
#   3. neither importable -> Chinese OCR is reported as disabled.
PADDLE_AVAILABLE = False
try:
    # The auto-installer must run before the runtime fixer is imported.
    from auto_install_paddle import ensure_paddle_ready
    ensure_paddle_ready()

    from paddle_runtime_fixer import create_paddle_ocr_instance, ensure_paddle_available
except ImportError:
    # Helper modules are missing: fall back to the stock package.
    try:
        from paddleocr import PaddleOCR
    except ImportError:
        print("⚠️ PaddleOCR not available - Chinese text recognition will be disabled")
    else:
        PADDLE_AVAILABLE = True
        print("βœ… PaddleOCR direct import successful")
else:
    PADDLE_AVAILABLE = True
    print("βœ… PaddleOCR with auto-installer and runtime fixer ready")

from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from manga_ocr import MangaOcr


class MultiLanguageOCR:
    """
    Multi-language OCR front end for comic speech bubbles.

    The class wraps four OCR back ends (manga-ocr, PaddleOCR, EasyOCR and
    TrOCR), picks one according to the *source* language of the text, and
    falls back to a different engine when the chosen one cannot be used.
    Every engine is created lazily on first use, so constructing this class
    loads no model.
    """

    # Minimum recognition confidence for a text fragment to be kept.
    MIN_CONFIDENCE = 0.5

    # Sentinel returned by extract_text() when primary and fallback both fail.
    OCR_ERROR = "OCR_ERROR"

    # Engine chosen for method="auto"; languages not listed use "easy".
    _AUTO_METHODS = {
        "ja": "manga_ocr",   # Best for Japanese manga
        "zh": "paddle",      # Best for Chinese manhua
        "ko": "easy",        # Good for Korean manhwa
        "en": "easy",        # Good for English comics
    }

    def __init__(self):
        """Create the (still empty) engine slots; models load on demand."""
        print("πŸ”§ Initializing Multi-Language OCR engines...")

        # OCR engines - initialized on demand for better memory usage
        self.manga_ocr = None          # Japanese OCR (manga-ocr) - Best for manga
        self.paddle_ocr = None         # Chinese OCR (PaddleOCR) - Best for manhua
        self.easy_ocr = None           # Multi-language OCR (EasyOCR) - Good for manhwa
        self.easy_ocr_ja = None        # Japanese EasyOCR (separate instance)
        self.trocr_processor = None    # General OCR (TrOCR) - Fallback
        self.trocr_model = None

        print("βœ… OCR engines ready for initialization")

    # ------------------------------------------------------------------
    # Lazy engine construction
    # ------------------------------------------------------------------

    def _init_manga_ocr(self):
        """Create the manga-ocr engine on first use."""
        if self.manga_ocr is None:
            print("πŸ“š Loading manga-ocr for Japanese...")
            self.manga_ocr = MangaOcr()
            print("βœ… manga-ocr ready for Japanese text")

    def _init_paddle_ocr(self):
        """
        Create the PaddleOCR engine on first use.

        Tries, in order: the runtime-fixer factory (when it was imported at
        module level), a direct ``PaddleOCR`` construction with the full
        option set, and finally a reduced option set. On total failure
        ``self.paddle_ocr`` stays ``None``; no exception is raised.
        """
        if self.paddle_ocr is not None:
            return

        print("🐼 Loading PaddleOCR for Chinese...")

        if not PADDLE_AVAILABLE:
            print("❌ PaddleOCR not available - skipping Chinese OCR initialization")
            return

        full_options = dict(
            use_doc_orientation_classify=False,
            use_doc_unwarping=False,
            use_textline_orientation=True,
            lang='ch',
        )

        # The factory only exists when the runtime fixer imported cleanly.
        fixer_factory = globals().get('create_paddle_ocr_instance')
        if fixer_factory is not None:
            try:
                print("πŸ”§ Using PaddleOCR runtime fixer...")
                self.paddle_ocr = fixer_factory(**full_options)
                print("βœ… PaddleOCR ready for Chinese text (with auto-fixer)")
                return
            except Exception as e:
                print(f"πŸ”§ Runtime fixer failed: {e}")
                print("πŸ’‘ Trying direct PaddleOCR initialization...")

        # Import separately so a failed import does not reach the retry
        # below with ``PaddleOCR`` unbound.
        try:
            from paddleocr import PaddleOCR
        except Exception as e:
            print(f"❌ PaddleOCR initialization failed: {e}")
            print("❌ PaddleOCR not initialized")
            self.paddle_ocr = None
            return

        try:
            self.paddle_ocr = PaddleOCR(**full_options)
            print("βœ… PaddleOCR ready for Chinese text")
        except Exception as e:
            print(f"❌ PaddleOCR initialization failed: {e}")
            print("πŸ’‘ Trying fallback initialization...")
            try:
                # Retry with a reduced option set.
                # NOTE(review): older PaddleOCR releases appear to call this
                # option ``use_angle_cls`` - confirm against the versions
                # this project supports.
                self.paddle_ocr = PaddleOCR(use_textline_orientation=True, lang='ch')
                print("βœ… PaddleOCR ready (fallback mode)")
            except Exception as e2:
                print(f"❌ PaddleOCR fallback failed: {e2}")
                print("❌ PaddleOCR not initialized")
                self.paddle_ocr = None

    def _init_easy_ocr(self):
        """Create the Korean + English EasyOCR reader on first use."""
        if self.easy_ocr is None:
            print("πŸ‘€ Loading EasyOCR for multi-language...")
            # Use only Korean and English to avoid compatibility issues
            # Japanese conflicts with other Asian languages in EasyOCR
            self.easy_ocr = easyocr.Reader(['ko', 'en'], gpu=False)
            print("βœ… EasyOCR ready for Korean + English")

    def _init_easy_ocr_ja(self):
        """Create the Japanese + English EasyOCR reader on first use."""
        if self.easy_ocr_ja is None:
            print("πŸ‘€ Loading EasyOCR for Japanese...")
            # Japanese only works with English in EasyOCR
            self.easy_ocr_ja = easyocr.Reader(['ja', 'en'], gpu=False)
            print("βœ… EasyOCR ready for Japanese + English")

    def _init_trocr(self):
        """Load the TrOCR processor and model on first use."""
        if self.trocr_processor is None:
            print("πŸ€– Loading TrOCR for general text...")
            self.trocr_processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
            self.trocr_model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed")
            print("βœ… TrOCR ready for general text")

    # ------------------------------------------------------------------
    # Input normalisation and engine selection helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _to_uint8(array):
        """
        Return *array* as ``uint8`` without wrap-around.

        Boolean arrays and float arrays whose maximum is at most 1.0 are
        treated as normalised and scaled by 255. Every other non-uint8
        array is assumed to already be on a 0-255 scale and is clipped
        into range.
        """
        if array.dtype == np.uint8:
            return array
        is_normalised = array.dtype == np.bool_ or (
            np.issubdtype(array.dtype, np.floating)
            and array.size
            and array.max() <= 1.0
        )
        if is_normalised:
            array = array * 255
        return np.clip(array, 0, 255).astype(np.uint8)

    @classmethod
    def _to_pil(cls, image):
        """Convert a numpy array to a PIL image; pass other inputs through."""
        if isinstance(image, np.ndarray):
            return Image.fromarray(cls._to_uint8(image))
        return image

    @staticmethod
    def _resolve_engine(source_lang, method):
        """
        Map a (non-"auto") method name to the engine that will actually run.

        "easy" and any unrecognised method use EasyOCR; Japanese gets its
        own reader ("easy_ja") because it cannot share one with Korean.
        """
        if method in ("manga_ocr", "paddle", "trocr"):
            return method
        return "easy_ja" if source_lang == "ja" else "easy"

    @staticmethod
    def _fallback_engine(source_lang, failed_engine):
        """Pick a fallback engine that differs from the one that just failed."""
        if source_lang == "ja":
            # Japanese: EasyOCR-JA <-> manga-ocr
            return "manga_ocr" if failed_engine == "easy_ja" else "easy_ja"
        if source_lang == "zh":
            # Chinese: EasyOCR, then TrOCR
            return "trocr" if failed_engine == "easy" else "easy"
        if source_lang == "ko":
            # Korean: TrOCR, then manga-ocr
            return "manga_ocr" if failed_engine == "trocr" else "trocr"
        # Other languages: EasyOCR, or TrOCR when EasyOCR itself failed
        return "trocr" if failed_engine == "easy" else "easy"

    def extract_text(self, image, source_lang="auto", method="auto"):
        """
        Extract text from a comic bubble image.

        Args:
            image: PIL Image or numpy array. Arrays are converted to a
                uint8 PIL image first.
            source_lang: "ja", "zh", "ko", "en" or "auto".
            method: "manga_ocr", "paddle", "easy", "trocr" or "auto".
                "auto" picks the engine from ``source_lang``; an
                unrecognised value is treated like "easy".

        Returns:
            The recognised text (possibly ""), or "OCR_ERROR" when both
            the selected engine and its fallback raised.
        """
        image = self._to_pil(image)

        if method == "auto":
            method = self._AUTO_METHODS.get(source_lang, "easy")

        extractors = {
            "manga_ocr": self._extract_with_manga_ocr,
            "paddle": self._extract_with_paddle_ocr,
            "easy": self._extract_with_easy_ocr,
            "easy_ja": self._extract_with_easy_ocr_ja,
            "trocr": self._extract_with_trocr,
        }
        engine = self._resolve_engine(source_lang, method)

        try:
            return extractors[engine](image)
        except Exception as e:
            print(f"❌ OCR failed with {method}: {e}")

        # Choose the fallback from the engine that actually ran, so the
        # failed engine is never retried.
        fallback = self._fallback_engine(source_lang, engine)
        try:
            return extractors[fallback](image)
        except Exception as fallback_error:
            print(f"❌ Fallback OCR failed with {fallback}: {fallback_error}")
            return self.OCR_ERROR

    # ------------------------------------------------------------------
    # Per-engine extraction
    # ------------------------------------------------------------------

    def _extract_with_manga_ocr(self, image):
        """Run manga-ocr on *image*; return stripped text, or "" on error."""
        self._init_manga_ocr()
        try:
            return self.manga_ocr(image).strip()
        except Exception as e:
            print(f"❌ manga-ocr error: {e}")
            return ""

    def _extract_with_paddle_ocr(self, image):
        """
        Run PaddleOCR on *image*.

        Returns the space-joined fragments whose score exceeds
        ``MIN_CONFIDENCE``, or "" when the engine is unavailable, finds
        nothing, or raises.
        """
        self._init_paddle_ocr()

        if self.paddle_ocr is None:
            print("❌ PaddleOCR not initialized")
            return ""

        try:
            # PaddleOCR is fed a numpy array here, not a PIL image.
            results = self.paddle_ocr.predict(np.array(image))
            if not results:
                return ""

            texts = []
            for result in results:
                try:
                    pairs = zip(result['rec_texts'], result['rec_scores'])
                    for text, score in pairs:
                        cleaned = text.strip()
                        if cleaned and score > self.MIN_CONFIDENCE:
                            texts.append(cleaned)
                except (KeyError, TypeError) as e:
                    # Skip a malformed result but keep the others.
                    print(f"❌ PaddleOCR result parsing error: {e}")
                    continue

            return " ".join(texts)

        except Exception as e:
            print(f"❌ PaddleOCR error: {e}")
            return ""

    def _read_with_easyocr(self, reader, image):
        """
        Run an EasyOCR *reader* on *image* and join the accepted fragments.

        Results may be ``(box, text, confidence)`` or ``(box, text)``;
        entries without a confidence are always accepted.
        """
        results = reader.readtext(np.array(image), paragraph=True)

        texts = []
        for result in results or ():
            if len(result) < 2:
                continue
            confidence = result[2] if len(result) > 2 else 1.0
            if confidence > self.MIN_CONFIDENCE:
                texts.append(result[1])
        return " ".join(texts)

    def _extract_with_easy_ocr(self, image):
        """Run the Korean + English EasyOCR reader; return "" on error."""
        self._init_easy_ocr()
        try:
            return self._read_with_easyocr(self.easy_ocr, image)
        except Exception as e:
            print(f"❌ EasyOCR error: {e}")
            return ""

    def _extract_with_easy_ocr_ja(self, image):
        """Run the Japanese + English EasyOCR reader; return "" on error."""
        self._init_easy_ocr_ja()
        try:
            return self._read_with_easyocr(self.easy_ocr_ja, image)
        except Exception as e:
            print(f"❌ EasyOCR Japanese error: {e}")
            return ""

    def _extract_with_trocr(self, image):
        """Run TrOCR on *image*; return stripped text, or "" on error."""
        self._init_trocr()
        try:
            pixel_values = self.trocr_processor(image, return_tensors="pt").pixel_values
            generated_ids = self.trocr_model.generate(pixel_values)
            decoded = self.trocr_processor.batch_decode(generated_ids, skip_special_tokens=True)
            return decoded[0].strip()
        except Exception as e:
            print(f"❌ TrOCR error: {e}")
            return ""

    # ------------------------------------------------------------------
    # Introspection / diagnostics
    # ------------------------------------------------------------------

    def get_best_ocr_for_language(self, source_lang):
        """
        Return ``(method, description)`` recommended for *source_lang*.

        Unknown languages get the EasyOCR fallback entry.
        """
        recommendations = {
            "ja": ("manga_ocr", "πŸ‡―πŸ‡΅ manga-ocr β†’ EasyOCR-JA (Specialized for Japanese)"),
            "zh": ("paddle", "πŸ‡¨πŸ‡³ PaddleOCR β†’ EasyOCR (Optimized for Chinese)"),
            "ko": ("easy", "πŸ‡°πŸ‡· EasyOCR β†’ TrOCR (Good for Korean manhwa)"),
            "en": ("easy", "πŸ‡ΊπŸ‡Έ EasyOCR (Multi-language support)"),
            "auto": ("easy", "🌍 EasyOCR β†’ Smart fallback (Auto-detect)")
        }
        return recommendations.get(source_lang, ("easy", "🌍 EasyOCR (Fallback)"))

    def benchmark_ocr_methods(self, image, source_lang="auto"):
        """
        Run every OCR method on the same image and report text and timing.

        Returns:
            dict mapping method name to ``{'text', 'time', 'success'}``.
            ``success`` is False for empty output, for the "OCR_ERROR"
            sentinel, and when the call raised.
        """
        # Local import: this module does not import ``time`` at top level.
        import time

        print(f"\nπŸ§ͺ OCR Benchmark for language: {source_lang}")
        print("=" * 60)

        methods = [
            ("manga_ocr", "πŸ‡―πŸ‡΅ manga-ocr"),
            ("paddle", "πŸ‡¨πŸ‡³ PaddleOCR"),
            ("easy", "πŸ‡°πŸ‡· EasyOCR"),
            ("trocr", "πŸ€– TrOCR")
        ]

        results = {}
        for method, name in methods:
            try:
                start_time = time.perf_counter()
                text = self.extract_text(image, source_lang, method)
                elapsed = time.perf_counter() - start_time
            except Exception as e:
                results[method] = {
                    'text': f"ERROR: {e}",
                    'time': 0,
                    'success': False
                }
                print(f"{name:20} | ERROR  | {str(e)[:50]}")
                continue

            results[method] = {
                'text': text,
                'time': elapsed,
                # The error sentinel is non-empty but is not a real result.
                'success': bool(text.strip()) and text != self.OCR_ERROR
            }
            print(f"{name:20} | {elapsed:5.2f}s | {text[:50]}")

        return results


if __name__ == "__main__":
    # Smoke test: print the recommended engine description for each
    # supported language code. No OCR model is loaded by this script.
    print("πŸ§ͺ Testing Multi-Language OCR")

    engine = MultiLanguageOCR()

    for code in ("ja", "zh", "ko", "en", "auto"):
        # Only the description (second tuple element) is displayed.
        summary = engine.get_best_ocr_for_language(code)[1]
        print(f"Language '{code}': {summary}")