File size: 4,696 Bytes
fad436e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import logging
import cv2
import os
import numpy as np
from PIL import Image, ImageEnhance
from .base import BaseOCR
from .gradio_ocr import GradioOCREngine

class OCREngine(BaseOCR):
    """OCR front-end that tries a local engine first, then a Gradio engine.

    The local engine is PaddleOCR or EasyOCR, selected by ``engine_type``.
    A ``GradioOCREngine`` instance is created up front and is consulted
    whenever the local engine is unavailable, fails, or returns no text.
    """

    def __init__(self, engine_type='paddle'):
        """Record the requested engine and initialize the backends.

        Args:
            engine_type: ``'paddle'`` or ``'easyocr'``. Any other value leaves
                the local engine unset, so only the Gradio fallback is used.
        """
        self.engine_type = engine_type
        self.ocr = None
        self.gradio_fallback = None
        self._initialize_engine()

    def _initialize_engine(self):
        """Create the Gradio fallback and the requested local OCR engine.

        Initialization failures are logged rather than raised. If PaddleOCR
        cannot be created, ``engine_type`` is switched to ``'easyocr'`` and
        EasyOCR is attempted; if that fails too, ``self.ocr`` stays ``None``.
        """
        logging.info(f"Initializing OCR engine: {self.engine_type}")

        # Pre-emptive Gradio initialization as it's the most reliable fallback
        try:
            self.gradio_fallback = GradioOCREngine()
        except Exception as e:
            logging.error(f"Failed to pre-initialize Gradio fallback: {e}")

        if self.engine_type == 'paddle':
            try:
                # Imported lazily so a missing paddleocr install only
                # triggers the EasyOCR fallback instead of an ImportError
                # at module import time.
                from paddleocr import PaddleOCR
                self.ocr = PaddleOCR(use_angle_cls=False, lang='en', show_log=False)
                logging.info("PaddleOCR engine initialized successfully.")
            except Exception as e:
                logging.warning(f"Failed to initialize PaddleOCR: {e}. Switching to EasyOCR fallback.")
                self.engine_type = 'easyocr'

        # Deliberately a separate ``if`` (not ``elif``): a failed Paddle
        # initialization above falls through to this branch.
        if self.engine_type == 'easyocr':
            try:
                import easyocr
                self.ocr = easyocr.Reader(['en'])
                logging.info("EasyOCR engine initialized successfully.")
            except Exception as e:
                logging.error(f"Failed to initialize EasyOCR: {e}. OCR will be partially unavailable.")
                self.ocr = None

    def preprocess_image(self, image_path, scale=2):
        """Load an image and upscale, denoise, sharpen and contrast-boost it.

        Args:
            image_path: Path of the image file to read with ``cv2.imread``.
            scale: Upscaling factor applied to width and height. Fractional
                factors are accepted; the target size is rounded to whole
                pixels.

        Returns:
            The processed image as a BGR ``numpy`` array, or ``None`` if the
            image could not be read or any processing step raised.
        """
        try:
            image = cv2.imread(image_path)
            if image is None:
                logging.error(f"Image not found or unreadable: {image_path}")
                return None

            # Upscale. cv2.resize needs integer dimensions, so round the
            # scaled size (a no-op for integer scale factors).
            height, width = image.shape[:2]
            target_size = (int(round(width * scale)), int(round(height * scale)))
            image = cv2.resize(image, target_size, interpolation=cv2.INTER_CUBIC)

            # Denoise
            image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)

            # Sharpen
            kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
            image = cv2.filter2D(image, -1, kernel)

            # Enhance contrast. PIL works in RGB, OpenCV in BGR, hence the
            # colour-space round trip.
            pil_img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            enhanced_image = ImageEnhance.Contrast(pil_img).enhance(1.5)

            logging.debug(f"Preprocessing completed for {image_path}")
            return cv2.cvtColor(np.array(enhanced_image), cv2.COLOR_RGB2BGR)
        except Exception as e:
            logging.error(f"Error during image preprocessing for {image_path}: {e}")
            return None

    def _run_local_ocr(self, image_path):
        """Run the configured local engine on the preprocessed image.

        Returns:
            The recognized text fragments joined by single spaces, or ``""``
            when no local engine is available, preprocessing fails, the
            engine raises, or nothing is recognized.
        """
        if not self.ocr or self.engine_type not in ('paddle', 'easyocr'):
            return ""

        is_paddle = self.engine_type == 'paddle'
        try:
            processed_img = self.preprocess_image(image_path)
            if processed_img is None:
                return ""

            if is_paddle:
                results = self.ocr.ocr(processed_img)
                # The text of each detected line is read from line[1][0] of
                # the first result entry.
                if results and results[0]:
                    return " ".join(line[1][0] for line in results[0])
                return ""

            results = self.ocr.readtext(processed_img)
            return " ".join(res[1] for res in results)
        except Exception as e:
            engine_label = 'PaddleOCR' if is_paddle else 'EasyOCR'
            logging.error(f"{engine_label} crashed: {e}")
            return ""

    def extract_text(self, image_path: str) -> str:
        """Extract text from an image, falling back to Gradio if needed.

        Tiered strategy:
            1. Local engine (PaddleOCR or EasyOCR).
            2. Gradio fallback, when the local engine yields no text.

        Args:
            image_path: Path of the image to read.

        Returns:
            The extracted text, or ``""`` if every method failed. Engine
            errors are logged, not raised.
        """
        logging.info(f"Starting text extraction for: {os.path.basename(image_path)}")

        # 1. Local OCR
        extracted_text = self._run_local_ocr(image_path)
        source = self.engine_type

        # 2. Gradio fallback if local OCR failed
        if not extracted_text and self.gradio_fallback:
            logging.info("Local OCR failed or returned empty. Trying Gradio OCR fallback...")
            source = 'Gradio'
            try:
                # Normalize a None result so the declared ``str`` return
                # type holds.
                extracted_text = self.gradio_fallback.extract_text(image_path) or ""
            except Exception as e:
                logging.error(f"Gradio OCR fallback crashed: {e}")
                extracted_text = ""

        if extracted_text:
            logging.info(f"OCR extracted {len(extracted_text)} characters using {source}.")
        else:
            logging.error("All OCR methods failed to extract text.")

        return extracted_text

    def process(self, image_path: str) -> str:
        """Alias for :meth:`extract_text`."""
        return self.extract_text(image_path)