ImageDataExtractor2

Running

File size: 4,696 Bytes

fad436e

import logging
import cv2
import os
import numpy as np
from PIL import Image, ImageEnhance
from .base import BaseOCR
from .gradio_ocr import GradioOCREngine

class OCREngine(BaseOCR):
    """OCR front-end that tries a local engine first and a Gradio engine second.

    The local engine is PaddleOCR (``engine_type='paddle'``) or EasyOCR
    (``engine_type='easyocr'``). If PaddleOCR cannot be initialized the
    instance switches itself to EasyOCR. A ``GradioOCREngine`` is created up
    front and used whenever the local engine is unavailable, raises, or
    returns no text.
    """

    def __init__(self, engine_type='paddle'):
        """Create the engine wrapper and eagerly initialize the OCR backends.

        Args:
            engine_type: ``'paddle'`` or ``'easyocr'``. Any other value leaves
                the local engine unset, so only the Gradio fallback is used.
        """
        self.engine_type = engine_type
        self.ocr = None  # local engine instance, or None when unavailable
        self.gradio_fallback = None  # GradioOCREngine instance, or None
        self._initialize_engine()

    def _initialize_engine(self):
        """Instantiate the Gradio fallback and the requested local engine.

        Initialization failures are logged rather than raised, so construction
        always succeeds; ``self.ocr`` and/or ``self.gradio_fallback`` simply
        stay ``None`` when the corresponding backend could not be created.
        """
        logging.info(f"Initializing OCR engine: {self.engine_type}")

        # Pre-emptive Gradio initialization as it's the most reliable fallback
        try:
            self.gradio_fallback = GradioOCREngine()
        except Exception as e:
            logging.error(f"Failed to pre-initialize Gradio fallback: {e}")

        if self.engine_type not in ('paddle', 'easyocr'):
            # Without this the instance would silently have no local engine.
            logging.warning(
                f"Unknown OCR engine type: {self.engine_type!r}. "
                "No local OCR engine will be initialized."
            )

        if self.engine_type == 'paddle':
            try:
                # Imported lazily so the module loads without paddleocr installed.
                from paddleocr import PaddleOCR
                self.ocr = PaddleOCR(use_angle_cls=False, lang='en', show_log=False)
                logging.info("PaddleOCR engine initialized successfully.")
            except Exception as e:
                logging.warning(f"Failed to initialize PaddleOCR: {e}. Switching to EasyOCR fallback.")
                # Falls through to the EasyOCR branch below.
                self.engine_type = 'easyocr'

        if self.engine_type == 'easyocr':
            try:
                # Imported lazily so the module loads without easyocr installed.
                import easyocr
                self.ocr = easyocr.Reader(['en'])
                logging.info("EasyOCR engine initialized successfully.")
            except Exception as e:
                logging.error(f"Failed to initialize EasyOCR: {e}. OCR will be partially unavailable.")
                self.ocr = None

    def preprocess_image(self, image_path, scale=2):
        """Load an image and upscale, denoise, sharpen and contrast-boost it.

        Args:
            image_path: Path of the image file to read with ``cv2.imread``.
            scale: Positive upscaling factor applied to width and height.
                Fractional factors are accepted; the target size is rounded
                to whole pixels.

        Returns:
            The processed image as a BGR ``numpy`` array, or ``None`` when the
            image cannot be read, ``scale`` is not positive, or any processing
            step raises (the error is logged).
        """
        try:
            image = cv2.imread(image_path)
            if image is None:
                logging.error(f"Image not found or unreadable: {image_path}")
                return None

            if scale <= 0:
                logging.error(f"Invalid scale factor {scale} for {image_path}")
                return None

            # Upscale. cv2.resize needs integer dimensions, so round explicitly
            # to support non-integer scale factors.
            height, width = image.shape[:2]
            new_size = (
                max(1, int(round(width * scale))),
                max(1, int(round(height * scale))),
            )
            image = cv2.resize(image, new_size, interpolation=cv2.INTER_CUBIC)

            # Denoise (h=10, hColor=10, templateWindowSize=7, searchWindowSize=21)
            image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)

            # Sharpen with a 3x3 kernel whose coefficients sum to 1.
            kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
            image = cv2.filter2D(image, -1, kernel)

            # Enhance contrast via PIL, which expects RGB rather than OpenCV's BGR.
            pil_img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            enhanced_image = ImageEnhance.Contrast(pil_img).enhance(1.5)

            logging.debug(f"Preprocessing completed for {image_path}")
            return cv2.cvtColor(np.array(enhanced_image), cv2.COLOR_RGB2BGR)
        except Exception as e:
            logging.error(f"Error during image preprocessing for {image_path}: {e}")
            return None

    def _run_local_ocr(self, image_path):
        """Run the configured local engine; return its text or ``""`` on failure."""
        if not self.ocr:
            return ""

        if self.engine_type == 'paddle':
            try:
                processed_img = self.preprocess_image(image_path)
                if processed_img is None:
                    return ""
                results = self.ocr.ocr(processed_img)
                if results and results[0]:
                    # Each entry is indexed as line[1][0] for its text; skip
                    # empty entries instead of failing on them.
                    return " ".join(line[1][0] for line in results[0] if line)
            except Exception as e:
                logging.error(f"PaddleOCR crashed: {e}")
            return ""

        if self.engine_type == 'easyocr':
            try:
                processed_img = self.preprocess_image(image_path)
                if processed_img is None:
                    return ""
                results = self.ocr.readtext(processed_img)
                # Each entry is indexed as res[1] for its text.
                return " ".join(res[1] for res in results)
            except Exception as e:
                logging.error(f"EasyOCR crashed: {e}")
            return ""

        return ""

    def extract_text(self, image_path: str) -> str:
        """Extract text from an image using the tiered OCR strategy.

        Tiers:
            1. Local engine (PaddleOCR or EasyOCR) on the preprocessed image.
            2. Gradio fallback on the original file, when tier 1 yields nothing.

        Args:
            image_path: Path of the image file to read.

        Returns:
            The extracted text, or ``""`` when every tier fails. Backend
            errors are logged and never propagated.
        """
        logging.info(f"Starting text extraction for: {os.path.basename(image_path)}")

        # 1. Local OCR
        extracted_text = self._run_local_ocr(image_path)
        source = self.engine_type

        # 2. Gradio Fallback if Local failed
        if not extracted_text and self.gradio_fallback:
            logging.info("Local OCR failed or returned empty. Trying Gradio OCR fallback...")
            source = 'Gradio'
            try:
                # Normalize a None result so the declared str return type holds.
                extracted_text = self.gradio_fallback.extract_text(image_path) or ""
            except Exception as e:
                # Treat a fallback failure like a local-engine failure.
                logging.error(f"Gradio OCR fallback crashed: {e}")
                extracted_text = ""

        if extracted_text:
            # Report the backend that actually produced the text.
            logging.info(f"OCR extracted {len(extracted_text)} characters using {source}.")
        else:
            logging.error("All OCR methods failed to extract text.")

        return extracted_text

    def process(self, image_path: str) -> str:
        """Return the text extracted from ``image_path`` (alias of ``extract_text``)."""
        return self.extract_text(image_path)