import logging
import os

import cv2
import numpy as np
from PIL import Image, ImageEnhance

from .base import BaseOCR
from .gradio_ocr import GradioOCREngine


class OCREngine(BaseOCR):
    """OCR front-end with a tiered extraction strategy.

    A local engine (PaddleOCR, or EasyOCR when PaddleOCR cannot be
    initialized) is tried first; if it is unavailable, crashes, or returns
    no text, a ``GradioOCREngine`` instance is used as a fallback.
    """

    def __init__(self, engine_type='paddle'):
        """Create the engine and eagerly initialize its back-ends.

        Args:
            engine_type: Preferred local engine, ``'paddle'`` or
                ``'easyocr'``. Any other value leaves the local engine
                unset, so only the Gradio fallback is used.
        """
        self.engine_type = engine_type
        self.ocr = None
        self.gradio_fallback = None
        self._initialize_engine()

    def _initialize_engine(self):
        """Initialize the Gradio fallback and the requested local engine.

        Initialization failures are logged rather than raised. If PaddleOCR
        fails to initialize, ``self.engine_type`` is switched to
        ``'easyocr'`` and EasyOCR is attempted; if that also fails,
        ``self.ocr`` is left as ``None``.
        """
        logging.info(f"Initializing OCR engine: {self.engine_type}")

        # Pre-emptive Gradio initialization as it's the most reliable fallback
        try:
            self.gradio_fallback = GradioOCREngine()
        except Exception as e:
            logging.error(f"Failed to pre-initialize Gradio fallback: {e}")

        if self.engine_type == 'paddle':
            try:
                # Imported lazily so the module loads without paddleocr.
                from paddleocr import PaddleOCR
                self.ocr = PaddleOCR(use_angle_cls=False, lang='en', show_log=False)
                logging.info("PaddleOCR engine initialized successfully.")
            except Exception as e:
                logging.warning(f"Failed to initialize PaddleOCR: {e}. Switching to EasyOCR fallback.")
                self.engine_type = 'easyocr'

        # Deliberately a separate `if` (not `elif`): a failed PaddleOCR
        # initialization above falls through to EasyOCR here.
        if self.engine_type == 'easyocr':
            try:
                # Imported lazily so the module loads without easyocr.
                import easyocr
                self.ocr = easyocr.Reader(['en'])
                logging.info("EasyOCR engine initialized successfully.")
            except Exception as e:
                logging.error(f"Failed to initialize EasyOCR: {e}. OCR will be partially unavailable.")
                self.ocr = None

    def preprocess_image(self, image_path, scale=2):
        """Load an image and upscale, denoise, sharpen and contrast-enhance it.

        Args:
            image_path: Path of the image file to read with ``cv2.imread``.
            scale: Upscaling factor applied to both dimensions. Non-integer
                factors are supported; the target size is rounded to ints.

        Returns:
            The processed image as a BGR ``numpy`` array, or ``None`` if the
            image could not be read or any processing step raised.
        """
        try:
            image = cv2.imread(image_path)
            if image is None:
                logging.error(f"Image not found or unreadable: {image_path}")
                return None

            # Upscale. cv2.resize requires integer dimensions, so round the
            # scaled size instead of passing a possibly-float product.
            height, width = image.shape[:2]
            target_size = (int(round(width * scale)), int(round(height * scale)))
            image = cv2.resize(image, target_size, interpolation=cv2.INTER_CUBIC)

            # Denoise
            image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)

            # Sharpen
            kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
            image = cv2.filter2D(image, -1, kernel)

            # Enhance contrast (PIL works in RGB, OpenCV in BGR)
            pil_img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            enhanced_image = ImageEnhance.Contrast(pil_img).enhance(1.5)

            logging.debug(f"Preprocessing completed for {image_path}")
            return cv2.cvtColor(np.array(enhanced_image), cv2.COLOR_RGB2BGR)
        except Exception as e:
            logging.error(f"Error during image preprocessing for {image_path}: {e}")
            return None

    def _run_local_ocr(self, image_path):
        """Run the configured local engine; return its text or ``""``.

        Returns an empty string when no local engine is available, when
        preprocessing fails, when the engine finds nothing, or when the
        engine raises (the exception is logged, not propagated).
        """
        if not self.ocr or self.engine_type not in ('paddle', 'easyocr'):
            return ""

        use_paddle = self.engine_type == 'paddle'
        try:
            processed_img = self.preprocess_image(image_path)
            if processed_img is None:
                return ""

            if use_paddle:
                results = self.ocr.ocr(processed_img)
                # Text is read from index [1][0] of each entry of results[0];
                # results[0] may be empty/None when nothing is detected.
                if results and results[0]:
                    return " ".join(line[1][0] for line in results[0])
                return ""

            # EasyOCR: text is read from index 1 of each result entry.
            results = self.ocr.readtext(processed_img)
            return " ".join(res[1] for res in results)
        except Exception as e:
            engine_label = 'PaddleOCR' if use_paddle else 'EasyOCR'
            logging.error(f"{engine_label} crashed: {e}")
            return ""

    def extract_text(self, image_path: str) -> str:
        """Extract text from an image using the tiered strategy.

        1. Local engine (PaddleOCR / EasyOCR) on the preprocessed image.
        2. Gradio fallback on the original image path, if the local engine
           produced nothing and the fallback was initialized.

        Args:
            image_path: Path of the image to run OCR on.

        Returns:
            The extracted text, or ``""`` if every method failed.
        """
        logging.info(f"Starting text extraction for: {os.path.basename(image_path)}")

        # Track which tier actually produced the text so the success log
        # reports the right engine.
        source = self.engine_type
        extracted_text = self._run_local_ocr(image_path)

        if not extracted_text and self.gradio_fallback:
            logging.info("Local OCR failed or returned empty. Trying Gradio OCR fallback...")
            source = 'Gradio'
            try:
                # Coerce a None/empty result to "" to honour the str return type.
                extracted_text = self.gradio_fallback.extract_text(image_path) or ""
            except Exception as e:
                # Same best-effort policy as the local engines: log, don't raise.
                logging.error(f"Gradio OCR fallback crashed: {e}")
                extracted_text = ""

        if extracted_text:
            logging.info(f"OCR extracted {len(extracted_text)} characters using {source}.")
        else:
            logging.error("All OCR methods failed to extract text.")

        return extracted_text

    def process(self, image_path: str) -> str:
        """Alias for :meth:`extract_text`."""
        return self.extract_text(image_path)