import re

import cv2
import numpy as np
import torch
from easyocr import Reader
from paddleocr import TextDetection


class OCRCore:
    """OCR pipeline: PaddleOCR text *detection* + EasyOCR text *recognition*.

    The image is preprocessed (resize, deskew, denoise/contrast/sharpen),
    detected regions are cropped, and each crop is read by EasyOCR. If the
    Paddle detector is unavailable, EasyOCR runs on the whole image.
    """

    def __init__(self, languages=None, max_dim=2000):
        """Initialize detector and reader.

        Args:
            languages: EasyOCR language codes; defaults to ["en", "hi", "mr"].
            max_dim: longest image side allowed before downscaling.
        """
        self.languages = languages or ["en", "hi", "mr"]
        self.max_dim = max_dim
        try:
            self.detector = TextDetection(model_name="PP-OCRv5_mobile_det")
        except Exception:
            # Best-effort: fall back to whole-image recognition in run().
            self.detector = None
        self.reader = Reader(self.languages, gpu=torch.cuda.is_available())

    def _resize(self, img):
        """Downscale *img* so its longest side is at most ``self.max_dim``."""
        h, w = img.shape[:2]
        if max(h, w) > self.max_dim:
            scale = self.max_dim / max(h, w)
            img = cv2.resize(img, (int(w * scale), int(h * scale)),
                             interpolation=cv2.INTER_AREA)
        return img

    def _deskew(self, img):
        """Rotate *img* to level the dominant text contour.

        Uses the min-area rectangle of the largest Otsu-binarized contour;
        angles are folded into (-45, 45] to cover both OpenCV angle
        conventions. Rotations below 0.5 degrees are skipped.
        """
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, binary = cv2.threshold(gray, 0, 255,
                                  cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        cnts, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL,
                                   cv2.CHAIN_APPROX_SIMPLE)
        if not cnts:
            return img
        rect = cv2.minAreaRect(max(cnts, key=cv2.contourArea))
        angle = rect[-1]
        if angle < -45:
            angle = 90 + angle
        elif angle > 45:
            angle -= 90
        if abs(angle) < 0.5:
            return img
        h, w = img.shape[:2]
        M = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1.0)
        # White border so rotated-in corners look like paper, not black bars.
        return cv2.warpAffine(img, M, (w, h), borderValue=(255, 255, 255))

    def _enhance(self, img):
        """Denoise, boost local contrast (CLAHE on L channel), and sharpen."""
        den = cv2.fastNlMeansDenoisingColored(img, None, 10, 10, 7, 21)
        lab = cv2.cvtColor(den, cv2.COLOR_BGR2LAB)
        l, a, b = cv2.split(lab)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        l = clahe.apply(l)
        merged = cv2.merge([l, a, b])
        sharp = cv2.filter2D(
            cv2.cvtColor(merged, cv2.COLOR_LAB2BGR), -1,
            np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]]))
        return sharp

    def preprocess(self, img):
        """Full preprocessing chain: resize -> deskew -> enhance."""
        img = self._resize(img)
        img = self._deskew(img)
        return self._enhance(img)

    def extract_keywords(self, txt):
        """Split *txt* on whitespace into a list of non-empty tokens."""
        if not txt:
            return []
        return [t for t in re.split(r"\s+", txt.strip()) if t]

    def run(self, image_path):
        """Run the OCR pipeline on the image file at *image_path*.

        Returns:
            dict with "ocr_text" (single space-joined string) and
            "ocr_keywords" (token list), or {"error": ...} if the image
            cannot be loaded.
        """
        img = cv2.imread(image_path)
        if img is None:
            return {"error": "Image not found"}
        img = self.preprocess(img)

        regions = []
        if self.detector:
            try:
                # BUGFIX: detect on the *preprocessed* array. The original
                # detected on the raw file path but cropped the resized/
                # deskewed image, so polygons no longer lined up with the
                # pixels being cropped.
                regions = self.detector.predict(input=img, batch_size=1)
            except Exception:
                regions = []

        texts = []
        keywords = []
        if regions:
            img_h, img_w = img.shape[:2]
            for res in regions:
                for poly in res.get("dt_polys", []):
                    pts = np.array(poly, dtype=np.int32)
                    x, y, w, h = cv2.boundingRect(pts)
                    # Clamp to image bounds and skip degenerate boxes so
                    # EasyOCR never receives an empty crop.
                    x, y = max(x, 0), max(y, 0)
                    crop = img[y:min(y + h, img_h), x:min(x + w, img_w)]
                    if crop.size == 0:
                        continue
                    # BUGFIX: keep every recognized line in the region;
                    # the original kept only out[0].
                    for t in self.reader.readtext(crop, detail=0):
                        texts.append(t)
                        keywords.extend(self.extract_keywords(t))
        else:
            # No detector (or detection failed): recognize the whole image.
            for t in self.reader.readtext(img, detail=0):
                texts.append(t)
                keywords.extend(self.extract_keywords(t))

        return {
            "ocr_text": " ".join(texts).strip(),
            "ocr_keywords": keywords,
        }