Spaces:
Sleeping
Sleeping
| import cv2 | |
| import numpy as np | |
| import torch | |
| import re | |
| from easyocr import Reader | |
| from paddleocr import TextDetection | |
class OCRCore:
    """End-to-end OCR pipeline: preprocess an image, optionally detect
    text regions with a PaddleOCR detector, and recognize text with
    EasyOCR.
    """

    def __init__(self, languages=None, max_dim=2000):
        """Set up the recognizer and (best-effort) the region detector.

        languages: language codes for EasyOCR; falsy -> ["en", "hi", "mr"].
        max_dim:   longest image side allowed before downscaling.
        """
        self.languages = languages if languages else ["en", "hi", "mr"]
        self.max_dim = max_dim
        try:
            # The detector is optional: if the model cannot be loaded we
            # fall back to whole-image OCR (self.detector stays None).
            self.detector = TextDetection(model_name="PP-OCRv5_mobile_det")
        except Exception:
            self.detector = None
        self.reader = Reader(self.languages, gpu=torch.cuda.is_available())
| def _resize(self, img): | |
| h, w = img.shape[:2] | |
| if max(h, w) > self.max_dim: | |
| scale = self.max_dim / max(h, w) | |
| img = cv2.resize(img, (int(w*scale), int(h*scale)), interpolation=cv2.INTER_AREA) | |
| return img | |
| def _deskew(self, img): | |
| gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) | |
| _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU) | |
| cnts, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) | |
| if not cnts: | |
| return img | |
| rect = cv2.minAreaRect(max(cnts, key=cv2.contourArea)) | |
| angle = rect[-1] | |
| if angle < -45: | |
| angle = 90 + angle | |
| elif angle > 45: | |
| angle -= 90 | |
| if abs(angle) < 0.5: | |
| return img | |
| h, w = img.shape[:2] | |
| M = cv2.getRotationMatrix2D((w//2, h//2), angle, 1.0) | |
| return cv2.warpAffine(img, M, (w, h), borderValue=(255, 255, 255)) | |
| def _enhance(self, img): | |
| den = cv2.fastNlMeansDenoisingColored(img, None, 10, 10, 7, 21) | |
| lab = cv2.cvtColor(den, cv2.COLOR_BGR2LAB) | |
| l, a, b = cv2.split(lab) | |
| clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)) | |
| l = clahe.apply(l) | |
| merged = cv2.merge([l, a, b]) | |
| sharp = cv2.filter2D(cv2.cvtColor(merged, cv2.COLOR_LAB2BGR), -1, | |
| np.array([[0,-1,0],[-1,5,-1],[0,-1,0]])) | |
| return sharp | |
| def preprocess(self, img): | |
| img = self._resize(img) | |
| img = self._deskew(img) | |
| return self._enhance(img) | |
| def extract_keywords(self, txt): | |
| if not txt: | |
| return [] | |
| return [t for t in re.split(r"\s+", txt.strip()) if t] | |
| def run(self, image_path): | |
| img = cv2.imread(image_path) | |
| if img is None: | |
| return {"error": "Image not found"} | |
| img = self.preprocess(img) | |
| all_text = "" | |
| keywords = [] | |
| if self.detector: | |
| try: | |
| regions = self.detector.predict(input=image_path, batch_size=1) | |
| except: | |
| regions = [] | |
| else: | |
| regions = [] | |
| if regions: | |
| for res in regions: | |
| for poly, score in zip(res.get("dt_polys", []), res.get("dt_scores", [])): | |
| pts = np.array(poly, dtype=np.int32) | |
| x, y, w, h = cv2.boundingRect(pts) | |
| crop = img[y:y+h, x:x+w] | |
| out = self.reader.readtext(crop, detail=0) | |
| if out: | |
| t = out[0] | |
| all_text += " " + t | |
| keywords.extend(self.extract_keywords(t)) | |
| else: | |
| out = self.reader.readtext(img, detail=0) | |
| for t in out: | |
| all_text += " " + t | |
| keywords.extend(self.extract_keywords(t)) | |
| return { | |
| "ocr_text": all_text.strip(), | |
| "ocr_keywords": keywords | |
| } | |