# paddleeasyocr / ocr_core.py
# (source: triflix — commit 7d610d3 "Create ocr_core.py")
import cv2
import numpy as np
import torch
import re
from easyocr import Reader
from paddleocr import TextDetection
class OCRCore:
    """OCR pipeline combining PaddleOCR text detection with EasyOCR recognition.

    Images are resized, deskewed, and contrast-enhanced before recognition.
    If the Paddle detector is unavailable (construction failed) or returns
    nothing, EasyOCR's built-in detection runs on the whole image instead.
    """

    def __init__(self, languages=None, max_dim=2000):
        """
        Args:
            languages: EasyOCR language codes; defaults to English, Hindi, Marathi.
            max_dim: longest allowed image side after resizing (pixels).
        """
        self.languages = languages or ["en", "hi", "mr"]
        self.max_dim = max_dim
        try:
            self.detector = TextDetection(model_name="PP-OCRv5_mobile_det")
        except Exception:
            # Detector is optional; run() falls back to EasyOCR's own detection.
            self.detector = None
        self.reader = Reader(self.languages, gpu=torch.cuda.is_available())

    def _resize(self, img):
        """Downscale so the longest side is at most ``max_dim``; never upscale."""
        h, w = img.shape[:2]
        if max(h, w) > self.max_dim:
            scale = self.max_dim / max(h, w)
            img = cv2.resize(img, (int(w * scale), int(h * scale)),
                             interpolation=cv2.INTER_AREA)
        return img

    def _deskew(self, img):
        """Rotate so the dominant text contour lies horizontal (white border fill)."""
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, binary = cv2.threshold(gray, 0, 255,
                                  cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        cnts, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL,
                                   cv2.CHAIN_APPROX_SIMPLE)
        if not cnts:
            return img
        rect = cv2.minAreaRect(max(cnts, key=cv2.contourArea))
        angle = rect[-1]
        # Normalize minAreaRect's angle into [-45, 45]; the raw convention
        # differs across OpenCV versions.
        if angle < -45:
            angle = 90 + angle
        elif angle > 45:
            angle -= 90
        if abs(angle) < 0.5:
            return img  # already straight enough; avoid interpolation blur
        h, w = img.shape[:2]
        M = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1.0)
        return cv2.warpAffine(img, M, (w, h), borderValue=(255, 255, 255))

    def _enhance(self, img):
        """Denoise, boost local contrast (CLAHE on the L channel), then sharpen."""
        den = cv2.fastNlMeansDenoisingColored(img, None, 10, 10, 7, 21)
        lab = cv2.cvtColor(den, cv2.COLOR_BGR2LAB)
        l, a, b = cv2.split(lab)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        l = clahe.apply(l)
        merged = cv2.merge([l, a, b])
        sharpen_kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
        return cv2.filter2D(cv2.cvtColor(merged, cv2.COLOR_LAB2BGR), -1,
                            sharpen_kernel)

    def preprocess(self, img):
        """Full preprocessing chain: resize -> deskew -> enhance."""
        img = self._resize(img)
        img = self._deskew(img)
        return self._enhance(img)

    def extract_keywords(self, txt):
        """Split recognized text on whitespace, dropping empty tokens."""
        if not txt:
            return []
        return [t for t in re.split(r"\s+", txt.strip()) if t]

    def run(self, image_path):
        """Run the OCR pipeline on the image file at ``image_path``.

        Returns:
            dict with ``ocr_text`` (concatenated text) and ``ocr_keywords``
            (whitespace-split tokens), or ``{"error": "Image not found"}``
            when the file cannot be read.
        """
        img = cv2.imread(image_path)
        if img is None:
            return {"error": "Image not found"}
        img = self.preprocess(img)
        texts = []
        keywords = []
        regions = []
        if self.detector is not None:
            try:
                # BUGFIX: detect on the *preprocessed* image, not the original
                # file path — preprocessing resizes/deskews, so boxes computed
                # on the original image would not align with the crops taken
                # below. PaddleOCR's predict() accepts ndarray input as well
                # as file paths.
                regions = self.detector.predict(input=img, batch_size=1)
            except Exception:  # narrowed from a bare except
                regions = []  # best-effort: fall back to whole-image OCR
        if regions:
            H, W = img.shape[:2]
            for res in regions:
                for poly in res.get("dt_polys", []):
                    pts = np.array(poly, dtype=np.int32)
                    x, y, w, h = cv2.boundingRect(pts)
                    # Clamp the box to the image and skip degenerate crops.
                    x, y = max(x, 0), max(y, 0)
                    crop = img[y:min(y + h, H), x:min(x + w, W)]
                    if crop.size == 0:
                        continue
                    out = self.reader.readtext(crop, detail=0)
                    if out:
                        # BUGFIX: keep every recognized line; out[0] silently
                        # dropped all but the first line of multi-line crops.
                        t = " ".join(out)
                        texts.append(t)
                        keywords.extend(self.extract_keywords(t))
        else:
            for t in self.reader.readtext(img, detail=0):
                texts.append(t)
                keywords.extend(self.extract_keywords(t))
        return {
            "ocr_text": " ".join(texts).strip(),
            "ocr_keywords": keywords,
        }