File size: 1,332 Bytes
9a5a8ff | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 | from __future__ import annotations
from typing import Tuple
import numpy as np
import cv2
from PIL import Image
def pil_to_cv(img: Image.Image) -> np.ndarray:
    """Convert a PIL image into an OpenCV-style BGR ``np.ndarray``.

    The image is first normalised to RGB mode, then its channel order is
    swapped to the BGR layout OpenCV works with.
    """
    return cv2.cvtColor(np.array(img.convert("RGB")), cv2.COLOR_RGB2BGR)
def cv_to_pil(img_bgr: np.ndarray) -> Image.Image:
    """Convert an OpenCV BGR array into a PIL RGB image.

    The channel order is swapped from BGR to RGB before the array is
    wrapped in a PIL ``Image``.
    """
    return Image.fromarray(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))
def preprocess(
    img: Image.Image,
    *,
    denoise_diameter: int = 7,
    sigma_color: float = 50,
    sigma_space: float = 50,
) -> Image.Image:
    """Lightly denoise and boost contrast to help OCR.

    The defaults are deliberately conservative so small text is not
    destroyed; callers may tune the filter strength per image source.

    Args:
        img: Input PIL image (converted to RGB/BGR by ``pil_to_cv``).
        denoise_diameter: Value passed as ``d`` to ``cv2.bilateralFilter``.
        sigma_color: Value passed as ``sigmaColor`` to ``cv2.bilateralFilter``.
        sigma_space: Value passed as ``sigmaSpace`` to ``cv2.bilateralFilter``.

    Returns:
        The processed image, converted back to PIL by ``cv_to_pil``.
    """
    bgr = pil_to_cv(img)

    # Bilateral filtering denoises while preserving edges (glyph strokes).
    smoothed = cv2.bilateralFilter(
        bgr,
        d=denoise_diameter,
        sigmaColor=sigma_color,
        sigmaSpace=sigma_space,
    )

    # Equalize only the L channel of LAB so contrast is boosted while the
    # a/b colour channels pass through unchanged.
    lab = cv2.cvtColor(smoothed, cv2.COLOR_BGR2LAB)
    lightness, chroma_a, chroma_b = cv2.split(lab)
    lightness = cv2.equalizeHist(lightness)
    equalized = cv2.merge([lightness, chroma_a, chroma_b])

    return cv_to_pil(cv2.cvtColor(equalized, cv2.COLOR_LAB2BGR))
def quad_to_bbox(quad) -> Tuple[int, int, int, int]:
    """Convert a quadrilateral to an enclosing axis-aligned bbox.

    EasyOCR returns 4 points for each detection; this collapses them to
    ``(x1, y1, x2, y2)``.

    Args:
        quad: Iterable of points, each indexable as ``p[0]`` (x) and
            ``p[1]`` (y). Coordinates may be ints or floats.

    Returns:
        ``(x1, y1, x2, y2)`` as Python ints. Minima are rounded down and
        maxima rounded up so the box always contains every point, including
        fractional and negative coordinates. Integer inputs are unchanged.

    Raises:
        ValueError: If ``quad`` contains no points.
    """
    import math  # function-scope import: the file header does not import it

    points = list(quad)
    if not points:
        raise ValueError("quad must contain at least one point")

    xs = [p[0] for p in points]
    ys = [p[1] for p in points]

    # Plain int() truncates toward zero, which can shrink the box (e.g. a max
    # of 50.9 becomes 50); floor/ceil round outward instead.
    return (
        int(math.floor(min(xs))),
        int(math.floor(min(ys))),
        int(math.ceil(max(xs))),
        int(math.ceil(max(ys))),
    )