File size: 1,332 Bytes
9a5a8ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from __future__ import annotations

import math
from typing import Tuple

import cv2
import numpy as np
from PIL import Image


def pil_to_cv(img: Image.Image) -> np.ndarray:
    """Convert a PIL image into an OpenCV-style BGR array.

    The image is first converted to RGB mode, then its channels are
    reordered from RGB to BGR.
    """
    rgb_image = img.convert("RGB")
    rgb_pixels = np.array(rgb_image)
    bgr_pixels = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
    return bgr_pixels


def cv_to_pil(img_bgr: np.ndarray) -> Image.Image:
    """Convert an OpenCV BGR array into a PIL image in RGB channel order."""
    return Image.fromarray(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))


def preprocess(img: Image.Image) -> Image.Image:
    """
    Prepare an image for OCR: light denoise followed by a contrast boost.

    Kept conservative so small text is not destroyed. The contrast step
    equalizes only the lightness channel in LAB space; the a and b
    channels are passed through unchanged.
    """
    # Bilateral filtering smooths noise while preserving edges.
    denoised = cv2.bilateralFilter(
        pil_to_cv(img), d=7, sigmaColor=50, sigmaSpace=50
    )

    # Histogram-equalize the L channel only, then reassemble the LAB image.
    lightness, chan_a, chan_b = cv2.split(
        cv2.cvtColor(denoised, cv2.COLOR_BGR2LAB)
    )
    boosted_lab = cv2.merge([cv2.equalizeHist(lightness), chan_a, chan_b])

    return cv_to_pil(cv2.cvtColor(boosted_lab, cv2.COLOR_LAB2BGR))


def quad_to_bbox(quad) -> Tuple[int, int, int, int]:
    """
    Convert a detection quadrilateral to an axis-aligned bbox (x1,y1,x2,y2).

    EasyOCR returns 4 points for each detection; this collapses them to the
    smallest integer box that encloses every point.

    Args:
        quad: Iterable of points, each indexable as ``p[0]`` (x) and
            ``p[1]`` (y). Coordinates may be ints or floats.

    Returns:
        ``(x1, y1, x2, y2)`` as Python ints. The min corner is rounded down
        and the max corner is rounded up, so fractional coordinates never
        fall outside the box. Integer input is returned unchanged.

    Raises:
        ValueError: If ``quad`` contains no points.
    """
    # Materialize first: truth-testing a NumPy array directly is ambiguous,
    # so emptiness is checked on a plain list instead.
    points = list(quad)
    if not points:
        raise ValueError("quad must contain at least one point")

    xs = [float(p[0]) for p in points]
    ys = [float(p[1]) for p in points]

    # Floor the min corner and ceil the max corner; plain int() truncation
    # would shrink the box and clip up to a pixel off the detected region.
    return (
        math.floor(min(xs)),
        math.floor(min(ys)),
        math.ceil(max(xs)),
        math.ceil(max(ys)),
    )