File size: 1,627 Bytes
619b919
 
 
 
dcfbd2e
619b919
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import cv2
import numpy as np


# Largest permitted length, in pixels, of an image's longer side;
# _resize_if_large downscales any image that exceeds this bound.
MAX_LONG_EDGE = 4096


def preprocess_image(input_data: str | bytes | bytearray | np.ndarray) -> np.ndarray:
    """
    Preprocess a receipt image for OCR.

    Accepts:
      - File path (str)
      - Raw image bytes (bytes or bytearray)
      - Numpy array (BGR or greyscale)

    Returns BGR numpy array ready for PaddleOCR. Greyscale arrays
    (shape (H, W) or (H, W, 1)) are expanded to three identical channels,
    and images whose long edge exceeds MAX_LONG_EDGE are downscaled.
    An array input is never modified or aliased; the result is a new array.

    Raises:
      - ValueError: the bytes are empty or cannot be decoded as an image
      - FileNotFoundError: cv2.imread could not load the given path
      - TypeError: input_data is none of the supported types
    """
    if isinstance(input_data, np.ndarray):
        # View a 2-D greyscale image as (H, W, 1) so both greyscale layouts
        # go through the same channel-replication path below.
        grey = input_data[:, :, np.newaxis] if input_data.ndim == 2 else input_data
        if grey.ndim == 3 and grey.shape[2] == 1:
            # Replicating the single channel gives B == G == R, honouring the
            # documented BGR return; np.repeat already allocates a new array.
            img = np.repeat(grey, 3, axis=2)
        else:
            img = input_data.copy()
    elif isinstance(input_data, (bytes, bytearray)):
        arr = np.frombuffer(input_data, dtype=np.uint8)
        # An empty buffer can make cv2.imdecode raise cv2.error rather than
        # return None, so treat it as undecodable up front.
        img = cv2.imdecode(arr, cv2.IMREAD_COLOR) if arr.size else None
        if img is None:
            raise ValueError("Could not decode image from bytes")
    elif isinstance(input_data, str):
        # cv2.imread signals both a missing and an unreadable file with None.
        img = cv2.imread(input_data, cv2.IMREAD_COLOR)
        if img is None:
            raise FileNotFoundError(f"Could not read image: {input_data}")
    else:
        raise TypeError(f"Unsupported input type: {type(input_data)}")

    return _resize_if_large(img)


def boost_contrast(img: np.ndarray, strength: float = 2.0) -> np.ndarray:
    """Increase contrast centered at midpoint 128 — pushes light pixels
    (ghost text) toward white and dark pixels (real ink) toward black.

    Each value v is mapped to ``strength * v + 128 * (1 - strength)``,
    rounded to the nearest integer and clamped to [0, 255].

    The clamp is done explicitly instead of via cv2.convertScaleAbs because
    that function takes the absolute value before saturating: with the
    default strength a pixel of 0 would become |-128| = 128 (mid-grey)
    rather than black.

    Args:
        img: Image array of any shape; it is not modified.
        strength: Contrast gain. 1.0 leaves uint8 input unchanged.

    Returns:
        A new uint8 array with the same shape as ``img``.
    """
    mid = 128.0
    offset = mid * (1 - strength)

    if img.dtype == np.uint8:
        # Only 256 distinct inputs exist, so compute the mapping once and
        # index into it; this avoids a full-size float temporary.
        levels = np.arange(256, dtype=np.float64)
        lut = np.clip(np.rint(levels * strength + offset), 0, 255).astype(np.uint8)
        return lut[img]

    # Other dtypes: do the arithmetic in floating point, then clamp.
    scaled = img.astype(np.float32) * strength + offset
    return np.clip(np.rint(scaled), 0, 255).astype(np.uint8)


def _resize_if_large(img: np.ndarray) -> np.ndarray:
    """Downscale ``img`` so its longer side is at most MAX_LONG_EDGE pixels.

    Images already within the limit are returned unchanged (the same
    object, not a copy). Larger images are shrunk with the aspect ratio
    preserved, using cv2.INTER_AREA interpolation.

    Args:
        img: Image array whose first two axes are height and width.

    Returns:
        ``img`` itself, or a resized copy when the long edge exceeded the cap.
    """
    h, w = img.shape[:2]
    long_edge = max(h, w)
    if long_edge <= MAX_LONG_EDGE:
        return img
    scale = MAX_LONG_EDGE / long_edge
    # Round instead of truncating so float error cannot leave the long edge
    # one pixel short of the cap, and clamp to 1 so a very thin image never
    # gets a zero-sized dimension (which cv2.resize rejects).
    new_w = max(1, round(w * scale))
    new_h = max(1, round(h * scale))
    return cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)