# Page residue from the file viewer: file size 1,627 bytes; revisions 619b919, dcfbd2e.
import cv2
import numpy as np
# Largest allowed long edge, in pixels; _resize_if_large downscales anything bigger.
MAX_LONG_EDGE = 4096
def preprocess_image(input_data: str | bytes | np.ndarray) -> np.ndarray:
"""
Preprocess a receipt image for OCR.
Accepts:
- File path (str)
- Raw image bytes
- Numpy array (BGR or greyscale)
Returns BGR numpy array ready for PaddleOCR.
"""
if isinstance(input_data, np.ndarray):
img = input_data.copy()
elif isinstance(input_data, (bytes, bytearray)):
arr = np.frombuffer(input_data, dtype=np.uint8)
img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
if img is None:
raise ValueError("Could not decode image from bytes")
elif isinstance(input_data, str):
img = cv2.imread(input_data, cv2.IMREAD_COLOR)
if img is None:
raise FileNotFoundError(f"Could not read image: {input_data}")
else:
raise TypeError(f"Unsupported input type: {type(input_data)}")
img = _resize_if_large(img)
return img
def boost_contrast(img: np.ndarray, strength: float = 2.0) -> np.ndarray:
    """Stretch contrast linearly around the midpoint 128.

    Each value ``v`` becomes ``strength * v + 128 * (1 - strength)``, rounded
    and clamped to ``0..255``. With ``strength > 1`` this pushes light pixels
    (ghost text) toward white and dark pixels (real ink) toward black.

    Args:
        img: Image array of any shape.
        strength: Contrast gain; ``1.0`` leaves 8-bit input unchanged.

    Returns:
        A new ``uint8`` array with the same shape as ``img``.
    """
    mid = 128.0
    offset = mid * (1.0 - strength)

    # Results below zero must clamp to 0. Taking their absolute value (as
    # cv2.convertScaleAbs does) would reflect the darkest pixels back toward
    # mid-grey, e.g. 0 -> 128 at strength 2.
    if img.dtype == np.uint8:
        # Only 256 possible inputs: build a lookup table once instead of
        # allocating a float copy of the whole image.
        levels = np.arange(256, dtype=np.float64)
        table = np.clip(np.rint(levels * strength + offset), 0, 255).astype(np.uint8)
        return table[img]

    stretched = img.astype(np.float32) * strength + offset
    return np.clip(np.rint(stretched), 0, 255).astype(np.uint8)
def _resize_if_large(img: np.ndarray) -> np.ndarray:
h, w = img.shape[:2]
long_edge = max(h, w)
if long_edge <= MAX_LONG_EDGE:
return img
scale = MAX_LONG_EDGE / long_edge
new_w = int(w * scale)
new_h = int(h * scale)
return cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)
|