receipt-ocr / utils /image_prep.py
sinful1992
perf(ocr): raise image cap to 4096px for higher quality
dcfbd2e
import cv2
import numpy as np
# Largest allowed size, in pixels, of an image's longer edge. Images whose
# longer edge exceeds this are downscaled by _resize_if_large.
MAX_LONG_EDGE = 4096
def preprocess_image(input_data: str | bytes | np.ndarray) -> np.ndarray:
"""
Preprocess a receipt image for OCR.
Accepts:
- File path (str)
- Raw image bytes
- Numpy array (BGR or greyscale)
Returns BGR numpy array ready for PaddleOCR.
"""
if isinstance(input_data, np.ndarray):
img = input_data.copy()
elif isinstance(input_data, (bytes, bytearray)):
arr = np.frombuffer(input_data, dtype=np.uint8)
img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
if img is None:
raise ValueError("Could not decode image from bytes")
elif isinstance(input_data, str):
img = cv2.imread(input_data, cv2.IMREAD_COLOR)
if img is None:
raise FileNotFoundError(f"Could not read image: {input_data}")
else:
raise TypeError(f"Unsupported input type: {type(input_data)}")
img = _resize_if_large(img)
return img
def boost_contrast(img: np.ndarray, strength: float = 2.0) -> np.ndarray:
    """Increase contrast centered at midpoint 128.

    Every value is mapped to ``strength * (value - 128) + 128``, rounded and
    saturated to the 8-bit range, which pushes light pixels (ghost text)
    toward white and dark pixels (real ink) toward black.

    Args:
        img: Image array of intensities on the 0-255 scale.
        strength: Contrast gain; 1.0 leaves 8-bit values unchanged.

    Returns:
        A new ``uint8`` array with the same shape as ``img``.
    """
    mid = 128.0
    # Deliberately not cv2.convertScaleAbs: it takes the absolute value
    # before saturating, so dark pixels whose scaled value is negative were
    # reflected back toward grey (0 -> 128 at strength 2) instead of
    # clamping to black.
    scaled = img.astype(np.float32) * strength + mid * (1.0 - strength)
    return np.clip(np.rint(scaled), 0, 255).astype(np.uint8)
def _resize_if_large(img: np.ndarray) -> np.ndarray:
h, w = img.shape[:2]
long_edge = max(h, w)
if long_edge <= MAX_LONG_EDGE:
return img
scale = MAX_LONG_EDGE / long_edge
new_w = int(w * scale)
new_h = int(h * scale)
return cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)