import cv2
import numpy as np

# Images whose longer side exceeds this many pixels are downscaled before OCR.
MAX_LONG_EDGE = 4096


def preprocess_image(
    input_data: str | bytes | bytearray | np.ndarray,
) -> np.ndarray:
    """Load a receipt image and normalise it for OCR.

    Args:
        input_data: One of
            - a file path (``str``),
            - encoded image bytes (``bytes`` / ``bytearray``),
            - an already-decoded numpy array (BGR or greyscale).

    Returns:
        A 3-channel BGR array whose longer side is at most ``MAX_LONG_EDGE``.
        Greyscale arrays (2-D, or 3-D with a single channel) are expanded to
        three identical channels; arrays with any other channel count are
        passed through with their channels untouched. An array input is never
        modified in place.

    Raises:
        ValueError: If the bytes are empty or cannot be decoded, or if an
            array input has fewer than two dimensions.
        FileNotFoundError: If OpenCV cannot read the given path
            (``cv2.imread`` returned ``None``).
        TypeError: If ``input_data`` is none of the supported types.
    """
    if isinstance(input_data, np.ndarray):
        if input_data.ndim < 2:
            raise ValueError(
                f"Expected an image array with at least 2 dimensions, "
                f"got shape {input_data.shape}"
            )
        # Copy so later processing can never write into the caller's array.
        img = input_data.copy()
    elif isinstance(input_data, (bytes, bytearray)):
        arr = np.frombuffer(input_data, dtype=np.uint8)
        # An empty buffer cannot hold an image; skip the decoder so the caller
        # gets the same ValueError as for any other undecodable payload.
        img = cv2.imdecode(arr, cv2.IMREAD_COLOR) if arr.size else None
        if img is None:
            raise ValueError("Could not decode image from bytes")
    elif isinstance(input_data, str):
        img = cv2.imread(input_data, cv2.IMREAD_COLOR)
        if img is None:
            raise FileNotFoundError(f"Could not read image: {input_data}")
    else:
        raise TypeError(f"Unsupported input type: {type(input_data)}")

    # Resize first: expanding grey -> BGR afterwards touches fewer pixels.
    img = _resize_if_large(img)
    return _ensure_bgr(img)


def boost_contrast(img: np.ndarray, strength: float = 2.0) -> np.ndarray:
    """Increase contrast around the midpoint 128.

    Each pixel is mapped to ``strength * pixel + 128 * (1 - strength)``, then
    rounded and clipped to ``[0, 255]``. With ``strength > 1`` light pixels
    (ghost text) move toward white and dark pixels (real ink) toward black.

    Args:
        img: Input image; values are interpreted on the 0-255 scale.
        strength: Contrast gain; 1.0 leaves the image unchanged.

    Returns:
        A new ``uint8`` array with the same shape as ``img``.
    """
    mid = 128.0
    # Do the arithmetic in float and clip explicitly. cv2.convertScaleAbs takes
    # the absolute value of the result, so values that should saturate at 0
    # (e.g. pixel 0 at strength 2 -> -128) would wrap back up to mid-grey.
    scaled = img.astype(np.float32) * strength + mid * (1.0 - strength)
    return np.clip(np.rint(scaled), 0, 255).astype(np.uint8)


def _ensure_bgr(img: np.ndarray) -> np.ndarray:
    """Return ``img`` with a single grey channel replicated into three channels."""
    if img.ndim == 2:
        return np.stack((img, img, img), axis=-1)
    if img.ndim == 3 and img.shape[2] == 1:
        return np.repeat(img, 3, axis=2)
    return img


def _resize_if_large(img: np.ndarray) -> np.ndarray:
    """Downscale ``img`` so its longer side is at most ``MAX_LONG_EDGE``.

    The aspect ratio is preserved. Images already within the limit are
    returned as-is (same object, no copy).
    """
    h, w = img.shape[:2]
    long_edge = max(h, w)
    if long_edge <= MAX_LONG_EDGE:
        return img

    # Integer arithmetic keeps the long edge exactly at MAX_LONG_EDGE (no float
    # truncation), and the clamp stops the short side of a very elongated
    # image from collapsing to 0, which cv2.resize rejects.
    new_w = max(1, w * MAX_LONG_EDGE // long_edge)
    new_h = max(1, h * MAX_LONG_EDGE // long_edge)
    return cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)