# Spaces: sinful1 / receipt-ocr
# Running
# File size: 1,627 Bytes
# 619b919 dcfbd2e 619b919

import cv2
import numpy as np


# Upper bound, in pixels, on the longer side of an image; _resize_if_large
# downscales anything that exceeds it.
MAX_LONG_EDGE = 4096


def preprocess_image(input_data: str | bytes | np.ndarray) -> np.ndarray:
    """
    Load a receipt image and normalise it for OCR.

    Accepts:
      - File path (str)
      - Raw encoded image bytes (bytes or bytearray)
      - Numpy array (BGR or greyscale)

    Returns a BGR numpy array ready for PaddleOCR, downscaled by
    ``_resize_if_large`` when it is too big. An array passed in is never
    modified; the function works on a copy.

    Raises:
      ValueError: the bytes are empty or cannot be decoded as an image.
      FileNotFoundError: the path cannot be read or decoded as an image.
      TypeError: ``input_data`` is none of the accepted types.
    """
    if isinstance(input_data, np.ndarray):
        is_grey = input_data.ndim == 2 or (
            input_data.ndim == 3 and input_data.shape[2] == 1
        )
        if is_grey:
            # Replicate the single channel three times so the promised BGR
            # layout holds. Done in numpy so any dtype is accepted.
            img = np.dstack((input_data,) * 3)
        else:
            img = input_data.copy()
    elif isinstance(input_data, (bytes, bytearray)):
        # An empty buffer makes cv2.imdecode raise cv2.error instead of
        # returning None, so reject it up front with the documented error.
        if not input_data:
            raise ValueError("Could not decode image from bytes")
        arr = np.frombuffer(input_data, dtype=np.uint8)
        img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
        if img is None:
            raise ValueError("Could not decode image from bytes")
    elif isinstance(input_data, str):
        # cv2.imread signals both "missing file" and "not an image" with None.
        img = cv2.imread(input_data, cv2.IMREAD_COLOR)
        if img is None:
            raise FileNotFoundError(f"Could not read image: {input_data}")
    else:
        raise TypeError(f"Unsupported input type: {type(input_data)}")

    return _resize_if_large(img)


def boost_contrast(img: np.ndarray, strength: float = 2.0) -> np.ndarray:
    """Increase contrast centered at midpoint 128 — pushes light pixels
    (ghost text) toward white and dark pixels (real ink) toward black.

    Each value ``v`` becomes ``strength * (v - 128) + 128``, rounded to the
    nearest integer and saturated to the range [0, 255].

    Args:
        img: Image array; it is not modified.
        strength: Contrast gain. 1.0 leaves 8-bit values unchanged.

    Returns:
        A new uint8 array with the same shape as ``img``.
    """
    mid = 128.0
    stretched = (np.asarray(img, dtype=np.float64) - mid) * strength + mid
    # Clip instead of taking the absolute value: cv2.convertScaleAbs folds
    # negative results back up (0 -> 128 at strength 2), which turns the
    # darkest ink grey instead of black.
    return np.clip(np.rint(stretched), 0, 255).astype(np.uint8)


def _resize_if_large(img: np.ndarray) -> np.ndarray:
    """Downscale ``img`` so its longer side is at most ``MAX_LONG_EDGE``.

    An image already within the limit is returned as-is (the same object).
    A larger one is resized with ``cv2.INTER_AREA`` using one scale factor
    for both axes, so the aspect ratio is preserved up to rounding.
    """
    h, w = img.shape[:2]
    long_edge = max(h, w)
    if long_edge <= MAX_LONG_EDGE:
        return img
    scale = MAX_LONG_EDGE / long_edge
    # round() keeps the long edge at exactly MAX_LONG_EDGE despite float
    # error. max(1, ...) stops a very thin image from collapsing to a
    # 0-pixel side, which cv2.resize rejects.
    new_w = max(1, round(w * scale))
    new_h = max(1, round(h * scale))
    return cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)