"""Receipt scanning helpers: image preprocessing, OCR, and total extraction."""

import re

import cv2
import numpy as np
import pytesseract

# Point pytesseract at the Tesseract binary explicitly.
pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"

# Matches decimal amounts with exactly two fractional digits, e.g. "12.34".
_AMOUNT_PATTERN = re.compile(r'\d+\.\d{2}')


def preprocess(image_bytes: bytes) -> np.ndarray:
    """Decode an encoded image and binarize it for OCR.

    The image is converted to grayscale, smoothed with a 5x5 Gaussian blur
    and then binarized with a Gaussian adaptive threshold.

    Args:
        image_bytes: Raw bytes of an encoded image.

    Returns:
        The thresholded single-channel image containing only 0 and 255.

    Raises:
        cv2.error: If the bytes cannot be decoded as an image.
    """
    np_arr = np.frombuffer(image_bytes, np.uint8)
    img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
    if img is None:
        # imdecode signals an undecodable buffer by returning None; fail here
        # with a clear message instead of an opaque assertion in cvtColor.
        # cv2.error is kept so the exception type matches what the later
        # OpenCV calls would have raised.
        raise cv2.error("preprocess: image_bytes could not be decoded as an image")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    # Adaptive threshold: 11x11 neighbourhood, constant 2 subtracted from the
    # Gaussian-weighted local mean.
    thresh = cv2.adaptiveThreshold(
        blur,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,
        2,
    )
    return thresh


def extract_total(text: str) -> float:
    """Return the largest two-decimal amount found in ``text``.

    Every substring of the form ``<digits>.<two digits>`` is treated as a
    candidate amount; the largest candidate is returned.

    Args:
        text: Text to search, typically OCR output.

    Returns:
        The largest amount found as a float, or ``0.0`` if none is found.
    """
    matches = _AMOUNT_PATTERN.findall(text)
    return max((float(m) for m in matches), default=0.0)


def scan_receipt(image_bytes: bytes) -> dict:
    """Run OCR on a receipt image and extract its total.

    Args:
        image_bytes: Raw bytes of an encoded receipt image.

    Returns:
        A dict with ``"raw_text"`` (the OCR output string) and ``"total"``
        (the largest two-decimal amount in that text, ``0.0`` if none).

    Raises:
        cv2.error: If the bytes cannot be decoded as an image.
    """
    processed = preprocess(image_bytes)
    # --psm 6 selects Tesseract's "single uniform block of text" page
    # segmentation mode.
    text = pytesseract.image_to_string(processed, config='--psm 6')
    total = extract_total(text)
    return {
        "raw_text": text,
        "total": total,
    }