Spaces:
Sleeping
Sleeping
| import cv2 | |
| import numpy as np | |
| import re | |
| import pytesseract | |
| pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract" | |
def preprocess(image_bytes):
    """Decode an encoded image and binarize it for OCR.

    The bytes are decoded as a colour image, converted to grayscale,
    smoothed with a 5x5 Gaussian blur, and binarized with a Gaussian
    adaptive threshold (max value 255, block size 11, constant 2).

    Args:
        image_bytes: Bytes-like object holding an encoded image; it is
            wrapped in a ``uint8`` NumPy buffer and given to
            ``cv2.imdecode``.

    Returns:
        The thresholded image produced by ``cv2.adaptiveThreshold``.

    Raises:
        cv2.error: If the data cannot be decoded as an image.
    """
    buffer = np.frombuffer(image_bytes, np.uint8)
    img = cv2.imdecode(buffer, cv2.IMREAD_COLOR)
    if img is None:
        # imdecode reports undecodable data by returning None instead of
        # raising; without this check the failure only surfaces inside
        # cvtColor as an unrelated assertion message. cv2.error is kept as
        # the exception type so existing handlers continue to work.
        raise cv2.error(
            "preprocess: image_bytes could not be decoded as an image"
        )

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    return cv2.adaptiveThreshold(
        blur,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,
        2,
    )
def extract_total(text):
    """Return the largest two-decimal amount found in *text*.

    Amounts are runs of digits followed by a dot and exactly two digits
    (e.g. ``12.99``). Comma-grouped thousands such as ``1,234.56`` are
    recognised as a single amount rather than being truncated to
    ``234.56``.

    NOTE: the result is simply the maximum amount in the text; it is not
    tied to any "total" label, so a larger figure elsewhere on the receipt
    would be returned instead.

    Args:
        text: Text to search (for example OCR output).

    Returns:
        The largest amount as a float, or ``0.0`` when no amount is found.
    """
    # First alternative: properly comma-grouped thousands, not glued to a
    # preceding digit. Second alternative: the plain ``digits.dd`` form,
    # which matches exactly what the ungrouped pattern always matched.
    amount_pattern = r'(?<!\d)\d{1,3}(?:,\d{3})+\.\d{2}|\d+\.\d{2}'
    amounts = (
        float(token.replace(',', ''))
        for token in re.findall(amount_pattern, text)
    )
    # Float default keeps the return type consistent (0.0 == 0 for callers
    # comparing against the previous integer default).
    return max(amounts, default=0.0)
def scan_receipt(image_bytes):
    """Run OCR on a receipt image and report its text and largest amount.

    The image is passed through ``preprocess``, the result is read with
    ``pytesseract.image_to_string`` using the ``--psm 6`` configuration,
    and the recognised text is handed to ``extract_total``.

    Args:
        image_bytes: Encoded image data, forwarded to ``preprocess``.

    Returns:
        A dict with two keys: ``"raw_text"`` (the OCR output string) and
        ``"total"`` (the value returned by ``extract_total``).
    """
    ocr_text = pytesseract.image_to_string(
        preprocess(image_bytes),
        config='--psm 6',
    )
    return {"raw_text": ocr_text, "total": extract_total(ocr_text)}