Spaces:

dal4933
/

TEST-FRANKO

Runtime error

TEST-FRANKO / receipt_processor /parsers /kaufland_parser.py

Kristijan Nincevic

Fixed spar and Kaufland parsers, raised rate limiter

1d22489 about 1 year ago

2.42 kB

	import re
	from datetime import datetime
	from .base import BaseParser

	class KauflandParser(BaseParser):
	    """Parse the plain text of a Kaufland receipt into a structured dict.

	    The parser works line by line. Lines that look like product names are
	    queued in order of appearance; every line that ends in a price
	    (``12,34``, optionally followed by ``E``, ``C`` or ``EUR``) consumes the
	    oldest queued name and becomes one item.
	    """

	    # Substrings that mark header, footer, tax or payment lines. Matched
	    # against the lower-cased line, anywhere in the line.
	    _SKIP_SUBSTRINGS = (
	        "račun", "cijena", "oib", "zagreb", "kartica", "odobreno", "vrijeme",
	        "br.", "pdv", "kontakt", "iznos", "rrn", "za platiti", "za vratiti",
	        "neto", "bruto", "prodaja", "terminal", "potvrde", "jir", "zkir",
	        "datum", "kaufland", "www.", "središće", "knifera",
	    )

	    # Very short payment-slip labels. They must stand alone (not be embedded
	    # in a longer alphabetic word); otherwise "tc" would discard "Ketchup",
	    # "pan" would discard "Panceta", "mid" would discard "Pomidoro", etc.
	    _SKIP_TOKEN_RE = re.compile(
	        r'(?<![a-zčćžšđ])(?:aid|mid|pan|tc)(?![a-zčćžšđ])'
	    )

	    _DATE_RE = re.compile(
	        r'DATUM[:]?\s*(\d{2})/(\d{2})/(\d{2})', re.IGNORECASE
	    )

	    # Receipt number format like 22043/4430/30.
	    _RECEIPT_NO_RE = re.compile(r'^\d{3,6}/\d{3,6}/?\d*$')

	    _ITEM_RE = re.compile(r'^[A-Za-zČĆŽŠĐčćžšđ\s\-\.\d/]+$')

	    # Alternation must use unescaped "|"; "EUR" is listed first for clarity.
	    _PRICE_RE = re.compile(r'(\d+,\d{2})\s*(?:EUR|E|C)?$', re.IGNORECASE)

	    def parse(self, text: str) -> dict:
	        """Extract store, date, address and items from receipt text.

	        Args:
	            text: Full receipt text, one receipt line per text line.

	        Returns:
	            A dict with the keys ``store`` (always ``"Kaufland"``),
	            ``date`` (``"20YY-MM-DD"`` or ``None``), ``address`` (first line
	            containing both ``"Zagreb"`` and a comma, or ``None``),
	            ``items`` (list of ``{"name", "quantity", "price"}`` dicts, with
	            ``quantity`` fixed at ``1.0``) and ``parser_used``.
	        """
	        lines = text.splitlines()
	        receipt = {
	            "store": "Kaufland",
	            "date": None,
	            "address": None,
	            "items": [],
	            "parser_used": "KauflandParser",
	        }

	        # --- Address Extraction ---
	        for line in lines:
	            if "Zagreb" in line and "," in line:
	                receipt["address"] = line.strip()
	                break

	        # --- Date Extraction ---
	        # NOTE(review): the three groups are read as year/month/day
	        # (YY/MM/DD); confirm this matches the printed receipt format.
	        date_match = self._DATE_RE.search(text)
	        if date_match:
	            year, month, day = date_match.groups()
	            receipt["date"] = f"20{year}-{month}-{day}"

	        # --- Item Extraction ---
	        # Names waiting for their price line, oldest first.
	        pending_names = []

	        for raw_line in lines:
	            line = raw_line.strip()
	            if not line:
	                continue

	            if self._is_noise(line.lower()):
	                continue

	            if self._RECEIPT_NO_RE.match(line):
	                continue

	            # A product name must contain at least one letter; otherwise
	            # bare numbers such as "12.99" or "12345678" would be queued
	            # as names and shift every following name/price pairing.
	            if (
	                len(line) > 4
	                and self._ITEM_RE.match(line)
	                and any(ch.isalpha() for ch in line)
	            ):
	                pending_names.append(line)
	                continue

	            price_match = self._PRICE_RE.search(line)
	            if price_match and pending_names:
	                receipt["items"].append({
	                    "name": pending_names.pop(0),
	                    "quantity": 1.0,
	                    "price": float(price_match.group(1).replace(',', '.')),
	                })

	        return receipt

	    def _is_noise(self, line_lower: str) -> bool:
	        """Return True if the lower-cased line is a non-item receipt line."""
	        if any(kw in line_lower for kw in self._SKIP_SUBSTRINGS):
	            return True
	        return self._SKIP_TOKEN_RE.search(line_lower) is not None