Spaces:
Runtime error
Runtime error
| import re | |
| from datetime import datetime | |
| from .base import BaseParser | |
class KauflandParser(BaseParser):
    """Parse the raw text of a Kaufland receipt into a receipt dict.

    The parser is purely line/regex based: it looks for an address line, a
    ``DATUM`` date stamp, and then pairs product-name lines with the price
    lines that follow them.
    """

    # Words/phrases that mark a line as header, footer or payment noise when
    # they occur ANYWHERE in the lower-cased line. Substring matching is
    # deliberate here so inflected forms are caught as well.
    _SKIP_PHRASES = (
        "račun", "cijena", "zagreb", "kartica", "odobreno", "vrijeme", "br.",
        "kontakt", "iznos", "za platiti", "za vratiti", "neto", "bruto",
        "prodaja", "terminal", "potvrde", "datum", "kaufland", "www.",
        "središće", "knifera",
    )

    # Short acronyms that also mark noise lines. They must NOT be matched as
    # plain substrings: "tc" occurs in "Ketchup", "pan" in "Pancetta", "mid"
    # in "Pomidor", ... which would silently drop real products. They only
    # count when not embedded in a longer run of letters (digits and
    # punctuation around them are fine, e.g. "PDV25%" or "OIB:123").
    _SKIP_ACRONYM_RE = re.compile(
        r'(?<![^\W\d_])(?:oib|pdv|rrn|aid|mid|pan|jir|zkir|tc)(?![^\W\d_])'
    )

    # Receipt number such as 22043/4430/30; it would otherwise pass as a
    # product name because the name pattern allows digits and slashes.
    _RECEIPT_NO_RE = re.compile(r'^\d{3,6}/\d{3,6}/?\d*$')

    # A product-name line: letters (incl. Croatian diacritics), digits,
    # whitespace and "-", ".", "/". No comma, so price lines never match.
    _ITEM_NAME_RE = re.compile(r'^[A-Za-zČĆŽŠĐčćžšđ\s\-\.\d/]+$')

    # A price at the end of a line, optionally followed by E / C / EUR.
    _PRICE_RE = re.compile(r'(\d+,\d{2})\s*(?:E|C|EUR)?$', re.IGNORECASE)

    # Date stamp in the form "DATUM: xx/xx/xx".
    _DATE_RE = re.compile(
        r'DATUM[:]?\s*(\d{2})/(\d{2})/(\d{2})', re.IGNORECASE
    )

    def parse(self, text: str) -> dict:
        """Extract store, date, address and items from receipt text.

        Args:
            text: The full receipt text, one receipt line per text line.

        Returns:
            A dict with the keys ``store``, ``date`` (``"20YY-MM-DD"`` or
            ``None``), ``address`` (str or ``None``), ``items`` (list of
            ``{"name", "quantity", "price"}`` dicts) and ``parser_used``.
        """
        lines = text.splitlines()
        return {
            "store": "Kaufland",
            "date": self._extract_date(text),
            "address": self._extract_address(lines),
            "items": self._extract_items(lines),
            "parser_used": "KauflandParser",
        }

    @staticmethod
    def _extract_address(lines):
        """Return the first line containing "Zagreb" and a comma, or None."""
        for line in lines:
            if "Zagreb" in line and "," in line:
                return line.strip()
        return None

    @classmethod
    def _extract_date(cls, text):
        """Return the DATUM stamp as "20YY-MM-DD", or None if absent."""
        match = cls._DATE_RE.search(text)
        if match is None:
            return None
        # NOTE(review): the three groups are interpreted as YY/MM/DD, as in
        # the original implementation - confirm against real receipts.
        year, month, day = match.groups()
        return f"20{year}-{month}-{day}"

    @classmethod
    def _is_noise(cls, line):
        """Tell whether a stripped line is header/footer/payment noise."""
        lowered = line.lower()
        if any(phrase in lowered for phrase in cls._SKIP_PHRASES):
            return True
        if cls._SKIP_ACRONYM_RE.search(lowered):
            return True
        return bool(cls._RECEIPT_NO_RE.match(line))

    @classmethod
    def _extract_items(cls, lines):
        """Pair product-name lines with the price lines that follow them.

        Names are queued in order of appearance; each price line consumes
        the oldest queued name. Price lines seen while no name is queued are
        ignored. Quantity is always reported as 1.0.
        """
        items = []
        pending_names = []  # FIFO of names still waiting for a price
        for raw_line in lines:
            line = raw_line.strip()
            if not line or cls._is_noise(line):
                continue

            if cls._ITEM_NAME_RE.match(line) and len(line) > 4:
                pending_names.append(line)
                continue

            price_match = cls._PRICE_RE.search(line)
            if price_match and pending_names:
                items.append({
                    # Receipts are short, so pop(0) on a list is fine here.
                    "name": pending_names.pop(0),
                    "quantity": 1.0,
                    "price": float(price_match.group(1).replace(',', '.')),
                })
        return items