Spaces:
Runtime error
Runtime error
Kristijan Nincevic
Updated Konzum, Kaufland and Studenac parsers, added a folder for locally testing the parsers
5e55f4d | from .base import BaseParser | |
| import re | |
| from datetime import datetime | |
class StudenacParser(BaseParser):
    """Extract the address, date and line items from Studenac receipt text.

    The items section is located by textual markers only. The layout assumed
    by the state machine is (NOTE(review): confirm against real receipts):

        Naziv artikla            <- section header
        Kol Cijena ...           <- optional column header, skipped
        <item name>
        <quantity> <price>       <- integer quantity, then a price
        <total>                  <- amount with two decimals
        ... further items ...
        UKUPNO ...               <- ends the items section
    """

    # Patterns are compiled once; they are applied to every line of a receipt.
    _ADDRESS_RE = re.compile(r'Prodavaonica \d+\n([^\n]+)')
    _DATE_RE = re.compile(r'Datum[:\s]*(\d{2}\.\d{2}\.\d{4})')
    _QUANTITY_RE = re.compile(r'^(\d+)[\s,]+([\d.,]+)')
    _TOTAL_RE = re.compile(r'^(\d+[\.,]\d{2})\b')

    _ITEMS_HEADER = "Naziv artikla"
    _COLUMNS_HEADER = "Kol Cijena"
    _TOTALS_MARKER = "UKUPNO"

    # States of the line-by-line item scanner.
    _IDLE = 0             # items header not seen yet
    _EXPECT_NAME = 1      # next non-empty line is an item name
    _EXPECT_QUANTITY = 2  # waiting for the "<quantity> <price>" line
    _EXPECT_TOTAL = 3     # waiting for the line carrying the item total

    def parse(self, text: str) -> dict:
        """Parse receipt text into a result dictionary.

        Args:
            text: Full receipt text with lines separated by ``\\n``.

        Returns:
            A dict with the keys ``store``, ``date`` (ISO ``YYYY-MM-DD``
            string or ``None``), ``address`` (string or ``None``), ``items``
            (list of ``{"name", "quantity", "price"}`` dicts) and
            ``parser_used``.
        """
        return {
            "store": "Studenac",
            "date": self._extract_date(text),
            "address": self._extract_address(text),
            "items": self._extract_items(text),
            "parser_used": "StudenacParser",
        }

    @classmethod
    def _extract_address(cls, text: str):
        """Return the line following ``Prodavaonica <number>``, or ``None``."""
        match = cls._ADDRESS_RE.search(text)
        return match.group(1).strip() if match else None

    @classmethod
    def _extract_date(cls, text: str):
        """Return the ``Datum`` value as an ISO date string, or ``None``."""
        match = cls._DATE_RE.search(text)
        if not match:
            return None
        try:
            parsed = datetime.strptime(match.group(1), '%d.%m.%Y')
        except ValueError:
            # Digits matched the pattern but are not a real calendar date
            # (e.g. 31.02.2024); leave the date unset rather than fail.
            return None
        return parsed.date().isoformat()

    @staticmethod
    def _to_float(number: str):
        """Convert a matched numeric string to ``float``; ``None`` if invalid.

        A comma is accepted as the decimal separator. When both ``.`` and
        ``,`` occur, the right-most one is taken as the decimal separator and
        the other is dropped as a grouping character.
        """
        if "," in number and "." in number:
            if number.rfind(",") > number.rfind("."):
                number = number.replace(".", "")
            else:
                number = number.replace(",", "")
        try:
            return float(number.replace(",", "."))
        except ValueError:
            return None

    @classmethod
    def _extract_items(cls, text: str) -> list:
        """Scan the receipt line by line and collect every item found.

        Scanning starts at the ``Naziv artikla`` header and stops at the first
        ``UKUPNO`` line after it, so amounts below the items section are never
        taken for item data.
        """
        items = []
        state = cls._IDLE
        current = None

        for raw_line in text.split('\n'):
            line = raw_line.strip()

            if cls._ITEMS_HEADER in line:
                # (Re)start of the items section; discard any half-read item.
                current = None
                state = cls._EXPECT_NAME
                continue

            if state == cls._IDLE:
                continue

            # A line that also holds the column header is treated as a header
            # line, not as the end of the section.
            if cls._TOTALS_MARKER in line and cls._COLUMNS_HEADER not in line:
                break

            if state == cls._EXPECT_NAME:
                if line and cls._COLUMNS_HEADER not in line:
                    current = {"name": line, "quantity": None, "price": None}
                    state = cls._EXPECT_QUANTITY
            elif state == cls._EXPECT_QUANTITY:
                if match := cls._QUANTITY_RE.search(line):
                    price = cls._to_float(match.group(2))
                    # An unparseable price means this is not the quantity
                    # line after all; keep waiting instead of raising.
                    if price is not None:
                        current["quantity"] = int(match.group(1))
                        # Tentative value; replaced by the total line below.
                        current["price"] = price
                        state = cls._EXPECT_TOTAL
            elif state == cls._EXPECT_TOTAL:
                if match := cls._TOTAL_RE.search(line):
                    # The pattern guarantees "<digits><sep><2 digits>", which
                    # always converts.
                    current["price"] = cls._to_float(match.group(1))
                    items.append(current)
                    current = None
                    # Go back to looking for the next item's name.
                    state = cls._EXPECT_NAME

        # The section ended while an item was still waiting for its total
        # line: keep it with the price read from its quantity line.
        if state == cls._EXPECT_TOTAL and current is not None:
            items.append(current)

        return items