Spaces:
Runtime error
Runtime error
Kristijan Nincevic
Updated Konzum, Kaufland and Studenac parsers, added a folder for locally testing the parsers
5e55f4d | from .base import BaseParser | |
| import re | |
| from datetime import datetime | |
class PlodineParser(BaseParser):
    """Extract store, date, address and line items from Plodine receipt text.

    The regular expressions are compiled once at class level and reused by
    every call to :meth:`parse`.
    """

    # Words that identify the store header line (compared upper-cased).
    _STORE_KEYWORDS = ("HIPERMARKET", "SUPERMARKET")
    # A whole line made of a five-digit postal code and a one-word city name.
    _POSTAL_CITY_RE = re.compile(r'^\d{5}\s+[A-Za-zČĆŽŠĐčćžšđ]+$')
    # First DD.MM.YYYY-shaped token anywhere in the text.
    _DATE_RE = re.compile(r'(\d{2}\.\d{2}\.\d{4})')
    # Everything between the column-header lines and the "ZA PLATITI" line.
    _ITEM_SECTION_RE = re.compile(
        r'(?<=Artikal\nKol\nCijena\nIznos €\n)(.*?)(?=\nZA PLATITI)',
        re.DOTALL,
    )
    _ITEM_RE = re.compile(
        r'^([^\n]+)\n'                          # Item name
        r'(\d+,\d+|\d+)\s*x\s*([\d,]+)?\n?'     # Quantity x UnitPrice
        r'([\d,]+)?',                           # Total price
        re.MULTILINE,
    )

    def parse(self, text: str) -> dict:
        """Parse raw receipt text into a result dictionary.

        Args:
            text: Receipt text with one receipt field per line.

        Returns:
            A dict with the keys ``store``, ``date`` (``YYYY-MM-DD`` string
            or ``None``), ``address`` (string or ``None``), ``items`` (list
            of ``{"name", "quantity", "price"}`` dicts) and ``parser_used``.
        """
        return {
            "store": "Plodine",
            "date": self._extract_date(text),
            "address": self._extract_address(text),
            "items": self._extract_items(text),
            "parser_used": "PlodineParser",
        }

    def _extract_address(self, text: str):
        """Return ``"<street>, <postal code> <city>"`` or ``None`` if incomplete."""
        street_name = None
        postal_city = None
        for raw_line in text.splitlines():
            line = raw_line.strip()
            upper_line = line.upper()
            if any(keyword in upper_line for keyword in self._STORE_KEYWORDS):
                # Expected shape: "HIPERMARKET ZAGREB Karla Metikosa" — the
                # street name is everything after the first two words.
                parts = line.split()
                if len(parts) >= 3:
                    street_name = " ".join(parts[2:])
            # No separate house-number check: any line matching the postal
            # pattern also matched the former house-number pattern, so that
            # check never influenced whether an address was produced.
            if self._POSTAL_CITY_RE.match(line):
                postal_city = line
        if street_name and postal_city:
            return f"{street_name}, {postal_city}"
        return None

    def _extract_date(self, text: str):
        """Return the first DD.MM.YYYY date as ``YYYY-MM-DD``, else ``None``."""
        found = self._DATE_RE.search(text)
        if not found:
            return None
        try:
            parsed = datetime.strptime(found.group(1), '%d.%m.%Y')
        except ValueError:
            # The token has the right shape but is not a real calendar date
            # (e.g. "99.99.2024"); treat the date as unknown.
            return None
        return parsed.date().isoformat()  # Only YYYY-MM-DD

    def _extract_items(self, text: str) -> list:
        """Return the purchased items found in the item section of the receipt.

        Each item uses the unit price when one follows the ``x``; otherwise
        the total price is used. Entries without any price, or with a price
        token that is not a valid number, are skipped.
        """
        section = self._ITEM_SECTION_RE.search(text)
        if not section:
            return []

        items = []
        for match in self._ITEM_RE.finditer(section.group(1)):
            price_str = match.group(3) or match.group(4)
            if not price_str:
                continue
            try:
                price = float(price_str.replace(',', '.'))
            except ValueError:
                # "[\d,]+" also accepts tokens like "," or "1,2,3"; one bad
                # line should not abort parsing of the whole receipt.
                continue
            items.append({
                "name": match.group(1).strip(),
                # Group 2 is always digits with at most one comma, so this
                # conversion cannot fail.
                "quantity": float(match.group(2).replace(',', '.')),
                "price": price,
            })
        return items