TEST-FRANKO / receipt_processor /parsers /studenac_parser.py
Kristijan Nincevic
Updated Konzum, Kaufland and Studenac parsers, added a folder for locally testing the parsers
5e55f4d
from .base import BaseParser
import re
from datetime import datetime
def _parse_amount(raw):
    """Convert a receipt amount string to ``float``.

    Accepts a comma or a dot as the decimal mark (``"0,99"``, ``"12.50"``).
    When both characters occur, the right-most one is taken as the decimal
    mark and the other as a grouping separator (``"1.234,56"`` -> 1234.56).

    Args:
        raw: Text made of digits, dots and commas.

    Returns:
        The parsed value, or ``None`` when ``raw`` is not a valid number
        (for example ``","`` or ``"1,2,3"``).
    """
    last_comma = raw.rfind(',')
    last_dot = raw.rfind('.')
    if last_comma != -1 and last_dot != -1:
        if last_comma > last_dot:
            normalized = raw.replace('.', '').replace(',', '.')
        else:
            normalized = raw.replace(',', '')
    else:
        normalized = raw.replace(',', '.')
    try:
        return float(normalized)
    except ValueError:
        # A malformed amount must not abort parsing of the whole receipt.
        return None


def _parse_items(text):
    """Extract the item entries from the receipt text.

    The scan is a small state machine driven line by line:

    * ``idle``     - outside the items section; lines are ignored.
    * ``name``     - the next non-empty line is taken as an item name.
    * ``quantity`` - waiting for a line starting with ``<qty> <amount>``.
    * ``total``    - the next non-empty line may carry the item total.

    A line containing "Naziv artikla" opens the section, lines containing
    "Kol Cijena" are skipped as column headers, and a line containing
    "UKUPNO" closes the section.

    Args:
        text: Full receipt text.

    Returns:
        A list of ``{"name", "quantity", "price"}`` dicts in receipt order.
    """
    quantity_re = re.compile(r'^(\d+)[\s,]+([\d.,]+)')
    total_re = re.compile(r'^(\d+[\.,]\d{2})\b')

    items = []
    pending = None
    state = "idle"

    for raw_line in text.split('\n'):
        line = raw_line.strip()

        if "Naziv artikla" in line:
            # A (repeated) header restarts name capture; keep an item that
            # already has its quantity and price.
            if state == "total":
                items.append(pending)
            pending = None
            state = "name"
            continue

        if state == "idle":
            continue

        if state == "total":
            if not line:
                continue
            total_match = total_re.search(line)
            if total_match:
                # The pattern allows exactly one separator, so float() is safe.
                pending["price"] = float(total_match.group(1).replace(',', '.'))
            items.append(pending)
            pending = None
            # Go back to name capture so that following items are parsed too.
            state = "name"
            if total_match:
                continue
            # Not a total line: the item keeps the amount from its quantity
            # line and this line is re-examined below as a name/terminator.

        if state == "name":
            if not line or "Kol Cijena" in line:
                continue
            if "UKUPNO" in line:
                state = "idle"
                continue
            pending = {"name": line, "quantity": None, "price": None}
            state = "quantity"
            continue

        # state == "quantity"
        if "UKUPNO" in line:
            # Section ended before a quantity line appeared: drop the name.
            pending = None
            state = "idle"
            continue
        quantity_match = quantity_re.search(line)
        if quantity_match:
            pending["quantity"] = int(quantity_match.group(1))
            # Unit price or total; replaced if a total line follows.
            pending["price"] = _parse_amount(quantity_match.group(2))
            state = "total"

    if state == "total":
        # Text ended right after a quantity line; keep what was parsed.
        items.append(pending)
    return items


class StudenacParser(BaseParser):
    """Parser that extracts address, date and items from Studenac receipt text."""

    def parse(self, text: str) -> dict:
        """Parse receipt text into a result dictionary.

        Args:
            text: Receipt content as a single newline-separated string.

        Returns:
            A dict with the keys ``store`` (always ``"Studenac"``), ``date``
            (ISO ``YYYY-MM-DD`` string or ``None``), ``address`` (the line
            following "Prodavaonica <number>", or ``None``), ``items`` (list
            of ``{"name", "quantity", "price"}`` dicts) and ``parser_used``.
        """
        result = {
            "store": "Studenac",
            "date": None,
            "address": None,
            "items": [],
            "parser_used": "StudenacParser",
        }

        # --- Address Extraction ---
        # `\r?` lets the match succeed on CRLF-terminated text as well.
        address_match = re.search(r'Prodavaonica \d+\r?\n([^\n]+)', text)
        if address_match:
            result['address'] = address_match.group(1).strip()

        # --- Date Extraction ---
        date_match = re.search(r'Datum[:\s]*(\d{2}\.\d{2}\.\d{4})', text)
        if date_match:
            try:
                parsed = datetime.strptime(date_match.group(1), '%d.%m.%Y')
            except ValueError:
                # Digits matched but do not form a real date (e.g. 31.02.2024);
                # the date stays None.
                pass
            else:
                result['date'] = parsed.date().isoformat()

        # --- Items Extraction ---
        result["items"] = _parse_items(text)
        return result