Spaces:

dal4933
/

TEST-FRANKO

Runtime error

TEST-FRANKO / receipt_processor / parsers / studenac_parser.py

Kristijan Nincevic

Updated Konzum, Kaufland and Studenac parsers, added a folder for locally testing the parsers

5e55f4d about 1 year ago

2.44 kB

	from .base import BaseParser
	import re
	from datetime import datetime

	class StudenacParser(BaseParser):
	    """Parser for the raw text of Studenac store receipts.

	    Extracts the store address, the purchase date and the purchased items
	    using regular expressions plus a small line-by-line state machine.
	    """

	    # States of the item-extraction state machine.
	    _IDLE = 0              # outside the items section
	    _EXPECT_NAME = 1       # the next non-empty line is an item name
	    _EXPECT_QUANTITY = 2   # waiting for the "<quantity> <price>" line
	    _EXPECT_TOTAL = 3      # waiting for the line that starts with the total

	    # "Prodavaonica <number>" followed by the address on the next line.
	    _ADDRESS_RE = re.compile(r'Prodavaonica \d+\n([^\n]+)')
	    # "Datum" followed by a DD.MM.YYYY date.
	    _DATE_RE = re.compile(r'Datum[:\s]*(\d{2}\.\d{2}\.\d{4})')
	    # Integer quantity, separator(s), then a number at the start of a line.
	    _QUANTITY_RE = re.compile(r'^(\d+)[\s,]+([\d.,]+)')
	    # A number with exactly two decimals at the start of a line.
	    _TOTAL_RE = re.compile(r'^(\d+[\.,]\d{2})\b')

	    def parse(self, text: str) -> dict:
	        """Parse raw receipt text into a structured dictionary.

	        Args:
	            text: Full receipt text with one receipt line per text line.
	                Empty or ``None`` input yields a result without address,
	                date or items.

	        Returns:
	            A dict with the keys ``store``, ``date`` (ISO ``YYYY-MM-DD``
	            string or ``None``), ``address`` (string or ``None``),
	            ``items`` (list of ``{"name", "quantity", "price"}`` dicts)
	            and ``parser_used``.
	        """
	        result = {
	            "store": "Studenac",
	            "date": None,
	            "address": None,
	            "items": [],
	            "parser_used": "StudenacParser",
	        }
	        if not text:
	            return result

	        # --- Address Extraction ---
	        address_match = self._ADDRESS_RE.search(text)
	        if address_match:
	            result['address'] = address_match.group(1).strip()

	        # --- Date Extraction ---
	        date_match = self._DATE_RE.search(text)
	        if date_match:
	            try:
	                parsed = datetime.strptime(date_match.group(1), '%d.%m.%Y')
	            except ValueError:
	                # The digits matched the pattern but do not form a real
	                # calendar date (e.g. "32.13.2024"); leave the date unset.
	                pass
	            else:
	                result['date'] = parsed.date().isoformat()

	        # --- Items Extraction ---
	        result["items"] = self._parse_items(text)
	        return result

	    def _parse_items(self, text: str) -> list:
	        """Collect every item listed after the "Naziv artikla" header.

	        Each item is read as three consecutive pieces: a name line, a
	        "<quantity> <price>" line and a line starting with the total, which
	        replaces the provisional price. The section ends at the first
	        "UKUPNO" line seen while a new item name is expected.
	        """
	        items = []
	        state = self._IDLE
	        current = None

	        for raw_line in text.split('\n'):
	            line = raw_line.strip()

	            # The header (re)starts the items section from any state.
	            if "Naziv artikla" in line:
	                state = self._EXPECT_NAME
	                continue

	            if state == self._EXPECT_NAME:
	                if "UKUPNO" in line:
	                    # Grand-total line: nothing after it is an item.
	                    state = self._IDLE
	                elif line and "Kol Cijena" not in line:
	                    # Blank lines and the column-header line are skipped;
	                    # anything else is taken as the item name.
	                    current = {"name": line, "quantity": None, "price": None}
	                    state = self._EXPECT_QUANTITY

	            elif state == self._EXPECT_QUANTITY:
	                match = self._QUANTITY_RE.search(line)
	                price = self._to_float(match.group(2)) if match else None
	                if price is not None:
	                    current["quantity"] = int(match.group(1))
	                    # Provisional price; overwritten by the total below.
	                    current["price"] = price
	                    state = self._EXPECT_TOTAL

	            elif state == self._EXPECT_TOTAL:
	                match = self._TOTAL_RE.search(line)
	                if match:
	                    current["price"] = self._to_float(match.group(1))
	                    items.append(current)
	                    current = None
	                    # Keep scanning: further items may follow this one.
	                    state = self._EXPECT_NAME

	        return items

	    @staticmethod
	    def _to_float(raw: str):
	        """Convert a receipt number such as ``2,99`` or ``1.234,56`` to float.

	        Returns ``None`` instead of raising when *raw* is not a well-formed
	        number, so one malformed line cannot abort the whole parse.
	        """
	        if '.' in raw and ',' in raw:
	            # Both separators present: the right-most one is the decimal
	            # mark, the other one only groups thousands.
	            if raw.rfind(',') > raw.rfind('.'):
	                raw = raw.replace('.', '').replace(',', '.')
	            else:
	                raw = raw.replace(',', '')
	        else:
	            raw = raw.replace(',', '.')
	        try:
	            return float(raw)
	        except ValueError:
	            return None