Denny Lulak committed on
Commit
7965fc0
·
1 Parent(s): 92f5cd9

Receipt-Implementations

Browse files
__pycache__/app.cpython-312.pyc CHANGED
Binary files a/__pycache__/app.cpython-312.pyc and b/__pycache__/app.cpython-312.pyc differ
 
__pycache__/inference.cpython-312.pyc CHANGED
Binary files a/__pycache__/inference.cpython-312.pyc and b/__pycache__/inference.cpython-312.pyc differ
 
app.py CHANGED
@@ -4,7 +4,7 @@ from inference import ObjectDetector
4
  import numpy as np
5
  import cv2
6
  from receipt_processor.google_ocr import GoogleVisionOCR
7
- from receipt_processor.receipt_parser import ReceiptParser
8
 
9
  # Configuration
10
  MODEL_ONNX_PATH = "model.onnx"
@@ -44,8 +44,7 @@ detector = ObjectDetector(
44
  input_size=INPUT_SIZE
45
  )
46
  ocr_processor = GoogleVisionOCR()
47
- receipt_parser = ReceiptParser()
48
- # Initialize FastAPI
49
  app = FastAPI()
50
 
51
  # Enhanced CORS configuration
@@ -95,33 +94,34 @@ async def detect_objects(file: UploadFile = File(...)):
95
  except Exception as e:
96
  raise HTTPException(500, f"Processing error: {str(e)}")
97
 
98
- # Add new endpoint
99
  @app.post("/receipt-ocr")
100
  async def process_receipt(file: UploadFile = File(...)):
101
  try:
102
- if not file.content_type.startswith("image/"):
103
- raise HTTPException(400, "File must be an image")
104
-
105
  content = await file.read()
 
 
106
  extracted_text = ocr_processor.extract_text(content)
 
107
 
108
  if not extracted_text:
109
  raise HTTPException(400, "No text extracted from image")
110
 
111
- parsed_receipt = receipt_parser.parse_receipt_text(extracted_text)
 
 
 
 
112
 
113
  return {
114
  "status": "success",
115
  "receipt": parsed_receipt
116
  }
117
 
118
- except HTTPException:
119
- raise
120
  except Exception as e:
 
121
  raise HTTPException(500, f"Receipt processing error: {str(e)}")
122
-
123
-
124
-
125
  if __name__ == "__main__":
126
  import uvicorn
127
- uvicorn.run(app, host="0.0.0.0", port=7860) # Hugging Face requires port 7860
 
4
  import numpy as np
5
  import cv2
6
  from receipt_processor.google_ocr import GoogleVisionOCR
7
+ from receipt_processor.parsers.parser_selector import ParserSelector
8
 
9
  # Configuration
10
  MODEL_ONNX_PATH = "model.onnx"
 
44
  input_size=INPUT_SIZE
45
  )
46
  ocr_processor = GoogleVisionOCR()
47
+ parser_selector = ParserSelector() # Initialize FastAPI
 
48
  app = FastAPI()
49
 
50
  # Enhanced CORS configuration
 
94
  except Exception as e:
95
  raise HTTPException(500, f"Processing error: {str(e)}")
96
 
 
97
  @app.post("/receipt-ocr")
98
  async def process_receipt(file: UploadFile = File(...)):
99
  try:
100
+ print(f"Received file: {file.filename} ({file.content_type})")
101
+
 
102
  content = await file.read()
103
+ print(f"File size: {len(content)} bytes")
104
+
105
  extracted_text = ocr_processor.extract_text(content)
106
+ print(f"Extracted text length: {len(extracted_text)} chars")
107
 
108
  if not extracted_text:
109
  raise HTTPException(400, "No text extracted from image")
110
 
111
+ parser = parser_selector.get_store_parser(extracted_text)
112
+ print(f"Using parser: {parser.__class__.__name__}")
113
+
114
+ parsed_receipt = parser.parse(extracted_text)
115
+ print("Parsing completed successfully")
116
 
117
  return {
118
  "status": "success",
119
  "receipt": parsed_receipt
120
  }
121
 
 
 
122
  except Exception as e:
123
+ print(f"ERROR: {str(e)}")
124
  raise HTTPException(500, f"Receipt processing error: {str(e)}")
 
 
 
125
  if __name__ == "__main__":
126
  import uvicorn
127
+ uvicorn.run(app, host="0.0.0.0", port=7860) # Hugging Face requires port 7860
index.html ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Receipt Parser</title>
7
+ <style>
8
+ body {
9
+ font-family: Arial, sans-serif;
10
+ max-width: 800px;
11
+ margin: 20px auto;
12
+ padding: 0 20px;
13
+ }
14
+ .upload-box {
15
+ border: 2px dashed #ccc;
16
+ padding: 20px;
17
+ text-align: center;
18
+ margin-bottom: 20px;
19
+ }
20
+ #preview {
21
+ max-width: 100%;
22
+ margin-top: 10px;
23
+ display: none;
24
+ }
25
+ pre {
26
+ background: #f0f0f0;
27
+ padding: 15px;
28
+ border-radius: 5px;
29
+ white-space: pre-wrap;
30
+ word-wrap: break-word;
31
+ }
32
+ .loading {
33
+ display: none;
34
+ color: #666;
35
+ margin: 10px 0;
36
+ }
37
+ </style>
38
+ </head>
39
+ <body>
40
+ <h1>Receipt Parser</h1>
41
+
42
+ <div class="upload-box">
43
+ <input type="file" id="fileInput" accept="image/*">
44
+ <p>Drag and drop or click to upload receipt</p>
45
+ <img id="preview">
46
+ </div>
47
+
48
+ <div class="loading" id="loading">
49
+ Processing receipt...
50
+ </div>
51
+
52
+ <pre id="results" style="display: none;"></pre>
53
+
54
+ <script>
55
const fileInput = document.getElementById('fileInput');
const preview = document.getElementById('preview');
const loading = document.getElementById('loading');
const results = document.getElementById('results');

// Object URL currently backing the preview image, kept so it can be released
// before the next upload replaces it.
let previewUrl = null;

fileInput.addEventListener('change', handleFile);

// Accept drops anywhere on the page; cancelling dragover is what allows the
// drop event to fire instead of the browser navigating to the file.
document.addEventListener('dragover', e => e.preventDefault());
document.addEventListener('drop', e => {
    e.preventDefault();
    if (e.dataTransfer.files[0]) {
        fileInput.files = e.dataTransfer.files;
        // Assigning .files does not fire a change event, so call directly.
        handleFile();
    }
});

/**
 * Preview the selected file, POST it to the receipt OCR endpoint and show
 * the JSON response (or an error message) in the results element.
 */
async function handleFile() {
    const file = fileInput.files[0];
    if (!file) return;

    // Release the previous preview's object URL; otherwise every upload
    // keeps its blob alive for the lifetime of the page.
    if (previewUrl) {
        URL.revokeObjectURL(previewUrl);
    }
    previewUrl = URL.createObjectURL(file);
    preview.src = previewUrl;
    preview.style.display = 'block';

    loading.style.display = 'block';
    results.style.display = 'none';

    try {
        const formData = new FormData();
        formData.append('file', file);

        const response = await fetch('http://localhost:7860/receipt-ocr', {
            method: 'POST',
            body: formData
        });

        const data = await response.json();
        const body = JSON.stringify(data, null, 2);
        // fetch() only rejects on network failure, so flag HTTP error
        // statuses explicitly instead of showing them like a success.
        results.textContent = response.ok
            ? body
            : `Error ${response.status}: ${body}`;
        results.style.display = 'block';

    } catch (error) {
        results.textContent = `Error: ${error.message}`;
        results.style.display = 'block';
    } finally {
        loading.style.display = 'none';
    }
}
101
+ </script>
102
+ </body>
103
+ </html>
parsed_receipts/lidl_receipt.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "store": "Lidl",
3
+ "date": "2025-04-06",
4
+ "address": "Lastovska ulica 42, Zagreb",
5
+ "items": [
6
+ {
7
+ "name": "Vrećica mala",
8
+ "quantity": 1,
9
+ "price": 0.1
10
+ },
11
+ {
12
+ "name": "Violeta Pr. toal p",
13
+ "quantity": 1,
14
+ "price": 4.89
15
+ },
16
+ {
17
+ "name": "Toast bijeli",
18
+ "quantity": 1,
19
+ "price": 0.99
20
+ },
21
+ {
22
+ "name": "Franck kava, 400",
23
+ "quantity": 1,
24
+ "price": 8.29
25
+ },
26
+ {
27
+ "name": "Cascaval listići",
28
+ "quantity": 1,
29
+ "price": 2.09
30
+ },
31
+ {
32
+ "name": "Kulenova seka",
33
+ "quantity": 1,
34
+ "price": 1.99
35
+ },
36
+ {
37
+ "name": "Trajno mlijeko 2.8%",
38
+ "quantity": 1,
39
+ "price": 0.79
40
+ }
41
+ ],
42
+ "parser_used": "LidlParser"
43
+ }
parsed_receipts/plodine_receipt.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "store": "Plodine",
3
+ "date": "2025-04-04T13:05:57",
4
+ "address": "HIPERMARKET ZAGREB Karla Metikosa, Karla Metikosa 4",
5
+ "items": [
6
+ {
7
+ "name": "JAJA PLODINE M 18/1",
8
+ "quantity": 1.0,
9
+ "price": 3.99
10
+ },
11
+ {
12
+ "name": "BANANA SORTA CAVENDISH",
13
+ "quantity": 0.348,
14
+ "price": 1.44
15
+ }
16
+ ],
17
+ "parser_used": "PlodineParser"
18
+ }
parsed_receipts/studenac_receipt.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "store": "Studenac",
3
+ "date": "2025-04-01",
4
+ "address": "Zagreb, Karla Metikoša 2",
5
+ "items": [
6
+ {
7
+ "name": "TEREA SILVER",
8
+ "quantity": 3,
9
+ "price": 4.1
10
+ }
11
+ ],
12
+ "parser_used": "StudenacParser"
13
+ }
receipt_processor/__pycache__/google_ocr.cpython-312.pyc CHANGED
Binary files a/receipt_processor/__pycache__/google_ocr.cpython-312.pyc and b/receipt_processor/__pycache__/google_ocr.cpython-312.pyc differ
 
receipt_processor/__pycache__/receipt_parser.cpython-312.pyc DELETED
Binary file (3.37 kB)
 
receipt_processor/google_ocr.py CHANGED
@@ -1,12 +1,22 @@
1
  from google.cloud import vision
2
  import os
 
3
 
4
  class GoogleVisionOCR:
5
  def __init__(self):
6
- os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "receipt-vision-key.json"
 
 
 
7
  self.client = vision.ImageAnnotatorClient()
8
 
9
- def extract_text(self, image_content: bytes) -> str:
10
- image = vision.Image(content=image_content)
11
- response = self.client.text_detection(image=image)
12
- return response.text_annotations[0].description if response.text_annotations else ""
 
 
 
 
 
 
 
1
  from google.cloud import vision
2
  import os
3
+ import io
4
 
5
  class GoogleVisionOCR:
6
  def __init__(self):
7
+ # Initialize with either environment credentials or local key file
8
+ creds_path = os.getenv("GOOGLE_APPLICATION_CREDENTIALS", "receipt-vision-key.json")
9
+ if os.path.exists(creds_path):
10
+ os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = creds_path
11
  self.client = vision.ImageAnnotatorClient()
12
 
13
+ def extract_text(self, image_content):
14
+ """Extracts text from image using Google Vision API"""
15
+ try:
16
+ image = vision.Image(content=image_content)
17
+ response = self.client.text_detection(image=image)
18
+ texts = response.text_annotations
19
+ return texts[0].description if texts else ""
20
+ except Exception as e:
21
+ print(f"OCR Error: {str(e)}")
22
+ return ""
receipt_processor/parsers/__pycache__/base.cpython-312.pyc ADDED
Binary file (944 Bytes). View file
 
receipt_processor/parsers/__pycache__/konzum_parser.cpython-312.pyc ADDED
Binary file (2.26 kB). View file
 
receipt_processor/parsers/__pycache__/lidl_parser.cpython-312.pyc ADDED
Binary file (2.48 kB). View file
 
receipt_processor/parsers/__pycache__/parser_selector.cpython-312.pyc ADDED
Binary file (2.01 kB). View file
 
receipt_processor/parsers/__pycache__/plodine_parser.cpython-312.pyc ADDED
Binary file (2.69 kB). View file
 
receipt_processor/parsers/__pycache__/studenac_parser.cpython-312.pyc ADDED
Binary file (2.02 kB). View file
 
receipt_processor/parsers/base.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+
3
class BaseParser(ABC):
    """Abstract interface implemented by the store-specific receipt parsers."""

    @abstractmethod
    def parse(self, text: str) -> dict:
        """Parse receipt text into a dictionary; subclasses must override this."""

    @classmethod
    def get_parser_name(cls) -> str:
        """Return the class name with every "Parser" removed, lower-cased."""
        without_suffix = "".join(cls.__name__.split('Parser'))
        return without_suffix.lower()
receipt_processor/parsers/konzum_parser.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .base import BaseParser
2
+ import re
3
+ from datetime import datetime
4
+
5
class KonzumParser(BaseParser):
    """Parser for the OCR text of Konzum receipts."""

    def parse(self, text: str) -> dict:
        """Extract date, address and line items from Konzum receipt text.

        Args:
            text: OCR text of the receipt.

        Returns:
            A dict with keys ``store``, ``date`` (ISO ``YYYY-MM-DD`` string or
            ``None``), ``address`` (string or ``None``), ``items`` (list of
            dicts with ``name``, ``quantity`` and ``price``) and
            ``parser_used``.
        """
        result = {
            "store": "Konzum",
            "date": None,
            "address": None,
            "items": [],
            "parser_used": "KonzumParser"
        }

        # Address extraction: only the SECOND "Zagreb, <street> <number>" match
        # is used; with fewer than two matches the address stays None.
        # NOTE(review): presumably the first match is the company address in the
        # receipt header - confirm against real receipts.
        address_matches = re.findall(r'Zagreb, [A-Za-zšđžčćŠĐŽČĆ0-9 ]+ \d+', text)
        if len(address_matches) > 1:
            result['address'] = address_matches[1]

        # Date extraction: "Datum" followed by dd.mm.yyyy.
        date_match = re.search(r'Datum[\s:]*(\d{2}\.\d{2}\.\d{4})', text)
        if date_match:
            try:
                result['date'] = datetime.strptime(date_match.group(1), '%d.%m.%Y').date().isoformat()
            except ValueError:
                # The digits matched the pattern but are not a real calendar
                # date (e.g. 31.02.2025); leave the date as None.
                pass

        # Item parsing: the item table is the text between the column header
        # and the "UKUPNO" line.
        item_section = re.search(r'Naziv artikla.*?Kol Cijena\nIznos P\n(.*?)\nUKUPNO', text, re.DOTALL)
        if item_section:
            # Each item line: name, integer quantity, then a comma-decimal number.
            item_lines = re.finditer(
                r'^([A-ZŠĐŽČĆ][A-Za-zšđžčć0-9 \/\.-]+?)\s+(\d+)\s+([\d,]+)',
                item_section.group(1),
                re.MULTILINE
            )

            for match in item_lines:
                result['items'].append({
                    "name": match.group(1).strip(),
                    "quantity": int(match.group(2)),
                    "price": float(match.group(3).replace(',', '.'))
                })

        return result
receipt_processor/parsers/lidl_parser.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .base import BaseParser
2
+ import re
3
+ from datetime import datetime
4
+
5
class LidlParser(BaseParser):
    """Parser for the OCR text of Lidl receipts."""

    def parse(self, text: str) -> dict:
        """Extract date, address and line items from Lidl receipt text.

        Args:
            text: OCR text of the receipt.

        Returns:
            A dict with keys ``store``, ``date`` (ISO ``YYYY-MM-DD`` string or
            ``None``), ``address`` (string or ``None``), ``items`` (list of
            dicts with ``name``, ``quantity`` and ``price``) and
            ``parser_used``.
        """
        result = {
            "store": "Lidl",
            "date": None,
            "address": None,
            "items": [],
            "parser_used": "LidlParser"
        }

        # Address extraction: the line after the "OIB:" line, which must end in
        # "<number>, Zagreb" and be followed by a line starting with "€".
        address_match = re.search(
            r'OIB:[^\n]*\n([^\n]*\d+,\s*Zagreb)\s*\n€',
            text,
            re.MULTILINE
        )
        if address_match:
            result['address'] = address_match.group(1).strip()

        # Date extraction: "Datum:" followed by dd.mm.yyyy.
        date_match = re.search(r'Datum:\s*(\d{2}\.\d{2}\.\d{4})', text)
        if date_match:
            try:
                result['date'] = datetime.strptime(
                    date_match.group(1),
                    '%d.%m.%Y'
                ).date().isoformat()
            except ValueError:
                # The digits matched the pattern but are not a real calendar
                # date; leave the date as None.
                pass

        # Item parsing: the item block is everything between the first "€" line
        # and the "za platiti" line.
        item_section = re.search(
            r'(?<=€\n)(.*?)(?=\nza platiti)',
            text,
            re.DOTALL
        )
        if item_section:
            # An item is: leading digits (quantity), the name, and then a
            # following line holding a comma-decimal number and one capital letter.
            item_pattern = re.compile(
                r'^(\d+)\s+((?:[^\d\n]|[\d,]+[^\n])+?)\n'
                r'([\d,]+)\s+[A-Z]$',
                re.MULTILINE
            )

            matches = item_pattern.finditer(item_section.group(1))
            for match in matches:
                quantity = int(match.group(1))
                item_name = match.group(2).strip()
                price = float(match.group(3).replace(',', '.'))

                # Drop matches whose "name" contains <, > or *, contains "EUR:",
                # or consists only of digits.
                if not re.search(r'[<>*]|EUR:|^\d+$', item_name):
                    result['items'].append({
                        "name": item_name,
                        "quantity": quantity,
                        "price": price
                    })

        return result
receipt_processor/parsers/parser_selector.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import importlib
2
+ from .base import BaseParser
3
+
4
class ParserSelector:
    """Pick a store-specific parser by looking for store keywords in receipt text."""

    def __init__(self):
        # Store identifier -> keywords searched for in the lower-cased text.
        # The identifier also forms the parser module name "<store>_parser".
        self.store_keywords = {
            'lidl': ['lidl'],
            'konzum': ['konzum'],
            'plodine': ['plodine'],
            'studenac': ['studenac']
        }

    def get_store_parser(self, text: str):
        """Return a parser instance for the first store whose keyword is in ``text``.

        Stores are tried in the order of ``store_keywords``. For a matching
        store the module ``receipt_processor.parsers.<store>_parser`` is
        imported and the first attribute (in ``dir()`` order) that is a
        ``BaseParser`` subclass other than ``BaseParser`` is instantiated.

        Raises:
            ValueError: If no store yields a parser instance.
        """
        haystack = text.lower()

        for store in self.store_keywords:
            keywords = self.store_keywords[store]
            if not any(kw in haystack for kw in keywords):
                continue

            try:
                parser_module = importlib.import_module(f"receipt_processor.parsers.{store}_parser")
                for member_name in dir(parser_module):
                    candidate = getattr(parser_module, member_name)
                    try:
                        # issubclass raises TypeError for non-class members;
                        # those are simply skipped.
                        if issubclass(candidate, BaseParser) and candidate != BaseParser:
                            return candidate()
                    except TypeError:
                        continue
            except ModuleNotFoundError:
                # No parser module for this store; try the next one.
                continue

        raise ValueError(f"No parser found for text: {text[:50]}...")
receipt_processor/parsers/plodine_parser.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .base import BaseParser
2
+ import re
3
+ from datetime import datetime
4
+
5
class PlodineParser(BaseParser):
    """Parser for the OCR text of Plodine receipts."""

    def parse(self, text: str) -> dict:
        """Extract date/time, address and line items from Plodine receipt text.

        Args:
            text: OCR text of the receipt.

        Returns:
            A dict with keys ``store``, ``date`` (ISO ``YYYY-MM-DDTHH:MM:SS``
            string or ``None``), ``address`` (string or ``None``), ``items``
            (list of dicts with ``name``, ``quantity`` and ``price``) and
            ``parser_used``.
        """
        result = {
            "store": "Plodine",
            "date": None,
            "address": None,
            "items": [],
            "parser_used": "PlodineParser"
        }

        # Address extraction: after the "PLODINE d.d. Rijeka" header and the
        # "OIB:" line, capture the next two lines, which must be followed by a
        # line starting with digits.
        address_match = re.search(
            r'PLODINE d\.d\. Rijeka\n.*?\nOIB:.*?\n(.*?)\n(.*?)\n\d+',
            text,
            re.DOTALL
        )
        if address_match:
            street = address_match.group(1).strip()
            city = address_match.group(2).strip()
            # A first line containing "Rijeka" is rejected.
            # NOTE(review): presumably that is the head-office address rather
            # than the shop's - confirm.
            if "Rijeka" not in street:
                result['address'] = f"{street}, {city}"

        # Date extraction: first "dd.mm.yyyy HH:MM:SS" timestamp in the text.
        date_match = re.search(r'(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}:\d{2})', text)
        if date_match:
            try:
                result['date'] = datetime.strptime(
                    date_match.group(1),
                    '%d.%m.%Y %H:%M:%S'
                ).isoformat()
            except ValueError:
                # The digits matched the pattern but are not a real date/time;
                # leave the date as None.
                pass

        # Item parsing: the item block sits between the four-line column header
        # and the "ZA PLATITI" line.
        item_section = re.search(
            r'(?<=Artikal\nKol\nCijena\nIznos €\n)(.*?)(?=\nZA PLATITI)',
            text,
            re.DOTALL
        )
        if item_section:
            item_pattern = re.compile(
                r'^([^\n]+)\n'                          # Item name
                r'(\d+,\d+|\d+)\s*x\s*([\d,]+)?\n?'    # Quantity
                r'([\d,]+)?',                           # Price
                re.MULTILINE
            )

            matches = item_pattern.finditer(item_section.group(1))
            for match in matches:
                name = match.group(1).strip()
                quantity = float(match.group(2).replace(',', '.'))
                # Use the number right after "x" when present, otherwise the
                # number that follows it.
                price_str = match.group(3) or match.group(4)

                # Matches with no number at all are skipped.
                if price_str:
                    result['items'].append({
                        "name": name,
                        "quantity": quantity,
                        "price": float(price_str.replace(',', '.'))
                    })

        return result
receipt_processor/parsers/studenac_parser.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .base import BaseParser
2
+ import re
3
+ from datetime import datetime
4
+
5
class StudenacParser(BaseParser):
    """Parser for the OCR text of Studenac receipts."""

    def parse(self, text: str) -> dict:
        """Extract date, address and the recognised item from Studenac receipt text.

        Args:
            text: OCR text of the receipt.

        Returns:
            A dict with keys ``store``, ``date`` (ISO ``YYYY-MM-DD`` string or
            ``None``), ``address`` (string or ``None``), ``items`` (list with
            at most one ``name``/``quantity``/``price`` dict) and
            ``parser_used``.
        """
        result = {
            "store": "Studenac",
            "date": None,
            "address": None,
            "items": [],
            "parser_used": "StudenacParser"
        }

        # Address extraction: the "<part>, <part>" line between the
        # "Prodavaonica <number>" line and the "Blagajna" line.
        address_match = re.search(
            r'Prodavaonica \d+\n([^,]+,\s*[^\n]+)\nBlagajna',
            text
        )
        if address_match:
            result['address'] = address_match.group(1).strip()

        # Date extraction: "Datum:" followed by dd.mm.yyyy.
        date_match = re.search(r'Datum:\s*(\d{2}\.\d{2}\.\d{4})', text)
        if date_match:
            try:
                result['date'] = datetime.strptime(
                    date_match.group(1),
                    '%d.%m.%Y'
                ).date().isoformat()
            except ValueError:
                # The digits matched the pattern but are not a real calendar
                # date; leave the date as None.
                pass

        # Item parsing.
        # NOTE(review): the pattern is hard-coded to the literal product name
        # "TEREA SILVER", so any other product yields an empty item list.
        # Generalising it needs more sample receipts to confirm the layout.
        item_match = re.search(
            r'(C)\s+(TEREA SILVER)\n.*?\n(\d+)\s+([\d,]+)\n([\d,]+)',
            text,
            re.DOTALL
        )
        if item_match:
            # Group 3 is the integer quantity and group 4 the reported price;
            # group 5 (the number on the following line) is not used.
            result['items'].append({
                "name": item_match.group(2).strip(),
                "quantity": int(item_match.group(3)),
                "price": float(item_match.group(4).replace(',', '.'))
            })

        return result
receipt_processor/receipt_parser.py DELETED
@@ -1,58 +0,0 @@
1
- import re
2
-
3
- class ReceiptParser:
4
- @staticmethod
5
- def parse_receipt_text(full_text: str) -> dict:
6
- lines = full_text.splitlines()
7
- receipt = {"store": None, "date": None, "total": None, "items": []}
8
-
9
- # Store detection
10
- for line in lines:
11
- if any(kw in line.lower() for kw in ["konzum", "plodine", "studenac"]):
12
- receipt["store"] = line.strip()
13
- break
14
-
15
- # Date detection
16
- for line in lines:
17
- if match := re.search(r'\b(\d{2}\.\d{2}\.\d{4})\b', line):
18
- receipt["date"] = match.group(1)
19
- break
20
-
21
- # Total detection
22
- for line in reversed(lines):
23
- if any(word in line.lower() for word in ["ukupno", "za platiti"]):
24
- if match := re.search(r'(\d+,\d{2})', line):
25
- receipt["total"] = f"{match.group(1).replace(',', '.')} EUR"
26
- break
27
-
28
- # Item parsing logic
29
- merged_lines = []
30
- skip_next = False
31
- for i, line in enumerate(lines):
32
- if skip_next:
33
- skip_next = False
34
- continue
35
- if re.search(r'\d+,\d{2}$', line):
36
- if i+1 < len(lines) and re.match(r'^\d+,\d{2}', lines[i+1]):
37
- merged_lines.append(f"{line} {lines[i+1]}")
38
- skip_next = True
39
- continue
40
- merged_lines.append(line)
41
-
42
- item_patterns = [
43
- re.compile(r'(.+?)\s+(\d+)\s+(\d+,\d{2})\s+(\d+,\d{2})'),
44
- re.compile(r'(.+?)\s+(\d+)\s+x\s+(\d+,\d{2})\s+(\d+,\d{2})'),
45
- re.compile(r'(.+?)\s+(\d+)\s+(\d+)\s+(\d+,\d{2})'),
46
- ]
47
-
48
- for line in merged_lines:
49
- for pattern in item_patterns:
50
- if match := pattern.match(line):
51
- receipt["items"].append({
52
- "name": match.group(1).strip().title(),
53
- "qty": int(match.group(2)),
54
- "price": match.group(4).replace(",", ".")
55
- })
56
- break
57
-
58
- return receipt