Namra-Satva committed on
Commit
a652976
·
verified ·
1 Parent(s): 1a6a4fd

Update model_utils.py

Browse files
Files changed (1) hide show
  1. model_utils.py +7 -23
model_utils.py CHANGED
@@ -23,39 +23,23 @@ def initialize_data_dict():
23
  def parse_products(raw_text):
24
  structured = []
25
  lines = raw_text.split('\n')
26
-
27
  for line in lines:
28
- line = line.strip()
29
- if not line:
30
- continue
31
-
32
  match = re.match(r"(\d+)\s+(.*)\s+([\d,]+\.\d{2})\s+([\d,]+\.\d{2})", line)
33
  if match:
34
  qty, desc, unit_price, amount = match.groups()
35
  structured.append({
36
- "qty": int(qty),
37
  "description": desc.strip(),
38
- "unit_price": float(unit_price.replace(",", "")),
39
- "amount": float(amount.replace(",", ""))
40
- })
41
- continue
42
-
43
- match_description_mixed = re.search(r"^(.*?)(\d+)\s+[^A-Za-z0-9\s]?([\d.,]+)\s+[^A-Za-z0-9\s]?([\d.,]+)$", line)
44
- if match_description_mixed:
45
- structured.append({
46
- "qty": int(match_description_mixed.group(2)),
47
- "description": match_description_mixed.group(1).strip(" .:-"),
48
- "unit_price": float(match_description_mixed.group(3).replace(",", "")),
49
- "amount": float(match_description_mixed.group(4).replace(",", ""))
50
  })
51
- else:
52
  structured.append({
53
  "qty": 0,
54
- "description": line,
55
- "unit_price": 0.0,
56
- "amount": 0.0
57
  })
58
-
59
  return structured
60
 
61
  def extract_invoice_data_from_image(image_path: str):
 
23
  def parse_products(raw_text):
24
  structured = []
25
  lines = raw_text.split('\n')
 
26
  for line in lines:
 
 
 
 
27
  match = re.match(r"(\d+)\s+(.*)\s+([\d,]+\.\d{2})\s+([\d,]+\.\d{2})", line)
28
  if match:
29
  qty, desc, unit_price, amount = match.groups()
30
  structured.append({
31
+ "qty": qty,
32
  "description": desc.strip(),
33
+ "unit_price": unit_price,
34
+ "amount": amount
 
 
 
 
 
 
 
 
 
 
35
  })
36
+ elif line.strip():
37
  structured.append({
38
  "qty": 0,
39
+ "description": line.strip(),
40
+ "unit_price": 0,
41
+ "amount": 0
42
  })
 
43
  return structured
44
 
45
  def extract_invoice_data_from_image(image_path: str):