Abhisesh7 committed on
Commit
f6c5876
·
verified ·
1 Parent(s): dc082e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -9
app.py CHANGED
@@ -90,7 +90,7 @@ def extract_items(text):
90
  # Find the end of the main table
91
  table_end = len(lines)
92
  for i in range(table_start, len(lines)):
93
- if "Item(s) Total" in lines[i] or "Total Value" in lines[i]:
94
  table_end = i
95
  break
96
 
@@ -104,11 +104,12 @@ def extract_items(text):
104
  table_row_pattern = r"\|?\s*([A-Za-z\s\d-]+(?:\s[A-Za-z\s\d-]+)*?)\s*\|?\s*(\d+)\s*\|?\s*([\d.]+)\s*\|?\s*([\d.]+)\s*\|?"
105
  else:
106
  # Pattern for Invoice_6164752968.pdf: "1 x Chicken Frankie | 60 | 6 | 54 | 2.5% | 1.35 | 2.5% | 1.35 | 56.7"
107
- table_row_pattern = r"\|?\s*(\d+\s*x\s*[A-Za-z\s\d-]+(?:\s[A-Za-z\s\d-]+)*?)\s*\|?\s*([\d.]+)\s*\|?\s*([\d.]+)\s*\|?\s*([\d.]+)\s*\|?\s*[\d.%]+\s*\|?\s*[\d.]+(?:\s*\|?\s*[\d.%]+\s*\|?\s*[\d.]+)?\s*\|?\s*([\d.]+)\s*\|?"
 
108
 
109
  for line in table_lines:
110
  line = line.strip()
111
- if not line:
112
  continue
113
  # Skip alignment rows (e.g., "|---|---|")
114
  if re.match(r"\|?\s*[-:]+(\s*\|\s*[-:]+)*\s*\|?", line):
@@ -147,7 +148,7 @@ def extract_items(text):
147
  if platform_fee_start != -1:
148
  platform_fee_end = len(lines)
149
  for i in range(platform_fee_start, len(lines)):
150
- if "Total" in lines[i] and not "Sr.No" in lines[i]:
151
  platform_fee_end = i + 1
152
  break
153
  platform_fee_lines = lines[platform_fee_start:platform_fee_end]
@@ -198,7 +199,6 @@ def extract_entities(text):
198
  invoice_numbers.append(invoice_number)
199
  print(f"Matched Invoice Number: {invoice_number}") # Debug
200
  if invoice_numbers:
201
- # Prioritize the invoice number associated with "Restaurant Service" (HSN Code: 996331)
202
  for i, num in enumerate(invoice_numbers):
203
  start_idx = text.find(num)
204
  context = text[max(0, start_idx-100):start_idx+100]
@@ -206,7 +206,7 @@ def extract_entities(text):
206
  primary_invoice_number = num
207
  break
208
  if primary_invoice_number == "Unknown":
209
- primary_invoice_number = invoice_numbers[0] # Fallback to the first invoice number
210
  print(f"Primary Invoice Number: {primary_invoice_number}") # Debug
211
 
212
  # Vendor Name
@@ -268,10 +268,7 @@ def extract_entities(text):
268
  print(f"Matched Amount: {amount}") # Debug
269
  except ValueError:
270
  continue
271
- # Sum only the final totals (e.g., after taxes)
272
  if total_amounts:
273
- # In this invoice, "Total Value" appears twice: ₹184.5 (net value before taxes) and ₹193.726 (after taxes)
274
- # We want the final total after taxes for the main items, plus the platform fee
275
  main_total = max([amt for amt in total_amounts if amt > 100], default=0.0) # ₹193.726
276
  platform_fee = min([amt for amt in total_amounts if amt < 10], default=0.0) # ₹3.54
277
  total_amount = main_total + platform_fee
 
90
  # Find the end of the main table
91
  table_end = len(lines)
92
  for i in range(table_start, len(lines)):
93
+ if "Item(s) Total" in lines[i] or "Total Value" in lines[i] or "Sr.No Particulars" in lines[i]:
94
  table_end = i
95
  break
96
 
 
104
  table_row_pattern = r"\|?\s*([A-Za-z\s\d-]+(?:\s[A-Za-z\s\d-]+)*?)\s*\|?\s*(\d+)\s*\|?\s*([\d.]+)\s*\|?\s*([\d.]+)\s*\|?"
105
  else:
106
  # Pattern for Invoice_6164752968.pdf: "1 x Chicken Frankie | 60 | 6 | 54 | 2.5% | 1.35 | 2.5% | 1.35 | 56.7"
107
+ # Adjusted to be more flexible for potential spacing or formatting issues
108
+ table_row_pattern = r"\|?\s*(\d+\s*x\s*[A-Za-z\s\d-]+(?:\s[A-Za-z\s\d-]+)*?)\s*\|?\s*([\d.]+)\s*\|?\s*([\d.]+)\s*\|?\s*([\d.]+)\s*(?:\|?\s*[\d.%]+\s*\|?\s*[\d.]+){2}\s*\|?\s*([\d.]+)\s*\|?"
109
 
110
  for line in table_lines:
111
  line = line.strip()
112
+ if not line or "HSN Code" in line or "Total" in line:
113
  continue
114
  # Skip alignment rows (e.g., "|---|---|")
115
  if re.match(r"\|?\s*[-:]+(\s*\|\s*[-:]+)*\s*\|?", line):
 
148
  if platform_fee_start != -1:
149
  platform_fee_end = len(lines)
150
  for i in range(platform_fee_start, len(lines)):
151
+ if "Total" in lines[i] and "Sr.No" not in lines[i]:
152
  platform_fee_end = i + 1
153
  break
154
  platform_fee_lines = lines[platform_fee_start:platform_fee_end]
 
199
  invoice_numbers.append(invoice_number)
200
  print(f"Matched Invoice Number: {invoice_number}") # Debug
201
  if invoice_numbers:
 
202
  for i, num in enumerate(invoice_numbers):
203
  start_idx = text.find(num)
204
  context = text[max(0, start_idx-100):start_idx+100]
 
206
  primary_invoice_number = num
207
  break
208
  if primary_invoice_number == "Unknown":
209
+ primary_invoice_number = invoice_numbers[0]
210
  print(f"Primary Invoice Number: {primary_invoice_number}") # Debug
211
 
212
  # Vendor Name
 
268
  print(f"Matched Amount: {amount}") # Debug
269
  except ValueError:
270
  continue
 
271
  if total_amounts:
 
 
272
  main_total = max([amt for amt in total_amounts if amt > 100], default=0.0) # ₹193.726
273
  platform_fee = min([amt for amt in total_amounts if amt < 10], default=0.0) # ₹3.54
274
  total_amount = main_total + platform_fee