Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -90,7 +90,7 @@ def extract_items(text):
|
|
| 90 |
# Find the end of the main table
|
| 91 |
table_end = len(lines)
|
| 92 |
for i in range(table_start, len(lines)):
|
| 93 |
-
if "Item(s) Total" in lines[i] or "Total Value" in lines[i]:
|
| 94 |
table_end = i
|
| 95 |
break
|
| 96 |
|
|
@@ -104,11 +104,12 @@ def extract_items(text):
|
|
| 104 |
table_row_pattern = r"\|?\s*([A-Za-z\s\d-]+(?:\s[A-Za-z\s\d-]+)*?)\s*\|?\s*(\d+)\s*\|?\s*([\d.]+)\s*\|?\s*([\d.]+)\s*\|?"
|
| 105 |
else:
|
| 106 |
# Pattern for Invoice_6164752968.pdf: "1 x Chicken Frankie | 60 | 6 | 54 | 2.5% | 1.35 | 2.5% | 1.35 | 56.7"
|
| 107 |
-
|
|
|
|
| 108 |
|
| 109 |
for line in table_lines:
|
| 110 |
line = line.strip()
|
| 111 |
-
if not line:
|
| 112 |
continue
|
| 113 |
# Skip alignment rows (e.g., "|---|---|")
|
| 114 |
if re.match(r"\|?\s*[-:]+(\s*\|\s*[-:]+)*\s*\|?", line):
|
|
@@ -147,7 +148,7 @@ def extract_items(text):
|
|
| 147 |
if platform_fee_start != -1:
|
| 148 |
platform_fee_end = len(lines)
|
| 149 |
for i in range(platform_fee_start, len(lines)):
|
| 150 |
-
if "Total" in lines[i] and
|
| 151 |
platform_fee_end = i + 1
|
| 152 |
break
|
| 153 |
platform_fee_lines = lines[platform_fee_start:platform_fee_end]
|
|
@@ -198,7 +199,6 @@ def extract_entities(text):
|
|
| 198 |
invoice_numbers.append(invoice_number)
|
| 199 |
print(f"Matched Invoice Number: {invoice_number}") # Debug
|
| 200 |
if invoice_numbers:
|
| 201 |
-
# Prioritize the invoice number associated with "Restaurant Service" (HSN Code: 996331)
|
| 202 |
for i, num in enumerate(invoice_numbers):
|
| 203 |
start_idx = text.find(num)
|
| 204 |
context = text[max(0, start_idx-100):start_idx+100]
|
|
@@ -206,7 +206,7 @@ def extract_entities(text):
|
|
| 206 |
primary_invoice_number = num
|
| 207 |
break
|
| 208 |
if primary_invoice_number == "Unknown":
|
| 209 |
-
primary_invoice_number = invoice_numbers[0]
|
| 210 |
print(f"Primary Invoice Number: {primary_invoice_number}") # Debug
|
| 211 |
|
| 212 |
# Vendor Name
|
|
@@ -268,10 +268,7 @@ def extract_entities(text):
|
|
| 268 |
print(f"Matched Amount: {amount}") # Debug
|
| 269 |
except ValueError:
|
| 270 |
continue
|
| 271 |
-
# Sum only the final totals (e.g., after taxes)
|
| 272 |
if total_amounts:
|
| 273 |
-
# In this invoice, "Total Value" appears twice: ₹184.5 (net value before taxes) and ₹193.726 (after taxes)
|
| 274 |
-
# We want the final total after taxes for the main items, plus the platform fee
|
| 275 |
main_total = max([amt for amt in total_amounts if amt > 100], default=0.0) # ₹193.726
|
| 276 |
platform_fee = min([amt for amt in total_amounts if amt < 10], default=0.0) # ₹3.54
|
| 277 |
total_amount = main_total + platform_fee
|
|
|
|
| 90 |
# Find the end of the main table
|
| 91 |
table_end = len(lines)
|
| 92 |
for i in range(table_start, len(lines)):
|
| 93 |
+
if "Item(s) Total" in lines[i] or "Total Value" in lines[i] or "Sr.No Particulars" in lines[i]:
|
| 94 |
table_end = i
|
| 95 |
break
|
| 96 |
|
|
|
|
| 104 |
table_row_pattern = r"\|?\s*([A-Za-z\s\d-]+(?:\s[A-Za-z\s\d-]+)*?)\s*\|?\s*(\d+)\s*\|?\s*([\d.]+)\s*\|?\s*([\d.]+)\s*\|?"
|
| 105 |
else:
|
| 106 |
# Pattern for Invoice_6164752968.pdf: "1 x Chicken Frankie | 60 | 6 | 54 | 2.5% | 1.35 | 2.5% | 1.35 | 56.7"
|
| 107 |
+
# Adjusted to be more flexible for potential spacing or formatting issues
|
| 108 |
+
table_row_pattern = r"\|?\s*(\d+\s*x\s*[A-Za-z\s\d-]+(?:\s[A-Za-z\s\d-]+)*?)\s*\|?\s*([\d.]+)\s*\|?\s*([\d.]+)\s*\|?\s*([\d.]+)\s*(?:\|?\s*[\d.%]+\s*\|?\s*[\d.]+){2}\s*\|?\s*([\d.]+)\s*\|?"
|
| 109 |
|
| 110 |
for line in table_lines:
|
| 111 |
line = line.strip()
|
| 112 |
+
if not line or "HSN Code" in line or "Total" in line:
|
| 113 |
continue
|
| 114 |
# Skip alignment rows (e.g., "|---|---|")
|
| 115 |
if re.match(r"\|?\s*[-:]+(\s*\|\s*[-:]+)*\s*\|?", line):
|
|
|
|
| 148 |
if platform_fee_start != -1:
|
| 149 |
platform_fee_end = len(lines)
|
| 150 |
for i in range(platform_fee_start, len(lines)):
|
| 151 |
+
if "Total" in lines[i] and "Sr.No" not in lines[i]:
|
| 152 |
platform_fee_end = i + 1
|
| 153 |
break
|
| 154 |
platform_fee_lines = lines[platform_fee_start:platform_fee_end]
|
|
|
|
| 199 |
invoice_numbers.append(invoice_number)
|
| 200 |
print(f"Matched Invoice Number: {invoice_number}") # Debug
|
| 201 |
if invoice_numbers:
|
|
|
|
| 202 |
for i, num in enumerate(invoice_numbers):
|
| 203 |
start_idx = text.find(num)
|
| 204 |
context = text[max(0, start_idx-100):start_idx+100]
|
|
|
|
| 206 |
primary_invoice_number = num
|
| 207 |
break
|
| 208 |
if primary_invoice_number == "Unknown":
|
| 209 |
+
primary_invoice_number = invoice_numbers[0]
|
| 210 |
print(f"Primary Invoice Number: {primary_invoice_number}") # Debug
|
| 211 |
|
| 212 |
# Vendor Name
|
|
|
|
| 268 |
print(f"Matched Amount: {amount}") # Debug
|
| 269 |
except ValueError:
|
| 270 |
continue
|
|
|
|
| 271 |
if total_amounts:
|
|
|
|
|
|
|
| 272 |
main_total = max([amt for amt in total_amounts if amt > 100], default=0.0) # ₹193.726
|
| 273 |
platform_fee = min([amt for amt in total_amounts if amt < 10], default=0.0) # ₹3.54
|
| 274 |
total_amount = main_total + platform_fee
|