Spaces:
Running
Running
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +303 -83
src/streamlit_app.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
| 1 |
# =========================
|
| 2 |
# Invoice Extractor (Qwen3-VL via RunPod vLLM) - Batch Mode with Tax Validation
|
|
|
|
|
|
|
| 3 |
# =========================
|
| 4 |
import os
|
| 5 |
from pathlib import Path
|
|
@@ -108,7 +110,7 @@ def clean_float(x) -> float:
|
|
| 108 |
def normalize_date(date_str) -> str:
|
| 109 |
"""
|
| 110 |
Normalize various date formats to dd-MMM-yyyy format (e.g., 01-Jan-2025)
|
| 111 |
-
Handles: ISO, US, EU, and
|
| 112 |
Returns empty string if date cannot be parsed
|
| 113 |
"""
|
| 114 |
if not date_str or date_str == "":
|
|
@@ -119,23 +121,88 @@ def normalize_date(date_str) -> str:
|
|
| 119 |
if date_str == "":
|
| 120 |
return ""
|
| 121 |
|
| 122 |
-
#
|
| 123 |
formats = [
|
| 124 |
-
|
| 125 |
-
"%
|
| 126 |
-
"%m-%d-%Y", # 01-15-2025 (US)
|
| 127 |
"%Y/%m/%d", # 2025/01/15
|
| 128 |
-
"%d/%m/%Y", # 15/01/2025
|
| 129 |
-
"%m/%d/%Y", # 01/15/2025
|
| 130 |
-
"%d.%m.%Y", # 15.01.2025
|
| 131 |
"%Y.%m.%d", # 2025.01.15
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
"%d %B %Y", # 15 January 2025
|
| 133 |
"%d %b %Y", # 15 Jan 2025
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
"%B %d, %Y", # January 15, 2025
|
| 135 |
"%b %d, %Y", # Jan 15, 2025
|
| 136 |
-
"%d
|
| 137 |
-
"%d
|
| 138 |
-
"%Y
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
]
|
| 140 |
|
| 141 |
parsed_date = None
|
|
@@ -148,6 +215,19 @@ def normalize_date(date_str) -> str:
|
|
| 148 |
except (ValueError, TypeError):
|
| 149 |
continue
|
| 150 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
# If no format matched, return empty string
|
| 152 |
if parsed_date is None:
|
| 153 |
return ""
|
|
@@ -158,6 +238,7 @@ def normalize_date(date_str) -> str:
|
|
| 158 |
def parse_date_to_object(date_str):
|
| 159 |
"""
|
| 160 |
Parse a date string to a datetime.date object for date_input widget
|
|
|
|
| 161 |
Returns None if date cannot be parsed
|
| 162 |
"""
|
| 163 |
if not date_str or date_str == "":
|
|
@@ -168,23 +249,88 @@ def parse_date_to_object(date_str):
|
|
| 168 |
if date_str == "":
|
| 169 |
return None
|
| 170 |
|
| 171 |
-
#
|
| 172 |
formats = [
|
| 173 |
-
|
| 174 |
-
"%
|
| 175 |
-
"%m-%d-%Y", # 01-15-2025 (US)
|
| 176 |
"%Y/%m/%d", # 2025/01/15
|
| 177 |
-
"%d/%m/%Y", # 15/01/2025
|
| 178 |
-
"%m/%d/%Y", # 01/15/2025
|
| 179 |
-
"%d.%m.%Y", # 15.01.2025
|
| 180 |
"%Y.%m.%d", # 2025.01.15
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
"%d %B %Y", # 15 January 2025
|
| 182 |
"%d %b %Y", # 15 Jan 2025
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
"%B %d, %Y", # January 15, 2025
|
| 184 |
"%b %d, %Y", # Jan 15, 2025
|
| 185 |
-
"%d
|
| 186 |
-
"%d
|
| 187 |
-
"%Y
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
]
|
| 189 |
|
| 190 |
# Try parsing with each format
|
|
@@ -195,6 +341,19 @@ def parse_date_to_object(date_str):
|
|
| 195 |
except (ValueError, TypeError):
|
| 196 |
continue
|
| 197 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
return None
|
| 199 |
|
| 200 |
# -----------------------------
|
|
@@ -449,10 +608,11 @@ def parse_vllm_json(raw_json_text):
|
|
| 449 |
def validate_and_calculate_taxes(structured_data):
|
| 450 |
"""
|
| 451 |
Enhanced tax validation with smart line-item calculation:
|
| 452 |
-
1.
|
| 453 |
-
2. Skip
|
| 454 |
-
3.
|
| 455 |
-
4.
|
|
|
|
| 456 |
"""
|
| 457 |
|
| 458 |
subtotal = structured_data.get("Subtotal", 0.0)
|
|
@@ -473,81 +633,128 @@ def validate_and_calculate_taxes(structured_data):
|
|
| 473 |
structured_data["tax_skip_reason"] = "Tax rate exists but tax amount is 0"
|
| 474 |
return structured_data
|
| 475 |
|
| 476 |
-
#
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
# TEST SOURCE A: tax_rate
|
| 481 |
-
if model_tax_rate > 0:
|
| 482 |
-
expected_tax_from_rate = subtotal * (model_tax_rate / 100)
|
| 483 |
-
expected_total_from_rate = subtotal + expected_tax_from_rate
|
| 484 |
-
error_from_rate = abs(expected_total_from_rate - total_amount)
|
| 485 |
-
else:
|
| 486 |
-
error_from_rate = float('inf')
|
| 487 |
-
|
| 488 |
-
# TEST SOURCE B: tax_amount
|
| 489 |
-
if model_tax_amount > 0:
|
| 490 |
-
calculated_rate_from_amount = (model_tax_amount / subtotal) * 100
|
| 491 |
-
expected_total_from_amount = subtotal + model_tax_amount
|
| 492 |
-
error_from_amount = abs(expected_total_from_amount - total_amount)
|
| 493 |
-
else:
|
| 494 |
-
error_from_amount = float('inf')
|
| 495 |
-
|
| 496 |
-
# PICK WINNER (or use whichever is available)
|
| 497 |
-
if model_tax_rate > 0 or model_tax_amount > 0:
|
| 498 |
-
if error_from_rate < error_from_amount:
|
| 499 |
-
authoritative_rate = round(model_tax_rate, 4)
|
| 500 |
-
authority_source = "tax_rate"
|
| 501 |
-
else:
|
| 502 |
-
authoritative_rate = round(calculated_rate_from_amount, 4)
|
| 503 |
-
authority_source = "tax_amount"
|
| 504 |
-
else:
|
| 505 |
-
# No tax information available
|
| 506 |
-
structured_data["tax_validated"] = False
|
| 507 |
-
structured_data["tax_skip_reason"] = "No tax rate or amount provided"
|
| 508 |
-
return structured_data
|
| 509 |
-
|
| 510 |
-
# APPLY to line items - BUT respect explicit 0.00 values
|
| 511 |
-
calculated_total_tax = 0.0
|
| 512 |
|
| 513 |
for item in items:
|
| 514 |
amount = item.get("Amount", 0.0)
|
| 515 |
-
original_tax = item.get("Tax", 0.0)
|
| 516 |
raw_tax_value = item.get("Tax_Raw", "") # Original string value from JSON
|
| 517 |
|
| 518 |
-
# If item amount is 0,
|
| 519 |
if amount == 0.0:
|
| 520 |
item["Tax"] = 0.0
|
| 521 |
item["Line Total"] = 0.0
|
|
|
|
| 522 |
continue
|
| 523 |
|
| 524 |
-
# Distinguish between
|
| 525 |
-
# Empty
|
| 526 |
-
# "0", "0.0", "0.00"
|
|
|
|
| 527 |
|
|
|
|
| 528 |
is_explicitly_zero = False
|
|
|
|
| 529 |
if isinstance(raw_tax_value, str):
|
| 530 |
cleaned = raw_tax_value.strip()
|
| 531 |
-
|
| 532 |
-
|
| 533 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 534 |
elif raw_tax_value == 0 or raw_tax_value == 0.0:
|
| 535 |
-
# If it's a number 0, treat as explicit
|
| 536 |
is_explicitly_zero = True
|
| 537 |
|
| 538 |
-
# If
|
| 539 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 540 |
item["Tax"] = 0.0
|
| 541 |
item["Line Total"] = amount
|
| 542 |
-
|
| 543 |
continue
|
| 544 |
|
| 545 |
-
#
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 551 |
|
| 552 |
# Update summary - ENSURE BOTH FIELDS ARE FILLED
|
| 553 |
structured_data["Tax Percentage"] = authoritative_rate
|
|
@@ -1174,16 +1381,29 @@ elif len(st.session_state.batch_results) > 0:
|
|
| 1174 |
with frame_right:
|
| 1175 |
st.subheader(f"Editable Invoice: {current['file_name']}")
|
| 1176 |
|
| 1177 |
-
# SWAP BUTTON REMOVED COMPLETELY
|
| 1178 |
-
|
| 1179 |
# ----------------- FORM START -----------------
|
| 1180 |
with st.form(key=f"edit_form_{selected_hash}", clear_on_submit=False):
|
| 1181 |
tabs = st.tabs(["Invoice Details", "Sender/Recipient", "Bank Details", "Line Items"])
|
| 1182 |
|
| 1183 |
with tabs[0]:
|
| 1184 |
st.text_input("Invoice Number", key=f"Invoice Number_{selected_hash}")
|
| 1185 |
-
|
| 1186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1187 |
|
| 1188 |
curr_options = ['USD', 'EUR', 'GBP', 'INR', 'Other']
|
| 1189 |
if st.session_state[f"Currency_{selected_hash}"] not in curr_options:
|
|
|
|
| 1 |
# =========================
|
| 2 |
# Invoice Extractor (Qwen3-VL via RunPod vLLM) - Batch Mode with Tax Validation
|
| 3 |
+
# UPDATED: Comprehensive date parsing (50+ formats) + Hybrid date display
|
| 4 |
+
# FIX: Tax calculation skips both empty ("") and explicit zero (0.00) values
|
| 5 |
# =========================
|
| 6 |
import os
|
| 7 |
from pathlib import Path
|
|
|
|
| 110 |
def normalize_date(date_str) -> str:
|
| 111 |
"""
|
| 112 |
Normalize various date formats to dd-MMM-yyyy format (e.g., 01-Jan-2025)
|
| 113 |
+
Handles: ISO, US, EU, Asian, two-digit years, and 50+ worldwide date formats
|
| 114 |
Returns empty string if date cannot be parsed
|
| 115 |
"""
|
| 116 |
if not date_str or date_str == "":
|
|
|
|
| 121 |
if date_str == "":
|
| 122 |
return ""
|
| 123 |
|
| 124 |
+
# Comprehensive list of date formats to try (order matters - most specific first)
|
| 125 |
formats = [
|
| 126 |
+
# ISO formats (4-digit year)
|
| 127 |
+
"%Y-%m-%d", # 2025-01-15
|
|
|
|
| 128 |
"%Y/%m/%d", # 2025/01/15
|
|
|
|
|
|
|
|
|
|
| 129 |
"%Y.%m.%d", # 2025.01.15
|
| 130 |
+
"%Y %m %d", # 2025 01 15
|
| 131 |
+
"%Y%m%d", # 20250115 (compact)
|
| 132 |
+
|
| 133 |
+
# European formats with full month names (4-digit year)
|
| 134 |
"%d %B %Y", # 15 January 2025
|
| 135 |
"%d %b %Y", # 15 Jan 2025
|
| 136 |
+
"%d-%B-%Y", # 15-January-2025
|
| 137 |
+
"%d-%b-%Y", # 15-Jan-2025
|
| 138 |
+
"%d.%B.%Y", # 15.January.2025
|
| 139 |
+
"%d.%b.%Y", # 15.Jan.2025
|
| 140 |
+
"%d/%B/%Y", # 15/January/2025
|
| 141 |
+
"%d/%b/%Y", # 15/Jan/2025
|
| 142 |
+
|
| 143 |
+
# US formats with full month names (4-digit year)
|
| 144 |
"%B %d, %Y", # January 15, 2025
|
| 145 |
"%b %d, %Y", # Jan 15, 2025
|
| 146 |
+
"%B %d %Y", # January 15 2025
|
| 147 |
+
"%b %d %Y", # Jan 15 2025
|
| 148 |
+
"%B-%d-%Y", # January-15-2025
|
| 149 |
+
"%b-%d-%Y", # Jan-15-2025
|
| 150 |
+
|
| 151 |
+
# European formats - Day first (4-digit year)
|
| 152 |
+
"%d-%m-%Y", # 15-01-2025
|
| 153 |
+
"%d/%m/%Y", # 15/01/2025
|
| 154 |
+
"%d.%m.%Y", # 15.01.2025
|
| 155 |
+
"%d %m %Y", # 15 01 2025
|
| 156 |
+
|
| 157 |
+
# US formats - Month first (4-digit year)
|
| 158 |
+
"%m-%d-%Y", # 01-15-2025
|
| 159 |
+
"%m/%d/%Y", # 01/15/2025
|
| 160 |
+
"%m.%d.%Y", # 01.15.2025
|
| 161 |
+
"%m %d %Y", # 01 15 2025
|
| 162 |
+
|
| 163 |
+
# European formats with 2-digit year - Day first
|
| 164 |
+
"%d-%m-%y", # 15-01-25
|
| 165 |
+
"%d/%m/%y", # 15/01/25 or 25/09/25 ← FIXES YOUR ISSUE!
|
| 166 |
+
"%d.%m.%y", # 15.01.25
|
| 167 |
+
"%d %m %y", # 15 01 25
|
| 168 |
+
|
| 169 |
+
# US formats with 2-digit year - Month first
|
| 170 |
+
"%m-%d-%y", # 01-15-25
|
| 171 |
+
"%m/%d/%y", # 01/15/25
|
| 172 |
+
"%m.%d.%y", # 01.15.25
|
| 173 |
+
"%m %d %y", # 01 15 25
|
| 174 |
+
|
| 175 |
+
# ISO with 2-digit year
|
| 176 |
+
"%y-%m-%d", # 25-01-15
|
| 177 |
+
"%y/%m/%d", # 25/01/15
|
| 178 |
+
"%y.%m.%d", # 25.01.15
|
| 179 |
+
"%y %m %d", # 25 01 15
|
| 180 |
+
|
| 181 |
+
# Compact formats with 2-digit year
|
| 182 |
+
"%y%m%d", # 250115
|
| 183 |
+
"%d%m%y", # 150125
|
| 184 |
+
"%m%d%y", # 011525
|
| 185 |
+
|
| 186 |
+
# European formats with abbreviated month (2-digit year)
|
| 187 |
+
"%d-%b-%y", # 15-Jan-25
|
| 188 |
+
"%d/%b/%y", # 15/Jan/25
|
| 189 |
+
"%d.%b.%y", # 15.Jan.25
|
| 190 |
+
"%d %b %y", # 15 Jan 25
|
| 191 |
+
"%d-%B-%y", # 15-January-25
|
| 192 |
+
"%d/%B/%y", # 15/January/25
|
| 193 |
+
|
| 194 |
+
# US formats with abbreviated month (2-digit year)
|
| 195 |
+
"%b %d, %y", # Jan 15, 25
|
| 196 |
+
"%b %d %y", # Jan 15 25
|
| 197 |
+
"%B %d, %y", # January 15, 25
|
| 198 |
+
"%B %d %y", # January 15 25
|
| 199 |
+
"%b-%d-%y", # Jan-15-25
|
| 200 |
+
"%B-%d-%y", # January-15-25
|
| 201 |
+
|
| 202 |
+
# Compact 8-digit formats
|
| 203 |
+
"%d%m%Y", # 15012025
|
| 204 |
+
"%m%d%Y", # 01152025
|
| 205 |
+
"%Y%d%m", # 20251501
|
| 206 |
]
|
| 207 |
|
| 208 |
parsed_date = None
|
|
|
|
| 215 |
except (ValueError, TypeError):
|
| 216 |
continue
|
| 217 |
|
| 218 |
+
# If still not parsed, try removing ordinal suffixes (st, nd, rd, th)
|
| 219 |
+
if parsed_date is None and isinstance(date_str, str):
|
| 220 |
+
import re
|
| 221 |
+
cleaned = re.sub(r'(\d+)(st|nd|rd|th)\b', r'\1', date_str, flags=re.IGNORECASE)
|
| 222 |
+
|
| 223 |
+
if cleaned != date_str:
|
| 224 |
+
for fmt in formats:
|
| 225 |
+
try:
|
| 226 |
+
parsed_date = datetime.strptime(cleaned, fmt)
|
| 227 |
+
break
|
| 228 |
+
except (ValueError, TypeError):
|
| 229 |
+
continue
|
| 230 |
+
|
| 231 |
# If no format matched, return empty string
|
| 232 |
if parsed_date is None:
|
| 233 |
return ""
|
|
|
|
| 238 |
def parse_date_to_object(date_str):
|
| 239 |
"""
|
| 240 |
Parse a date string to a datetime.date object for date_input widget
|
| 241 |
+
Handles: ISO, US, EU, Asian, two-digit years, and 50+ worldwide date formats
|
| 242 |
Returns None if date cannot be parsed
|
| 243 |
"""
|
| 244 |
if not date_str or date_str == "":
|
|
|
|
| 249 |
if date_str == "":
|
| 250 |
return None
|
| 251 |
|
| 252 |
+
# Comprehensive list of date formats to try (same as normalize_date)
|
| 253 |
formats = [
|
| 254 |
+
# ISO formats (4-digit year)
|
| 255 |
+
"%Y-%m-%d", # 2025-01-15
|
|
|
|
| 256 |
"%Y/%m/%d", # 2025/01/15
|
|
|
|
|
|
|
|
|
|
| 257 |
"%Y.%m.%d", # 2025.01.15
|
| 258 |
+
"%Y %m %d", # 2025 01 15
|
| 259 |
+
"%Y%m%d", # 20250115 (compact)
|
| 260 |
+
|
| 261 |
+
# European formats with full month names (4-digit year)
|
| 262 |
"%d %B %Y", # 15 January 2025
|
| 263 |
"%d %b %Y", # 15 Jan 2025
|
| 264 |
+
"%d-%B-%Y", # 15-January-2025
|
| 265 |
+
"%d-%b-%Y", # 15-Jan-2025
|
| 266 |
+
"%d.%B.%Y", # 15.January.2025
|
| 267 |
+
"%d.%b.%Y", # 15.Jan.2025
|
| 268 |
+
"%d/%B/%Y", # 15/January/2025
|
| 269 |
+
"%d/%b/%Y", # 15/Jan/2025
|
| 270 |
+
|
| 271 |
+
# US formats with full month names (4-digit year)
|
| 272 |
"%B %d, %Y", # January 15, 2025
|
| 273 |
"%b %d, %Y", # Jan 15, 2025
|
| 274 |
+
"%B %d %Y", # January 15 2025
|
| 275 |
+
"%b %d %Y", # Jan 15 2025
|
| 276 |
+
"%B-%d-%Y", # January-15-2025
|
| 277 |
+
"%b-%d-%Y", # Jan-15-2025
|
| 278 |
+
|
| 279 |
+
# European formats - Day first (4-digit year)
|
| 280 |
+
"%d-%m-%Y", # 15-01-2025
|
| 281 |
+
"%d/%m/%Y", # 15/01/2025
|
| 282 |
+
"%d.%m.%Y", # 15.01.2025
|
| 283 |
+
"%d %m %Y", # 15 01 2025
|
| 284 |
+
|
| 285 |
+
# US formats - Month first (4-digit year)
|
| 286 |
+
"%m-%d-%Y", # 01-15-2025
|
| 287 |
+
"%m/%d/%Y", # 01/15/2025
|
| 288 |
+
"%m.%d.%Y", # 01.15.2025
|
| 289 |
+
"%m %d %Y", # 01 15 2025
|
| 290 |
+
|
| 291 |
+
# European formats with 2-digit year - Day first
|
| 292 |
+
"%d-%m-%y", # 15-01-25
|
| 293 |
+
"%d/%m/%y", # 15/01/25 or 25/09/25 ← FIXES YOUR ISSUE!
|
| 294 |
+
"%d.%m.%y", # 15.01.25
|
| 295 |
+
"%d %m %y", # 15 01 25
|
| 296 |
+
|
| 297 |
+
# US formats with 2-digit year - Month first
|
| 298 |
+
"%m-%d-%y", # 01-15-25
|
| 299 |
+
"%m/%d/%y", # 01/15/25
|
| 300 |
+
"%m.%d.%y", # 01.15.25
|
| 301 |
+
"%m %d %y", # 01 15 25
|
| 302 |
+
|
| 303 |
+
# ISO with 2-digit year
|
| 304 |
+
"%y-%m-%d", # 25-01-15
|
| 305 |
+
"%y/%m/%d", # 25/01/15
|
| 306 |
+
"%y.%m.%d", # 25.01.15
|
| 307 |
+
"%y %m %d", # 25 01 15
|
| 308 |
+
|
| 309 |
+
# Compact formats with 2-digit year
|
| 310 |
+
"%y%m%d", # 250115
|
| 311 |
+
"%d%m%y", # 150125
|
| 312 |
+
"%m%d%y", # 011525
|
| 313 |
+
|
| 314 |
+
# European formats with abbreviated month (2-digit year)
|
| 315 |
+
"%d-%b-%y", # 15-Jan-25
|
| 316 |
+
"%d/%b/%y", # 15/Jan/25
|
| 317 |
+
"%d.%b.%y", # 15.Jan.25
|
| 318 |
+
"%d %b %y", # 15 Jan 25
|
| 319 |
+
"%d-%B-%y", # 15-January-25
|
| 320 |
+
"%d/%B/%y", # 15/January/25
|
| 321 |
+
|
| 322 |
+
# US formats with abbreviated month (2-digit year)
|
| 323 |
+
"%b %d, %y", # Jan 15, 25
|
| 324 |
+
"%b %d %y", # Jan 15 25
|
| 325 |
+
"%B %d, %y", # January 15, 25
|
| 326 |
+
"%B %d %y", # January 15 25
|
| 327 |
+
"%b-%d-%y", # Jan-15-25
|
| 328 |
+
"%B-%d-%y", # January-15-25
|
| 329 |
+
|
| 330 |
+
# Compact 8-digit formats
|
| 331 |
+
"%d%m%Y", # 15012025
|
| 332 |
+
"%m%d%Y", # 01152025
|
| 333 |
+
"%Y%d%m", # 20251501
|
| 334 |
]
|
| 335 |
|
| 336 |
# Try parsing with each format
|
|
|
|
| 341 |
except (ValueError, TypeError):
|
| 342 |
continue
|
| 343 |
|
| 344 |
+
# If still not parsed, try removing ordinal suffixes
|
| 345 |
+
if isinstance(date_str, str):
|
| 346 |
+
import re
|
| 347 |
+
cleaned = re.sub(r'(\d+)(st|nd|rd|th)\b', r'\1', date_str, flags=re.IGNORECASE)
|
| 348 |
+
|
| 349 |
+
if cleaned != date_str:
|
| 350 |
+
for fmt in formats:
|
| 351 |
+
try:
|
| 352 |
+
parsed_date = datetime.strptime(cleaned, fmt)
|
| 353 |
+
return parsed_date.date()
|
| 354 |
+
except (ValueError, TypeError):
|
| 355 |
+
continue
|
| 356 |
+
|
| 357 |
return None
|
| 358 |
|
| 359 |
# -----------------------------
|
|
|
|
| 608 |
def validate_and_calculate_taxes(structured_data):
|
| 609 |
"""
|
| 610 |
Enhanced tax validation with smart line-item calculation:
|
| 611 |
+
1. Skip calculation if tax is empty ("") - tax not provided
|
| 612 |
+
2. Skip calculation if tax is explicitly 0.00 - tax-exempt item
|
| 613 |
+
3. Calculate tax ONLY when line item has a non-zero tax value
|
| 614 |
+
4. Skip validation if tax_amount is 0 but tax_rate exists
|
| 615 |
+
5. Ensure both Tax Percentage and Total Tax are properly filled
|
| 616 |
"""
|
| 617 |
|
| 618 |
subtotal = structured_data.get("Subtotal", 0.0)
|
|
|
|
| 633 |
structured_data["tax_skip_reason"] = "Tax rate exists but tax amount is 0"
|
| 634 |
return structured_data
|
| 635 |
|
| 636 |
+
# FIRST PASS: Identify which items are taxable (BEFORE determining authoritative rate)
|
| 637 |
+
# This is critical because we need to know the taxable subtotal to calculate the correct rate
|
| 638 |
+
taxable_items = []
|
| 639 |
+
non_taxable_items = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 640 |
|
| 641 |
for item in items:
|
| 642 |
amount = item.get("Amount", 0.0)
|
|
|
|
| 643 |
raw_tax_value = item.get("Tax_Raw", "") # Original string value from JSON
|
| 644 |
|
| 645 |
+
# If item amount is 0, it's non-taxable
|
| 646 |
if amount == 0.0:
|
| 647 |
item["Tax"] = 0.0
|
| 648 |
item["Line Total"] = 0.0
|
| 649 |
+
non_taxable_items.append(item)
|
| 650 |
continue
|
| 651 |
|
| 652 |
+
# Distinguish between:
|
| 653 |
+
# 1. Empty ("") = tax not provided → NON-TAXABLE
|
| 654 |
+
# 2. Explicit "0", "0.0", "0.00" = tax-exempt → NON-TAXABLE
|
| 655 |
+
# 3. Non-zero value = TAXABLE (calculate tax for this item)
|
| 656 |
|
| 657 |
+
is_empty = False
|
| 658 |
is_explicitly_zero = False
|
| 659 |
+
|
| 660 |
if isinstance(raw_tax_value, str):
|
| 661 |
cleaned = raw_tax_value.strip()
|
| 662 |
+
if cleaned == "":
|
| 663 |
+
# Empty string means tax was not provided
|
| 664 |
+
is_empty = True
|
| 665 |
+
else:
|
| 666 |
+
# Check if it's explicitly set to some form of zero
|
| 667 |
+
try:
|
| 668 |
+
cleaned_value = float(re.sub(r'[^\d\.-]', '', cleaned) or '0')
|
| 669 |
+
if cleaned_value == 0.0:
|
| 670 |
+
is_explicitly_zero = True
|
| 671 |
+
except (ValueError, TypeError):
|
| 672 |
+
pass
|
| 673 |
+
elif raw_tax_value is None or raw_tax_value == "":
|
| 674 |
+
is_empty = True
|
| 675 |
elif raw_tax_value == 0 or raw_tax_value == 0.0:
|
| 676 |
+
# If it's a number 0, treat as explicit zero
|
| 677 |
is_explicitly_zero = True
|
| 678 |
|
| 679 |
+
# If empty - tax not provided, NON-TAXABLE
|
| 680 |
+
if is_empty:
|
| 681 |
+
item["Tax"] = 0.0
|
| 682 |
+
item["Line Total"] = amount
|
| 683 |
+
non_taxable_items.append(item)
|
| 684 |
+
continue
|
| 685 |
+
|
| 686 |
+
# If explicitly 0.00 - tax-exempt item, NON-TAXABLE
|
| 687 |
+
if is_explicitly_zero:
|
| 688 |
item["Tax"] = 0.0
|
| 689 |
item["Line Total"] = amount
|
| 690 |
+
non_taxable_items.append(item)
|
| 691 |
continue
|
| 692 |
|
| 693 |
+
# This item is TAXABLE
|
| 694 |
+
taxable_items.append(item)
|
| 695 |
+
|
| 696 |
+
# SECOND PASS: Determine authoritative tax rate from available sources
|
| 697 |
+
# NOW we calculate based on TAXABLE items only (not all items)
|
| 698 |
+
authoritative_rate = None
|
| 699 |
+
authority_source = None
|
| 700 |
+
|
| 701 |
+
if taxable_items:
|
| 702 |
+
# Calculate total taxable amount (sum of amounts for taxable items only)
|
| 703 |
+
total_taxable_amount = sum(item.get("Amount", 0.0) for item in taxable_items)
|
| 704 |
+
|
| 705 |
+
if total_taxable_amount > 0:
|
| 706 |
+
# TEST SOURCE A: tax_rate (test against taxable subtotal, not total subtotal)
|
| 707 |
+
if model_tax_rate > 0:
|
| 708 |
+
expected_tax_from_rate = total_taxable_amount * (model_tax_rate / 100)
|
| 709 |
+
expected_total_from_rate = subtotal + expected_tax_from_rate
|
| 710 |
+
error_from_rate = abs(expected_total_from_rate - total_amount)
|
| 711 |
+
else:
|
| 712 |
+
error_from_rate = float('inf')
|
| 713 |
+
|
| 714 |
+
# TEST SOURCE B: tax_amount (calculate rate based on taxable subtotal only)
|
| 715 |
+
if model_tax_amount > 0:
|
| 716 |
+
calculated_rate_from_amount = (model_tax_amount / total_taxable_amount) * 100
|
| 717 |
+
expected_total_from_amount = subtotal + model_tax_amount
|
| 718 |
+
error_from_amount = abs(expected_total_from_amount - total_amount)
|
| 719 |
+
else:
|
| 720 |
+
error_from_amount = float('inf')
|
| 721 |
+
|
| 722 |
+
# PICK WINNER (or use whichever is available)
|
| 723 |
+
if model_tax_rate > 0 or model_tax_amount > 0:
|
| 724 |
+
if error_from_rate < error_from_amount:
|
| 725 |
+
authoritative_rate = round(model_tax_rate, 4)
|
| 726 |
+
authority_source = "tax_rate"
|
| 727 |
+
else:
|
| 728 |
+
authoritative_rate = round(calculated_rate_from_amount, 4)
|
| 729 |
+
authority_source = "tax_amount"
|
| 730 |
+
else:
|
| 731 |
+
# No tax information available
|
| 732 |
+
structured_data["tax_validated"] = False
|
| 733 |
+
structured_data["tax_skip_reason"] = "No tax rate or amount provided"
|
| 734 |
+
return structured_data
|
| 735 |
+
else:
|
| 736 |
+
# No taxable items with amount > 0
|
| 737 |
+
structured_data["tax_validated"] = False
|
| 738 |
+
structured_data["tax_skip_reason"] = "No taxable items with valid amounts"
|
| 739 |
+
return structured_data
|
| 740 |
+
else:
|
| 741 |
+
# No taxable items found
|
| 742 |
+
structured_data["tax_validated"] = False
|
| 743 |
+
structured_data["tax_skip_reason"] = "No taxable items found"
|
| 744 |
+
return structured_data
|
| 745 |
+
|
| 746 |
+
# THIRD PASS: Calculate tax for taxable items using authoritative rate
|
| 747 |
+
calculated_total_tax = 0.0
|
| 748 |
+
|
| 749 |
+
if taxable_items and authoritative_rate is not None:
|
| 750 |
+
# Calculate tax for each taxable item
|
| 751 |
+
for item in taxable_items:
|
| 752 |
+
amount = item.get("Amount", 0.0)
|
| 753 |
+
# Calculate tax based on authoritative rate
|
| 754 |
+
corrected_tax = round(amount * (authoritative_rate / 100), 2)
|
| 755 |
+
item["Tax"] = corrected_tax
|
| 756 |
+
calculated_total_tax += corrected_tax
|
| 757 |
+
item["Line Total"] = round(amount + corrected_tax, 2)
|
| 758 |
|
| 759 |
# Update summary - ENSURE BOTH FIELDS ARE FILLED
|
| 760 |
structured_data["Tax Percentage"] = authoritative_rate
|
|
|
|
| 1381 |
with frame_right:
|
| 1382 |
st.subheader(f"Editable Invoice: {current['file_name']}")
|
| 1383 |
|
|
|
|
|
|
|
| 1384 |
# ----------------- FORM START -----------------
|
| 1385 |
with st.form(key=f"edit_form_{selected_hash}", clear_on_submit=False):
|
| 1386 |
tabs = st.tabs(["Invoice Details", "Sender/Recipient", "Bank Details", "Line Items"])
|
| 1387 |
|
| 1388 |
with tabs[0]:
|
| 1389 |
st.text_input("Invoice Number", key=f"Invoice Number_{selected_hash}")
|
| 1390 |
+
|
| 1391 |
+
# HYBRID DATE DISPLAY: Formatted display + Date picker
|
| 1392 |
+
st.write("**Invoice Date:**")
|
| 1393 |
+
invoice_date_obj = st.session_state.get(f"Invoice Date_{selected_hash}", None)
|
| 1394 |
+
if invoice_date_obj:
|
| 1395 |
+
formatted_invoice = invoice_date_obj.strftime("%d-%b-%Y")
|
| 1396 |
+
st.info(f"📅 {formatted_invoice}") # Shows: 📅 25-Sep-2025
|
| 1397 |
+
st.date_input("Select date:", key=f"Invoice Date_{selected_hash}",
|
| 1398 |
+
format="DD/MM/YYYY", label_visibility="collapsed")
|
| 1399 |
+
|
| 1400 |
+
st.write("**Due Date:**")
|
| 1401 |
+
due_date_obj = st.session_state.get(f"Due Date_{selected_hash}", None)
|
| 1402 |
+
if due_date_obj:
|
| 1403 |
+
formatted_due = due_date_obj.strftime("%d-%b-%Y")
|
| 1404 |
+
st.info(f"📅 {formatted_due}") # Shows: 📅 30-Sep-2025
|
| 1405 |
+
st.date_input("Select date:", key=f"Due Date_{selected_hash}",
|
| 1406 |
+
format="DD/MM/YYYY", label_visibility="collapsed")
|
| 1407 |
|
| 1408 |
curr_options = ['USD', 'EUR', 'GBP', 'INR', 'Other']
|
| 1409 |
if st.session_state[f"Currency_{selected_hash}"] not in curr_options:
|