Spaces:
Sleeping
Sleeping
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +142 -154
src/streamlit_app.py
CHANGED
|
@@ -196,11 +196,17 @@ def clean_float(x) -> float:
|
|
| 196 |
except ValueError:
|
| 197 |
return 0.0
|
| 198 |
|
| 199 |
-
def normalize_date(date_str) -> str:
|
| 200 |
"""
|
| 201 |
Normalize various date formats:
|
| 202 |
- Full dates (day-month-year) → dd-MMM-yyyy (e.g., 01-Jan-2025)
|
| 203 |
- Month-year only → MMM-yyyy (e.g., Aug-2025)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
Returns empty string if date cannot be parsed
|
| 205 |
"""
|
| 206 |
if not date_str or date_str == "":
|
|
@@ -210,17 +216,55 @@ def normalize_date(date_str) -> str:
|
|
| 210 |
date_str = date_str.strip()
|
| 211 |
if date_str == "":
|
| 212 |
return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
|
| 214 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
full_date_formats = [
|
| 216 |
-
# ISO formats (4-digit year)
|
| 217 |
"%Y-%m-%d", # 2025-01-15
|
| 218 |
"%Y/%m/%d", # 2025/01/15
|
| 219 |
"%Y.%m.%d", # 2025.01.15
|
| 220 |
"%Y %m %d", # 2025 01 15
|
| 221 |
"%Y%m%d", # 20250115 (compact)
|
| 222 |
|
| 223 |
-
# European formats with full month names (4-digit year)
|
|
|
|
|
|
|
| 224 |
"%d %B %Y", # 15 January 2025
|
| 225 |
"%d %b %Y", # 15 Jan 2025
|
| 226 |
"%d-%B-%Y", # 15-January-2025
|
|
@@ -230,13 +274,15 @@ def normalize_date(date_str) -> str:
|
|
| 230 |
"%d/%B/%Y", # 15/January/2025
|
| 231 |
"%d/%b/%Y", # 15/Jan/2025
|
| 232 |
|
| 233 |
-
# US formats with full month names (4-digit year)
|
| 234 |
"%B %d, %Y", # January 15, 2025
|
| 235 |
"%b %d, %Y", # Jan 15, 2025
|
| 236 |
"%B %d %Y", # January 15 2025
|
| 237 |
"%b %d %Y", # Jan 15 2025
|
| 238 |
"%B-%d-%Y", # January-15-2025
|
| 239 |
"%b-%d-%Y", # Jan-15-2025
|
|
|
|
|
|
|
| 240 |
|
| 241 |
# European formats - Day first (4-digit year)
|
| 242 |
"%d-%m-%Y", # 15-01-2025
|
|
@@ -244,7 +290,7 @@ def normalize_date(date_str) -> str:
|
|
| 244 |
"%d.%m.%Y", # 15.01.2025
|
| 245 |
"%d %m %Y", # 15 01 2025
|
| 246 |
|
| 247 |
-
# US formats - Month first (4-digit year)
|
| 248 |
"%m-%d-%Y", # 01-15-2025
|
| 249 |
"%m/%d/%Y", # 01/15/2025
|
| 250 |
"%m.%d.%Y", # 01.15.2025
|
|
@@ -273,7 +319,9 @@ def normalize_date(date_str) -> str:
|
|
| 273 |
"%d%m%y", # 150125
|
| 274 |
"%m%d%y", # 011525
|
| 275 |
|
| 276 |
-
# European formats with abbreviated month (2-digit year)
|
|
|
|
|
|
|
| 277 |
"%d-%b-%y", # 15-Jan-25
|
| 278 |
"%d/%b/%y", # 15/Jan/25
|
| 279 |
"%d.%b.%y", # 15.Jan.25
|
|
@@ -281,7 +329,7 @@ def normalize_date(date_str) -> str:
|
|
| 281 |
"%d-%B-%y", # 15-January-25
|
| 282 |
"%d/%B/%y", # 15/January/25
|
| 283 |
|
| 284 |
-
# US formats with abbreviated month (2-digit year)
|
| 285 |
"%b %d, %y", # Jan 15, 25
|
| 286 |
"%b %d %y", # Jan 15 25
|
| 287 |
"%B %d, %y", # January 15, 25
|
|
@@ -295,25 +343,14 @@ def normalize_date(date_str) -> str:
|
|
| 295 |
"%Y%d%m", # 20251501
|
| 296 |
]
|
| 297 |
|
| 298 |
-
# Try full date formats
|
| 299 |
for fmt in full_date_formats:
|
| 300 |
try:
|
| 301 |
-
parsed_date = datetime.strptime(
|
| 302 |
return parsed_date.strftime("%d-%b-%Y")
|
| 303 |
except (ValueError, TypeError):
|
| 304 |
continue
|
| 305 |
|
| 306 |
-
# Try with ordinal suffixes removed (1st, 2nd, 3rd, etc.)
|
| 307 |
-
if isinstance(date_str, str):
|
| 308 |
-
cleaned = re.sub(r'(\d+)(st|nd|rd|th)\b', r'\1', date_str, flags=re.IGNORECASE)
|
| 309 |
-
if cleaned != date_str:
|
| 310 |
-
for fmt in full_date_formats:
|
| 311 |
-
try:
|
| 312 |
-
parsed_date = datetime.strptime(cleaned, fmt)
|
| 313 |
-
return parsed_date.strftime("%d-%b-%Y")
|
| 314 |
-
except (ValueError, TypeError):
|
| 315 |
-
continue
|
| 316 |
-
|
| 317 |
# MONTH-YEAR ONLY FORMATS - output as MMM-yyyy
|
| 318 |
month_year_formats = [
|
| 319 |
# Full month name with year
|
|
@@ -354,7 +391,7 @@ def normalize_date(date_str) -> str:
|
|
| 354 |
# Try month-year formats → output as MMM-yyyy (no day)
|
| 355 |
for fmt in month_year_formats:
|
| 356 |
try:
|
| 357 |
-
parsed_date = datetime.strptime(
|
| 358 |
return parsed_date.strftime("%b-%Y") # Aug-2025 format
|
| 359 |
except (ValueError, TypeError):
|
| 360 |
continue
|
|
@@ -362,10 +399,10 @@ def normalize_date(date_str) -> str:
|
|
| 362 |
# If no format matched, return empty string
|
| 363 |
return ""
|
| 364 |
|
| 365 |
-
def parse_date_to_object(date_str):
|
| 366 |
"""
|
| 367 |
Parse a date string to a datetime.date object for date_input widget
|
| 368 |
-
|
| 369 |
Returns None if date cannot be parsed
|
| 370 |
"""
|
| 371 |
if not date_str or date_str == "":
|
|
@@ -375,147 +412,89 @@ def parse_date_to_object(date_str):
|
|
| 375 |
date_str = date_str.strip()
|
| 376 |
if date_str == "":
|
| 377 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 378 |
|
| 379 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 380 |
formats = [
|
| 381 |
# ISO formats (4-digit year)
|
| 382 |
-
"%Y-%m-%d",
|
| 383 |
-
"%Y/%m/%d", # 2025/01/15
|
| 384 |
-
"%Y.%m.%d", # 2025.01.15
|
| 385 |
-
"%Y %m %d", # 2025 01 15
|
| 386 |
-
"%Y%m%d", # 20250115 (compact)
|
| 387 |
|
| 388 |
-
#
|
| 389 |
-
"%d %B %Y",
|
| 390 |
-
"%d %b %Y", # 15 Jan 2025
|
| 391 |
-
"%d-%B-%Y", # 15-January-2025
|
| 392 |
-
"%d-%b-%Y", # 15-Jan-2025
|
| 393 |
-
"%d.%B.%Y", # 15.January.2025
|
| 394 |
-
"%d.%b.%Y", # 15.Jan.2025
|
| 395 |
-
"%d/%B/%Y", # 15/January/2025
|
| 396 |
-
"%d/%b/%Y", # 15/Jan/2025
|
| 397 |
|
| 398 |
-
#
|
| 399 |
-
"%B %
|
| 400 |
-
"%
|
| 401 |
-
"%B %d %Y",
|
| 402 |
-
"%
|
| 403 |
-
"%B-%d-%Y", # January-15-2025
|
| 404 |
-
"%b-%d-%Y", # Jan-15-2025
|
| 405 |
|
| 406 |
-
# European formats - Day first
|
| 407 |
-
"%d-%m-%Y",
|
| 408 |
-
"%d
|
| 409 |
-
"%d.%m.%Y", # 15.01.2025
|
| 410 |
-
"%d %m %Y", # 15 01 2025
|
| 411 |
|
| 412 |
-
# US formats - Month first
|
| 413 |
-
"%m-%d-%Y",
|
| 414 |
-
"%m
|
| 415 |
-
"%m.%d.%Y", # 01.15.2025
|
| 416 |
-
"%m %d %Y", # 01 15 2025
|
| 417 |
-
|
| 418 |
-
# European formats with 2-digit year - Day first
|
| 419 |
-
"%d-%m-%y", # 15-01-25
|
| 420 |
-
"%d/%m/%y", # 15/01/25 or 25/09/25 ← FIXES YOUR ISSUE!
|
| 421 |
-
"%d.%m.%y", # 15.01.25
|
| 422 |
-
"%d %m %y", # 15 01 25
|
| 423 |
-
|
| 424 |
-
# US formats with 2-digit year - Month first
|
| 425 |
-
"%m-%d-%y", # 01-15-25
|
| 426 |
-
"%m/%d/%y", # 01/15/25
|
| 427 |
-
"%m.%d.%y", # 01.15.25
|
| 428 |
-
"%m %d %y", # 01 15 25
|
| 429 |
|
| 430 |
# ISO with 2-digit year
|
| 431 |
-
"%y-%m-%d",
|
| 432 |
-
"%y/%m/%d", # 25/01/15
|
| 433 |
-
"%y.%m.%d", # 25.01.15
|
| 434 |
-
"%y %m %d", # 25 01 15
|
| 435 |
-
|
| 436 |
-
# Compact formats with 2-digit year
|
| 437 |
-
"%y%m%d", # 250115
|
| 438 |
-
"%d%m%y", # 150125
|
| 439 |
-
"%m%d%y", # 011525
|
| 440 |
-
|
| 441 |
-
# European formats with abbreviated month (2-digit year)
|
| 442 |
-
"%d-%b-%y", # 15-Jan-25
|
| 443 |
-
"%d/%b/%y", # 15/Jan/25
|
| 444 |
-
"%d.%b.%y", # 15.Jan.25
|
| 445 |
-
"%d %b %y", # 15 Jan 25
|
| 446 |
-
"%d-%B-%y", # 15-January-25
|
| 447 |
-
"%d/%B/%y", # 15/January/25
|
| 448 |
-
|
| 449 |
-
# US formats with abbreviated month (2-digit year)
|
| 450 |
-
"%b %d, %y", # Jan 15, 25
|
| 451 |
-
"%b %d %y", # Jan 15 25
|
| 452 |
-
"%B %d, %y", # January 15, 25
|
| 453 |
-
"%B %d %y", # January 15 25
|
| 454 |
-
"%b-%d-%y", # Jan-15-25
|
| 455 |
-
"%B-%d-%y", # January-15-25
|
| 456 |
|
| 457 |
-
# Compact
|
| 458 |
-
"%d%m%Y",
|
| 459 |
-
"%m%d%Y", # 01152025
|
| 460 |
-
"%Y%d%m", # 20251501
|
| 461 |
|
| 462 |
-
#
|
| 463 |
-
#
|
| 464 |
-
"%
|
| 465 |
-
"%
|
| 466 |
-
"%
|
| 467 |
-
"%b, %
|
| 468 |
-
"%B-%Y", # August-2025
|
| 469 |
-
"%b-%Y", # Aug-2025
|
| 470 |
-
"%B/%Y", # August/2025
|
| 471 |
-
"%b/%Y", # Aug/2025
|
| 472 |
|
| 473 |
-
#
|
| 474 |
-
"%
|
| 475 |
-
"%
|
| 476 |
-
"%m
|
| 477 |
-
"%m %Y",
|
| 478 |
-
"%
|
| 479 |
-
"%
|
| 480 |
-
"%
|
| 481 |
-
"%Y %m", # 2025 08
|
| 482 |
-
|
| 483 |
-
# Numeric month-year (2-digit year)
|
| 484 |
-
"%m/%y", # 08/25
|
| 485 |
-
"%m-%y", # 08-25
|
| 486 |
-
"%m.%y", # 08.25
|
| 487 |
-
"%m %y", # 08 25
|
| 488 |
-
"%y-%m", # 25-08
|
| 489 |
-
"%y/%m", # 25/08
|
| 490 |
-
|
| 491 |
-
# Full month name with 2-digit year
|
| 492 |
-
"%B %y", # August 25
|
| 493 |
-
"%b %y", # Aug 25
|
| 494 |
-
"%B-%y", # August-25
|
| 495 |
-
"%b-%y", # Aug-25
|
| 496 |
]
|
| 497 |
|
| 498 |
-
# Try parsing with each format
|
| 499 |
for fmt in formats:
|
| 500 |
try:
|
| 501 |
-
parsed_date = datetime.strptime(
|
| 502 |
return parsed_date.date()
|
| 503 |
except (ValueError, TypeError):
|
| 504 |
continue
|
| 505 |
|
| 506 |
-
# If still not parsed, try removing ordinal suffixes
|
| 507 |
-
if isinstance(date_str, str):
|
| 508 |
-
import re
|
| 509 |
-
cleaned = re.sub(r'(\d+)(st|nd|rd|th)\b', r'\1', date_str, flags=re.IGNORECASE)
|
| 510 |
-
|
| 511 |
-
if cleaned != date_str:
|
| 512 |
-
for fmt in formats:
|
| 513 |
-
try:
|
| 514 |
-
parsed_date = datetime.strptime(cleaned, fmt)
|
| 515 |
-
return parsed_date.date()
|
| 516 |
-
except (ValueError, TypeError):
|
| 517 |
-
continue
|
| 518 |
-
|
| 519 |
return None
|
| 520 |
|
| 521 |
|
|
@@ -762,11 +741,14 @@ def parse_vllm_json(raw_json_text):
|
|
| 762 |
header = data.get("header", {})
|
| 763 |
summary = data.get("summary", {})
|
| 764 |
items = data.get("items", [])
|
|
|
|
|
|
|
|
|
|
| 765 |
|
| 766 |
result = {
|
| 767 |
"Invoice Number": header.get("invoice_no", ""),
|
| 768 |
-
"Invoice Date": normalize_date(header.get("invoice_date", "")),
|
| 769 |
-
"Due Date": normalize_date(header.get("due_date", "")),
|
| 770 |
"Sender Name": header.get("sender_name", ""),
|
| 771 |
"Sender Address": header.get("sender_addr", ""),
|
| 772 |
"Sender": {
|
|
@@ -792,7 +774,7 @@ def parse_vllm_json(raw_json_text):
|
|
| 792 |
"Tax Percentage": clean_amount(summary.get("tax_rate", "0")),
|
| 793 |
"Total Tax": clean_amount(summary.get("tax_amount", "0")),
|
| 794 |
"Total Amount": clean_amount(summary.get("total_amount", "0")),
|
| 795 |
-
"Currency":
|
| 796 |
"Itemized Data": []
|
| 797 |
}
|
| 798 |
|
|
@@ -1193,9 +1175,12 @@ def map_prediction_to_ui(pred):
|
|
| 1193 |
return s
|
| 1194 |
return None
|
| 1195 |
|
|
|
|
|
|
|
|
|
|
| 1196 |
ui["Invoice Number"] = pick_first("invoice_no", "invoice_number", "invoiceid", "invoice id") or ""
|
| 1197 |
-
ui["Invoice Date"] = normalize_date(pick_first("invoice_date", "date", "invoice date") or "")
|
| 1198 |
-
ui["Due Date"] = normalize_date(pick_first("due_date", "due_date", "due") or "")
|
| 1199 |
ui["Sender Name"] = pick_first("sender_name", "sender") or ""
|
| 1200 |
ui["Sender Address"] = pick_first("sender_addr", "sender_address", "sender addr") or ""
|
| 1201 |
ui["Recipient Name"] = pick_first("rcpt_name", "recipient_name", "recipient", "rcpt") or ""
|
|
@@ -1215,7 +1200,7 @@ def map_prediction_to_ui(pred):
|
|
| 1215 |
ui["Tax Percentage"] = clean_number(pick_first("tax_rate", "tax_percentage", "tax pct", "tax percentage") or 0.0)
|
| 1216 |
ui["Total Tax"] = clean_number(pick_first("tax_amount", "tax", "total_tax") or 0.0)
|
| 1217 |
ui["Total Amount"] = clean_number(pick_first("total_amount", "grand_total", "total", "amount") or 0.0)
|
| 1218 |
-
ui["Currency"] =
|
| 1219 |
|
| 1220 |
items_rows = []
|
| 1221 |
|
|
@@ -1535,18 +1520,21 @@ elif len(st.session_state.batch_results) > 0:
|
|
| 1535 |
|
| 1536 |
# --------- Initialize widget state - ONLY IF NOT EXISTS (avoid overwriting user edits) ----------
|
| 1537 |
bank = form_data.get("Bank Details", {}) if isinstance(form_data.get("Bank Details", {}), dict) else {}
|
|
|
|
|
|
|
|
|
|
| 1538 |
|
| 1539 |
# Only initialize if key doesn't exist - this preserves user edits between reruns
|
| 1540 |
if f"Invoice Number_{selected_hash}" not in st.session_state:
|
| 1541 |
st.session_state[f"Invoice Number_{selected_hash}"] = form_data.get('Invoice Number', '')
|
| 1542 |
|
| 1543 |
-
# Parse dates to date objects for date_input widgets
|
| 1544 |
if f"Invoice Date_{selected_hash}" not in st.session_state:
|
| 1545 |
-
invoice_date_obj = parse_date_to_object(form_data.get('Invoice Date', ''))
|
| 1546 |
st.session_state[f"Invoice Date_{selected_hash}"] = invoice_date_obj
|
| 1547 |
|
| 1548 |
if f"Due Date_{selected_hash}" not in st.session_state:
|
| 1549 |
-
due_date_obj = parse_date_to_object(form_data.get('Due Date', ''))
|
| 1550 |
st.session_state[f"Due Date_{selected_hash}"] = due_date_obj
|
| 1551 |
|
| 1552 |
if f"Currency_{selected_hash}" not in st.session_state:
|
|
|
|
| 196 |
except ValueError:
|
| 197 |
return 0.0
|
| 198 |
|
| 199 |
+
def normalize_date(date_str, currency=None) -> str:
|
| 200 |
"""
|
| 201 |
Normalize various date formats:
|
| 202 |
- Full dates (day-month-year) → dd-MMM-yyyy (e.g., 01-Jan-2025)
|
| 203 |
- Month-year only → MMM-yyyy (e.g., Aug-2025)
|
| 204 |
+
|
| 205 |
+
Currency-aware parsing:
|
| 206 |
+
- If currency is USD and date is numeric format (11/09/2025, 11-09-2025),
|
| 207 |
+
treat as MM/DD/YYYY
|
| 208 |
+
- For text formats (06-Nov-2025, December 6, 2025), parse normally
|
| 209 |
+
|
| 210 |
Returns empty string if date cannot be parsed
|
| 211 |
"""
|
| 212 |
if not date_str or date_str == "":
|
|
|
|
| 216 |
date_str = date_str.strip()
|
| 217 |
if date_str == "":
|
| 218 |
return ""
|
| 219 |
+
|
| 220 |
+
# EXTRA CLEANING: Replace various unicode spaces and clean up
|
| 221 |
+
# Non-breaking space, thin space, etc. → regular space
|
| 222 |
+
date_str = re.sub(r'[\u00A0\u2000-\u200B\u202F\u205F\u3000]', ' ', date_str)
|
| 223 |
+
# Remove zero-width characters
|
| 224 |
+
date_str = re.sub(r'[\u200B-\u200D\uFEFF]', '', date_str)
|
| 225 |
+
# Normalize multiple spaces to single space
|
| 226 |
+
date_str = re.sub(r'\s+', ' ', date_str).strip()
|
| 227 |
+
|
| 228 |
+
# Clean ordinal suffixes FIRST (1st, 2nd, 3rd, 4th, 06th, etc.)
|
| 229 |
+
cleaned_date = date_str
|
| 230 |
+
if isinstance(date_str, str):
|
| 231 |
+
# Handle ordinals: "06th December 2025" → "06 December 2025"
|
| 232 |
+
# Also handles: "December 6th, 2025" → "December 6, 2025"
|
| 233 |
+
cleaned_date = re.sub(r'(\d+)(st|nd|rd|th)\b', r'\1', date_str, flags=re.IGNORECASE)
|
| 234 |
|
| 235 |
+
# Check if date is NUMERIC format (contains only digits and separators)
|
| 236 |
+
# Pattern: XX/XX/XXXX, XX-XX-XXXX, XX.XX.XXXX (with 2 or 4 digit year)
|
| 237 |
+
is_numeric_format = bool(re.match(r'^\d{1,2}[/\-\.]\d{1,2}[/\-\.]\d{2,4}$', cleaned_date))
|
| 238 |
+
|
| 239 |
+
# US FORMAT PRIORITY: If currency is USD and date is numeric, try MM/DD/YYYY first
|
| 240 |
+
if currency and currency.upper() == 'USD' and is_numeric_format:
|
| 241 |
+
us_formats = [
|
| 242 |
+
"%m/%d/%Y", # 01/15/2025
|
| 243 |
+
"%m-%d-%Y", # 01-15-2025
|
| 244 |
+
"%m.%d.%Y", # 01.15.2025
|
| 245 |
+
"%m/%d/%y", # 01/15/25
|
| 246 |
+
"%m-%d-%y", # 01-15-25
|
| 247 |
+
"%m.%d.%y", # 01.15.25
|
| 248 |
+
]
|
| 249 |
+
for fmt in us_formats:
|
| 250 |
+
try:
|
| 251 |
+
parsed_date = datetime.strptime(cleaned_date, fmt)
|
| 252 |
+
return parsed_date.strftime("%d-%b-%Y")
|
| 253 |
+
except (ValueError, TypeError):
|
| 254 |
+
continue
|
| 255 |
+
|
| 256 |
+
# FULL DATE FORMATS (day-month-year) - standard parsing
|
| 257 |
full_date_formats = [
|
| 258 |
+
# ISO formats (4-digit year) - these are unambiguous
|
| 259 |
"%Y-%m-%d", # 2025-01-15
|
| 260 |
"%Y/%m/%d", # 2025/01/15
|
| 261 |
"%Y.%m.%d", # 2025.01.15
|
| 262 |
"%Y %m %d", # 2025 01 15
|
| 263 |
"%Y%m%d", # 20250115 (compact)
|
| 264 |
|
| 265 |
+
# European formats with full month names (4-digit year) - UNAMBIGUOUS
|
| 266 |
+
"%d %B, %Y", # 15 December, 2025 (with comma)
|
| 267 |
+
"%d %b, %Y", # 15 Dec, 2025 (with comma)
|
| 268 |
"%d %B %Y", # 15 January 2025
|
| 269 |
"%d %b %Y", # 15 Jan 2025
|
| 270 |
"%d-%B-%Y", # 15-January-2025
|
|
|
|
| 274 |
"%d/%B/%Y", # 15/January/2025
|
| 275 |
"%d/%b/%Y", # 15/Jan/2025
|
| 276 |
|
| 277 |
+
# US formats with full month names (4-digit year) - UNAMBIGUOUS
|
| 278 |
"%B %d, %Y", # January 15, 2025
|
| 279 |
"%b %d, %Y", # Jan 15, 2025
|
| 280 |
"%B %d %Y", # January 15 2025
|
| 281 |
"%b %d %Y", # Jan 15 2025
|
| 282 |
"%B-%d-%Y", # January-15-2025
|
| 283 |
"%b-%d-%Y", # Jan-15-2025
|
| 284 |
+
"%B %d,%Y", # January 15,2025 (no space after comma)
|
| 285 |
+
"%b %d,%Y", # Jan 15,2025
|
| 286 |
|
| 287 |
# European formats - Day first (4-digit year)
|
| 288 |
"%d-%m-%Y", # 15-01-2025
|
|
|
|
| 290 |
"%d.%m.%Y", # 15.01.2025
|
| 291 |
"%d %m %Y", # 15 01 2025
|
| 292 |
|
| 293 |
+
# US formats - Month first (4-digit year) - only if not USD or not numeric
|
| 294 |
"%m-%d-%Y", # 01-15-2025
|
| 295 |
"%m/%d/%Y", # 01/15/2025
|
| 296 |
"%m.%d.%Y", # 01.15.2025
|
|
|
|
| 319 |
"%d%m%y", # 150125
|
| 320 |
"%m%d%y", # 011525
|
| 321 |
|
| 322 |
+
# European formats with abbreviated month (2-digit year) - UNAMBIGUOUS
|
| 323 |
+
"%d %B, %y", # 15 December, 25 (with comma)
|
| 324 |
+
"%d %b, %y", # 15 Dec, 25 (with comma)
|
| 325 |
"%d-%b-%y", # 15-Jan-25
|
| 326 |
"%d/%b/%y", # 15/Jan/25
|
| 327 |
"%d.%b.%y", # 15.Jan.25
|
|
|
|
| 329 |
"%d-%B-%y", # 15-January-25
|
| 330 |
"%d/%B/%y", # 15/January/25
|
| 331 |
|
| 332 |
+
# US formats with abbreviated month (2-digit year) - UNAMBIGUOUS
|
| 333 |
"%b %d, %y", # Jan 15, 25
|
| 334 |
"%b %d %y", # Jan 15 25
|
| 335 |
"%B %d, %y", # January 15, 25
|
|
|
|
| 343 |
"%Y%d%m", # 20251501
|
| 344 |
]
|
| 345 |
|
| 346 |
+
# Try full date formats → output as dd-MMM-yyyy
|
| 347 |
for fmt in full_date_formats:
|
| 348 |
try:
|
| 349 |
+
parsed_date = datetime.strptime(cleaned_date, fmt)
|
| 350 |
return parsed_date.strftime("%d-%b-%Y")
|
| 351 |
except (ValueError, TypeError):
|
| 352 |
continue
|
| 353 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 354 |
# MONTH-YEAR ONLY FORMATS - output as MMM-yyyy
|
| 355 |
month_year_formats = [
|
| 356 |
# Full month name with year
|
|
|
|
| 391 |
# Try month-year formats → output as MMM-yyyy (no day)
|
| 392 |
for fmt in month_year_formats:
|
| 393 |
try:
|
| 394 |
+
parsed_date = datetime.strptime(cleaned_date, fmt)
|
| 395 |
return parsed_date.strftime("%b-%Y") # Aug-2025 format
|
| 396 |
except (ValueError, TypeError):
|
| 397 |
continue
|
|
|
|
| 399 |
# If no format matched, return empty string
|
| 400 |
return ""
|
| 401 |
|
| 402 |
+
def parse_date_to_object(date_str, currency=None):
|
| 403 |
"""
|
| 404 |
Parse a date string to a datetime.date object for date_input widget
|
| 405 |
+
Currency-aware: If USD and numeric format, treat as MM/DD/YYYY
|
| 406 |
Returns None if date cannot be parsed
|
| 407 |
"""
|
| 408 |
if not date_str or date_str == "":
|
|
|
|
| 412 |
date_str = date_str.strip()
|
| 413 |
if date_str == "":
|
| 414 |
return None
|
| 415 |
+
|
| 416 |
+
# EXTRA CLEANING: Replace various unicode spaces and clean up
|
| 417 |
+
date_str = re.sub(r'[\u00A0\u2000-\u200B\u202F\u205F\u3000]', ' ', date_str)
|
| 418 |
+
date_str = re.sub(r'[\u200B-\u200D\uFEFF]', '', date_str)
|
| 419 |
+
date_str = re.sub(r'\s+', ' ', date_str).strip()
|
| 420 |
+
|
| 421 |
+
# Clean ordinal suffixes FIRST (1st, 2nd, 3rd, 4th, 06th, etc.)
|
| 422 |
+
cleaned_date = str(date_str)
|
| 423 |
+
if isinstance(date_str, str):
|
| 424 |
+
cleaned_date = re.sub(r'(\d+)(st|nd|rd|th)\b', r'\1', date_str, flags=re.IGNORECASE)
|
| 425 |
|
| 426 |
+
# Check if date is NUMERIC format (contains only digits and separators)
|
| 427 |
+
is_numeric_format = bool(re.match(r'^\d{1,2}[/\-\.]\d{1,2}[/\-\.]\d{2,4}$', cleaned_date))
|
| 428 |
+
|
| 429 |
+
# US FORMAT PRIORITY: If currency is USD and date is numeric, try MM/DD/YYYY first
|
| 430 |
+
if currency and currency.upper() == 'USD' and is_numeric_format:
|
| 431 |
+
us_formats = [
|
| 432 |
+
"%m/%d/%Y", # 01/15/2025
|
| 433 |
+
"%m-%d-%Y", # 01-15-2025
|
| 434 |
+
"%m.%d.%Y", # 01.15.2025
|
| 435 |
+
"%m/%d/%y", # 01/15/25
|
| 436 |
+
"%m-%d-%y", # 01-15-25
|
| 437 |
+
"%m.%d.%y", # 01.15.25
|
| 438 |
+
]
|
| 439 |
+
for fmt in us_formats:
|
| 440 |
+
try:
|
| 441 |
+
parsed_date = datetime.strptime(cleaned_date, fmt)
|
| 442 |
+
return parsed_date.date()
|
| 443 |
+
except (ValueError, TypeError):
|
| 444 |
+
continue
|
| 445 |
+
|
| 446 |
+
# Standard formats
|
| 447 |
formats = [
|
| 448 |
# ISO formats (4-digit year)
|
| 449 |
+
"%Y-%m-%d", "%Y/%m/%d", "%Y.%m.%d", "%Y %m %d", "%Y%m%d",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 450 |
|
| 451 |
+
# Text month formats with comma - MUST BE FIRST for "06 December, 2025"
|
| 452 |
+
"%d %B, %Y", "%d %b, %Y", # 06 December, 2025 / 06 Dec, 2025
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 453 |
|
| 454 |
+
# Text month formats - UNAMBIGUOUS
|
| 455 |
+
"%d %B %Y", "%d %b %Y", "%d-%B-%Y", "%d-%b-%Y",
|
| 456 |
+
"%d.%B.%Y", "%d.%b.%Y", "%d/%B/%Y", "%d/%b/%Y",
|
| 457 |
+
"%B %d, %Y", "%b %d, %Y", "%B %d %Y", "%b %d %Y",
|
| 458 |
+
"%B-%d-%Y", "%b-%d-%Y", "%B %d,%Y", "%b %d,%Y",
|
|
|
|
|
|
|
| 459 |
|
| 460 |
+
# European formats - Day first
|
| 461 |
+
"%d-%m-%Y", "%d/%m/%Y", "%d.%m.%Y", "%d %m %Y",
|
| 462 |
+
"%d-%m-%y", "%d/%m/%y", "%d.%m.%y", "%d %m %y",
|
|
|
|
|
|
|
| 463 |
|
| 464 |
+
# US formats - Month first
|
| 465 |
+
"%m-%d-%Y", "%m/%d/%Y", "%m.%d.%Y", "%m %d %Y",
|
| 466 |
+
"%m-%d-%y", "%m/%d/%y", "%m.%d.%y", "%m %d %y",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 467 |
|
| 468 |
# ISO with 2-digit year
|
| 469 |
+
"%y-%m-%d", "%y/%m/%d", "%y.%m.%d", "%y %m %d",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 470 |
|
| 471 |
+
# Compact formats
|
| 472 |
+
"%y%m%d", "%d%m%y", "%m%d%y", "%d%m%Y", "%m%d%Y", "%Y%d%m",
|
|
|
|
|
|
|
| 473 |
|
| 474 |
+
# Text month with 2-digit year (with comma)
|
| 475 |
+
"%d %B, %y", "%d %b, %y", # 06 December, 25 / 06 Dec, 25
|
| 476 |
+
"%d-%b-%y", "%d/%b/%y", "%d.%b.%y", "%d %b %y",
|
| 477 |
+
"%d-%B-%y", "%d/%B/%y",
|
| 478 |
+
"%b %d, %y", "%b %d %y", "%B %d, %y", "%B %d %y",
|
| 479 |
+
"%b-%d-%y", "%B-%d-%y",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 480 |
|
| 481 |
+
# Month-year only
|
| 482 |
+
"%B %Y", "%b %Y", "%B, %Y", "%b, %Y",
|
| 483 |
+
"%B-%Y", "%b-%Y", "%B/%Y", "%b/%Y",
|
| 484 |
+
"%m/%Y", "%m-%Y", "%m.%Y", "%m %Y",
|
| 485 |
+
"%Y-%m", "%Y/%m", "%Y.%m", "%Y %m",
|
| 486 |
+
"%m/%y", "%m-%y", "%m.%y", "%m %y",
|
| 487 |
+
"%y-%m", "%y/%m",
|
| 488 |
+
"%B %y", "%b %y", "%B-%y", "%b-%y",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 489 |
]
|
| 490 |
|
|
|
|
| 491 |
for fmt in formats:
|
| 492 |
try:
|
| 493 |
+
parsed_date = datetime.strptime(cleaned_date, fmt)
|
| 494 |
return parsed_date.date()
|
| 495 |
except (ValueError, TypeError):
|
| 496 |
continue
|
| 497 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 498 |
return None
|
| 499 |
|
| 500 |
|
|
|
|
| 741 |
header = data.get("header", {})
|
| 742 |
summary = data.get("summary", {})
|
| 743 |
items = data.get("items", [])
|
| 744 |
+
|
| 745 |
+
# Get currency first for date parsing (USD uses MM/DD/YYYY for numeric dates)
|
| 746 |
+
currency = summary.get("currency", "")
|
| 747 |
|
| 748 |
result = {
|
| 749 |
"Invoice Number": header.get("invoice_no", ""),
|
| 750 |
+
"Invoice Date": normalize_date(header.get("invoice_date", ""), currency),
|
| 751 |
+
"Due Date": normalize_date(header.get("due_date", ""), currency),
|
| 752 |
"Sender Name": header.get("sender_name", ""),
|
| 753 |
"Sender Address": header.get("sender_addr", ""),
|
| 754 |
"Sender": {
|
|
|
|
| 774 |
"Tax Percentage": clean_amount(summary.get("tax_rate", "0")),
|
| 775 |
"Total Tax": clean_amount(summary.get("tax_amount", "0")),
|
| 776 |
"Total Amount": clean_amount(summary.get("total_amount", "0")),
|
| 777 |
+
"Currency": currency,
|
| 778 |
"Itemized Data": []
|
| 779 |
}
|
| 780 |
|
|
|
|
| 1175 |
return s
|
| 1176 |
return None
|
| 1177 |
|
| 1178 |
+
# Get currency first for date parsing (USD uses MM/DD/YYYY for numeric dates)
|
| 1179 |
+
currency = (pick_first("currency") or "").strip()
|
| 1180 |
+
|
| 1181 |
ui["Invoice Number"] = pick_first("invoice_no", "invoice_number", "invoiceid", "invoice id") or ""
|
| 1182 |
+
ui["Invoice Date"] = normalize_date(pick_first("invoice_date", "date", "invoice date") or "", currency)
|
| 1183 |
+
ui["Due Date"] = normalize_date(pick_first("due_date", "due_date", "due") or "", currency)
|
| 1184 |
ui["Sender Name"] = pick_first("sender_name", "sender") or ""
|
| 1185 |
ui["Sender Address"] = pick_first("sender_addr", "sender_address", "sender addr") or ""
|
| 1186 |
ui["Recipient Name"] = pick_first("rcpt_name", "recipient_name", "recipient", "rcpt") or ""
|
|
|
|
| 1200 |
ui["Tax Percentage"] = clean_number(pick_first("tax_rate", "tax_percentage", "tax pct", "tax percentage") or 0.0)
|
| 1201 |
ui["Total Tax"] = clean_number(pick_first("tax_amount", "tax", "total_tax") or 0.0)
|
| 1202 |
ui["Total Amount"] = clean_number(pick_first("total_amount", "grand_total", "total", "amount") or 0.0)
|
| 1203 |
+
ui["Currency"] = currency
|
| 1204 |
|
| 1205 |
items_rows = []
|
| 1206 |
|
|
|
|
| 1520 |
|
| 1521 |
# --------- Initialize widget state - ONLY IF NOT EXISTS (avoid overwriting user edits) ----------
|
| 1522 |
bank = form_data.get("Bank Details", {}) if isinstance(form_data.get("Bank Details", {}), dict) else {}
|
| 1523 |
+
|
| 1524 |
+
# Get currency for date parsing (USD uses MM/DD/YYYY for numeric dates)
|
| 1525 |
+
form_currency = form_data.get('Currency', '')
|
| 1526 |
|
| 1527 |
# Only initialize if key doesn't exist - this preserves user edits between reruns
|
| 1528 |
if f"Invoice Number_{selected_hash}" not in st.session_state:
|
| 1529 |
st.session_state[f"Invoice Number_{selected_hash}"] = form_data.get('Invoice Number', '')
|
| 1530 |
|
| 1531 |
+
# Parse dates to date objects for date_input widgets (pass currency for US date handling)
|
| 1532 |
if f"Invoice Date_{selected_hash}" not in st.session_state:
|
| 1533 |
+
invoice_date_obj = parse_date_to_object(form_data.get('Invoice Date', ''), form_currency)
|
| 1534 |
st.session_state[f"Invoice Date_{selected_hash}"] = invoice_date_obj
|
| 1535 |
|
| 1536 |
if f"Due Date_{selected_hash}" not in st.session_state:
|
| 1537 |
+
due_date_obj = parse_date_to_object(form_data.get('Due Date', ''), form_currency)
|
| 1538 |
st.session_state[f"Due Date_{selected_hash}"] = due_date_obj
|
| 1539 |
|
| 1540 |
if f"Currency_{selected_hash}" not in st.session_state:
|