Ankushbl6 committed on
Commit
3832c5b
·
verified ·
1 Parent(s): efa6392

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +142 -154
src/streamlit_app.py CHANGED
@@ -196,11 +196,17 @@ def clean_float(x) -> float:
196
  except ValueError:
197
  return 0.0
198
 
199
- def normalize_date(date_str) -> str:
200
  """
201
  Normalize various date formats:
202
  - Full dates (day-month-year) → dd-MMM-yyyy (e.g., 01-Jan-2025)
203
  - Month-year only → MMM-yyyy (e.g., Aug-2025)
 
 
 
 
 
 
204
  Returns empty string if date cannot be parsed
205
  """
206
  if not date_str or date_str == "":
@@ -210,17 +216,55 @@ def normalize_date(date_str) -> str:
210
  date_str = date_str.strip()
211
  if date_str == "":
212
  return ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
 
214
- # FULL DATE FORMATS (day-month-year) - try these first
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
  full_date_formats = [
216
- # ISO formats (4-digit year)
217
  "%Y-%m-%d", # 2025-01-15
218
  "%Y/%m/%d", # 2025/01/15
219
  "%Y.%m.%d", # 2025.01.15
220
  "%Y %m %d", # 2025 01 15
221
  "%Y%m%d", # 20250115 (compact)
222
 
223
- # European formats with full month names (4-digit year)
 
 
224
  "%d %B %Y", # 15 January 2025
225
  "%d %b %Y", # 15 Jan 2025
226
  "%d-%B-%Y", # 15-January-2025
@@ -230,13 +274,15 @@ def normalize_date(date_str) -> str:
230
  "%d/%B/%Y", # 15/January/2025
231
  "%d/%b/%Y", # 15/Jan/2025
232
 
233
- # US formats with full month names (4-digit year)
234
  "%B %d, %Y", # January 15, 2025
235
  "%b %d, %Y", # Jan 15, 2025
236
  "%B %d %Y", # January 15 2025
237
  "%b %d %Y", # Jan 15 2025
238
  "%B-%d-%Y", # January-15-2025
239
  "%b-%d-%Y", # Jan-15-2025
 
 
240
 
241
  # European formats - Day first (4-digit year)
242
  "%d-%m-%Y", # 15-01-2025
@@ -244,7 +290,7 @@ def normalize_date(date_str) -> str:
244
  "%d.%m.%Y", # 15.01.2025
245
  "%d %m %Y", # 15 01 2025
246
 
247
- # US formats - Month first (4-digit year)
248
  "%m-%d-%Y", # 01-15-2025
249
  "%m/%d/%Y", # 01/15/2025
250
  "%m.%d.%Y", # 01.15.2025
@@ -273,7 +319,9 @@ def normalize_date(date_str) -> str:
273
  "%d%m%y", # 150125
274
  "%m%d%y", # 011525
275
 
276
- # European formats with abbreviated month (2-digit year)
 
 
277
  "%d-%b-%y", # 15-Jan-25
278
  "%d/%b/%y", # 15/Jan/25
279
  "%d.%b.%y", # 15.Jan.25
@@ -281,7 +329,7 @@ def normalize_date(date_str) -> str:
281
  "%d-%B-%y", # 15-January-25
282
  "%d/%B/%y", # 15/January/25
283
 
284
- # US formats with abbreviated month (2-digit year)
285
  "%b %d, %y", # Jan 15, 25
286
  "%b %d %y", # Jan 15 25
287
  "%B %d, %y", # January 15, 25
@@ -295,25 +343,14 @@ def normalize_date(date_str) -> str:
295
  "%Y%d%m", # 20251501
296
  ]
297
 
298
- # Try full date formats first → output as dd-MMM-yyyy
299
  for fmt in full_date_formats:
300
  try:
301
- parsed_date = datetime.strptime(str(date_str), fmt)
302
  return parsed_date.strftime("%d-%b-%Y")
303
  except (ValueError, TypeError):
304
  continue
305
 
306
- # Try with ordinal suffixes removed (1st, 2nd, 3rd, etc.)
307
- if isinstance(date_str, str):
308
- cleaned = re.sub(r'(\d+)(st|nd|rd|th)\b', r'\1', date_str, flags=re.IGNORECASE)
309
- if cleaned != date_str:
310
- for fmt in full_date_formats:
311
- try:
312
- parsed_date = datetime.strptime(cleaned, fmt)
313
- return parsed_date.strftime("%d-%b-%Y")
314
- except (ValueError, TypeError):
315
- continue
316
-
317
  # MONTH-YEAR ONLY FORMATS - output as MMM-yyyy
318
  month_year_formats = [
319
  # Full month name with year
@@ -354,7 +391,7 @@ def normalize_date(date_str) -> str:
354
  # Try month-year formats → output as MMM-yyyy (no day)
355
  for fmt in month_year_formats:
356
  try:
357
- parsed_date = datetime.strptime(str(date_str), fmt)
358
  return parsed_date.strftime("%b-%Y") # Aug-2025 format
359
  except (ValueError, TypeError):
360
  continue
@@ -362,10 +399,10 @@ def normalize_date(date_str) -> str:
362
  # If no format matched, return empty string
363
  return ""
364
 
365
- def parse_date_to_object(date_str):
366
  """
367
  Parse a date string to a datetime.date object for date_input widget
368
- Handles: ISO, US, EU, Asian, two-digit years, and 50+ worldwide date formats
369
  Returns None if date cannot be parsed
370
  """
371
  if not date_str or date_str == "":
@@ -375,147 +412,89 @@ def parse_date_to_object(date_str):
375
  date_str = date_str.strip()
376
  if date_str == "":
377
  return None
 
 
 
 
 
 
 
 
 
 
378
 
379
- # Comprehensive list of date formats to try (same as normalize_date)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
380
  formats = [
381
  # ISO formats (4-digit year)
382
- "%Y-%m-%d", # 2025-01-15
383
- "%Y/%m/%d", # 2025/01/15
384
- "%Y.%m.%d", # 2025.01.15
385
- "%Y %m %d", # 2025 01 15
386
- "%Y%m%d", # 20250115 (compact)
387
 
388
- # European formats with full month names (4-digit year)
389
- "%d %B %Y", # 15 January 2025
390
- "%d %b %Y", # 15 Jan 2025
391
- "%d-%B-%Y", # 15-January-2025
392
- "%d-%b-%Y", # 15-Jan-2025
393
- "%d.%B.%Y", # 15.January.2025
394
- "%d.%b.%Y", # 15.Jan.2025
395
- "%d/%B/%Y", # 15/January/2025
396
- "%d/%b/%Y", # 15/Jan/2025
397
 
398
- # US formats with full month names (4-digit year)
399
- "%B %d, %Y", # January 15, 2025
400
- "%b %d, %Y", # Jan 15, 2025
401
- "%B %d %Y", # January 15 2025
402
- "%b %d %Y", # Jan 15 2025
403
- "%B-%d-%Y", # January-15-2025
404
- "%b-%d-%Y", # Jan-15-2025
405
 
406
- # European formats - Day first (4-digit year)
407
- "%d-%m-%Y", # 15-01-2025
408
- "%d/%m/%Y", # 15/01/2025
409
- "%d.%m.%Y", # 15.01.2025
410
- "%d %m %Y", # 15 01 2025
411
 
412
- # US formats - Month first (4-digit year)
413
- "%m-%d-%Y", # 01-15-2025
414
- "%m/%d/%Y", # 01/15/2025
415
- "%m.%d.%Y", # 01.15.2025
416
- "%m %d %Y", # 01 15 2025
417
-
418
- # European formats with 2-digit year - Day first
419
- "%d-%m-%y", # 15-01-25
420
- "%d/%m/%y", # 15/01/25 or 25/09/25 ← FIXES YOUR ISSUE!
421
- "%d.%m.%y", # 15.01.25
422
- "%d %m %y", # 15 01 25
423
-
424
- # US formats with 2-digit year - Month first
425
- "%m-%d-%y", # 01-15-25
426
- "%m/%d/%y", # 01/15/25
427
- "%m.%d.%y", # 01.15.25
428
- "%m %d %y", # 01 15 25
429
 
430
  # ISO with 2-digit year
431
- "%y-%m-%d", # 25-01-15
432
- "%y/%m/%d", # 25/01/15
433
- "%y.%m.%d", # 25.01.15
434
- "%y %m %d", # 25 01 15
435
-
436
- # Compact formats with 2-digit year
437
- "%y%m%d", # 250115
438
- "%d%m%y", # 150125
439
- "%m%d%y", # 011525
440
-
441
- # European formats with abbreviated month (2-digit year)
442
- "%d-%b-%y", # 15-Jan-25
443
- "%d/%b/%y", # 15/Jan/25
444
- "%d.%b.%y", # 15.Jan.25
445
- "%d %b %y", # 15 Jan 25
446
- "%d-%B-%y", # 15-January-25
447
- "%d/%B/%y", # 15/January/25
448
-
449
- # US formats with abbreviated month (2-digit year)
450
- "%b %d, %y", # Jan 15, 25
451
- "%b %d %y", # Jan 15 25
452
- "%B %d, %y", # January 15, 25
453
- "%B %d %y", # January 15 25
454
- "%b-%d-%y", # Jan-15-25
455
- "%B-%d-%y", # January-15-25
456
 
457
- # Compact 8-digit formats
458
- "%d%m%Y", # 15012025
459
- "%m%d%Y", # 01152025
460
- "%Y%d%m", # 20251501
461
 
462
- # ========== MONTH-YEAR ONLY FORMATS (defaults to 1st of month) ==========
463
- # Full month name with year
464
- "%B %Y", # August 2025
465
- "%b %Y", # Aug 2025
466
- "%B, %Y", # August, 2025
467
- "%b, %Y", # Aug, 2025
468
- "%B-%Y", # August-2025
469
- "%b-%Y", # Aug-2025
470
- "%B/%Y", # August/2025
471
- "%b/%Y", # Aug/2025
472
 
473
- # Numeric month-year (4-digit year)
474
- "%m/%Y", # 08/2025
475
- "%m-%Y", # 08-2025
476
- "%m.%Y", # 08.2025
477
- "%m %Y", # 08 2025
478
- "%Y-%m", # 2025-08
479
- "%Y/%m", # 2025/08
480
- "%Y.%m", # 2025.08
481
- "%Y %m", # 2025 08
482
-
483
- # Numeric month-year (2-digit year)
484
- "%m/%y", # 08/25
485
- "%m-%y", # 08-25
486
- "%m.%y", # 08.25
487
- "%m %y", # 08 25
488
- "%y-%m", # 25-08
489
- "%y/%m", # 25/08
490
-
491
- # Full month name with 2-digit year
492
- "%B %y", # August 25
493
- "%b %y", # Aug 25
494
- "%B-%y", # August-25
495
- "%b-%y", # Aug-25
496
  ]
497
 
498
- # Try parsing with each format
499
  for fmt in formats:
500
  try:
501
- parsed_date = datetime.strptime(str(date_str), fmt)
502
  return parsed_date.date()
503
  except (ValueError, TypeError):
504
  continue
505
 
506
- # If still not parsed, try removing ordinal suffixes
507
- if isinstance(date_str, str):
508
- import re
509
- cleaned = re.sub(r'(\d+)(st|nd|rd|th)\b', r'\1', date_str, flags=re.IGNORECASE)
510
-
511
- if cleaned != date_str:
512
- for fmt in formats:
513
- try:
514
- parsed_date = datetime.strptime(cleaned, fmt)
515
- return parsed_date.date()
516
- except (ValueError, TypeError):
517
- continue
518
-
519
  return None
520
 
521
 
@@ -762,11 +741,14 @@ def parse_vllm_json(raw_json_text):
762
  header = data.get("header", {})
763
  summary = data.get("summary", {})
764
  items = data.get("items", [])
 
 
 
765
 
766
  result = {
767
  "Invoice Number": header.get("invoice_no", ""),
768
- "Invoice Date": normalize_date(header.get("invoice_date", "")),
769
- "Due Date": normalize_date(header.get("due_date", "")),
770
  "Sender Name": header.get("sender_name", ""),
771
  "Sender Address": header.get("sender_addr", ""),
772
  "Sender": {
@@ -792,7 +774,7 @@ def parse_vllm_json(raw_json_text):
792
  "Tax Percentage": clean_amount(summary.get("tax_rate", "0")),
793
  "Total Tax": clean_amount(summary.get("tax_amount", "0")),
794
  "Total Amount": clean_amount(summary.get("total_amount", "0")),
795
- "Currency": summary.get("currency", ""),
796
  "Itemized Data": []
797
  }
798
 
@@ -1193,9 +1175,12 @@ def map_prediction_to_ui(pred):
1193
  return s
1194
  return None
1195
 
 
 
 
1196
  ui["Invoice Number"] = pick_first("invoice_no", "invoice_number", "invoiceid", "invoice id") or ""
1197
- ui["Invoice Date"] = normalize_date(pick_first("invoice_date", "date", "invoice date") or "")
1198
- ui["Due Date"] = normalize_date(pick_first("due_date", "due_date", "due") or "")
1199
  ui["Sender Name"] = pick_first("sender_name", "sender") or ""
1200
  ui["Sender Address"] = pick_first("sender_addr", "sender_address", "sender addr") or ""
1201
  ui["Recipient Name"] = pick_first("rcpt_name", "recipient_name", "recipient", "rcpt") or ""
@@ -1215,7 +1200,7 @@ def map_prediction_to_ui(pred):
1215
  ui["Tax Percentage"] = clean_number(pick_first("tax_rate", "tax_percentage", "tax pct", "tax percentage") or 0.0)
1216
  ui["Total Tax"] = clean_number(pick_first("tax_amount", "tax", "total_tax") or 0.0)
1217
  ui["Total Amount"] = clean_number(pick_first("total_amount", "grand_total", "total", "amount") or 0.0)
1218
- ui["Currency"] = (pick_first("currency") or "").strip()
1219
 
1220
  items_rows = []
1221
 
@@ -1535,18 +1520,21 @@ elif len(st.session_state.batch_results) > 0:
1535
 
1536
  # --------- Initialize widget state - ONLY IF NOT EXISTS (avoid overwriting user edits) ----------
1537
  bank = form_data.get("Bank Details", {}) if isinstance(form_data.get("Bank Details", {}), dict) else {}
 
 
 
1538
 
1539
  # Only initialize if key doesn't exist - this preserves user edits between reruns
1540
  if f"Invoice Number_{selected_hash}" not in st.session_state:
1541
  st.session_state[f"Invoice Number_{selected_hash}"] = form_data.get('Invoice Number', '')
1542
 
1543
- # Parse dates to date objects for date_input widgets
1544
  if f"Invoice Date_{selected_hash}" not in st.session_state:
1545
- invoice_date_obj = parse_date_to_object(form_data.get('Invoice Date', ''))
1546
  st.session_state[f"Invoice Date_{selected_hash}"] = invoice_date_obj
1547
 
1548
  if f"Due Date_{selected_hash}" not in st.session_state:
1549
- due_date_obj = parse_date_to_object(form_data.get('Due Date', ''))
1550
  st.session_state[f"Due Date_{selected_hash}"] = due_date_obj
1551
 
1552
  if f"Currency_{selected_hash}" not in st.session_state:
 
196
  except ValueError:
197
  return 0.0
198
 
199
+ def normalize_date(date_str, currency=None) -> str:
200
  """
201
  Normalize various date formats:
202
  - Full dates (day-month-year) → dd-MMM-yyyy (e.g., 01-Jan-2025)
203
  - Month-year only → MMM-yyyy (e.g., Aug-2025)
204
+
205
+ Currency-aware parsing:
206
+ - If currency is USD and date is numeric format (11/09/2025, 11-09-2025),
207
+ treat as MM/DD/YYYY
208
+ - For text formats (06-Nov-2025, December 6, 2025), parse normally
209
+
210
  Returns empty string if date cannot be parsed
211
  """
212
  if not date_str or date_str == "":
 
216
  date_str = date_str.strip()
217
  if date_str == "":
218
  return ""
219
+
220
+ # EXTRA CLEANING: Replace various unicode spaces and clean up
221
+ # Non-breaking space, thin space, etc. → regular space
222
+ date_str = re.sub(r'[\u00A0\u2000-\u200B\u202F\u205F\u3000]', ' ', date_str)
223
+ # Remove zero-width characters
224
+ date_str = re.sub(r'[\u200B-\u200D\uFEFF]', '', date_str)
225
+ # Normalize multiple spaces to single space
226
+ date_str = re.sub(r'\s+', ' ', date_str).strip()
227
+
228
+ # Clean ordinal suffixes FIRST (1st, 2nd, 3rd, 4th, 06th, etc.)
229
+ cleaned_date = date_str
230
+ if isinstance(date_str, str):
231
+ # Handle ordinals: "06th December 2025" → "06 December 2025"
232
+ # Also handles: "December 6th, 2025" → "December 6, 2025"
233
+ cleaned_date = re.sub(r'(\d+)(st|nd|rd|th)\b', r'\1', date_str, flags=re.IGNORECASE)
234
 
235
+ # Check if date is NUMERIC format (contains only digits and separators)
236
+ # Pattern: XX/XX/XXXX, XX-XX-XXXX, XX.XX.XXXX (with 2 or 4 digit year)
237
+ is_numeric_format = bool(re.match(r'^\d{1,2}[/\-\.]\d{1,2}[/\-\.]\d{2,4}$', cleaned_date))
238
+
239
+ # US FORMAT PRIORITY: If currency is USD and date is numeric, try MM/DD/YYYY first
240
+ if currency and currency.upper() == 'USD' and is_numeric_format:
241
+ us_formats = [
242
+ "%m/%d/%Y", # 01/15/2025
243
+ "%m-%d-%Y", # 01-15-2025
244
+ "%m.%d.%Y", # 01.15.2025
245
+ "%m/%d/%y", # 01/15/25
246
+ "%m-%d-%y", # 01-15-25
247
+ "%m.%d.%y", # 01.15.25
248
+ ]
249
+ for fmt in us_formats:
250
+ try:
251
+ parsed_date = datetime.strptime(cleaned_date, fmt)
252
+ return parsed_date.strftime("%d-%b-%Y")
253
+ except (ValueError, TypeError):
254
+ continue
255
+
256
+ # FULL DATE FORMATS (day-month-year) - standard parsing
257
  full_date_formats = [
258
+ # ISO formats (4-digit year) - these are unambiguous
259
  "%Y-%m-%d", # 2025-01-15
260
  "%Y/%m/%d", # 2025/01/15
261
  "%Y.%m.%d", # 2025.01.15
262
  "%Y %m %d", # 2025 01 15
263
  "%Y%m%d", # 20250115 (compact)
264
 
265
+ # European formats with full month names (4-digit year) - UNAMBIGUOUS
266
+ "%d %B, %Y", # 15 December, 2025 (with comma)
267
+ "%d %b, %Y", # 15 Dec, 2025 (with comma)
268
  "%d %B %Y", # 15 January 2025
269
  "%d %b %Y", # 15 Jan 2025
270
  "%d-%B-%Y", # 15-January-2025
 
274
  "%d/%B/%Y", # 15/January/2025
275
  "%d/%b/%Y", # 15/Jan/2025
276
 
277
+ # US formats with full month names (4-digit year) - UNAMBIGUOUS
278
  "%B %d, %Y", # January 15, 2025
279
  "%b %d, %Y", # Jan 15, 2025
280
  "%B %d %Y", # January 15 2025
281
  "%b %d %Y", # Jan 15 2025
282
  "%B-%d-%Y", # January-15-2025
283
  "%b-%d-%Y", # Jan-15-2025
284
+ "%B %d,%Y", # January 15,2025 (no space after comma)
285
+ "%b %d,%Y", # Jan 15,2025
286
 
287
  # European formats - Day first (4-digit year)
288
  "%d-%m-%Y", # 15-01-2025
 
290
  "%d.%m.%Y", # 15.01.2025
291
  "%d %m %Y", # 15 01 2025
292
 
293
+ # US formats - Month first (4-digit year) - fallback reached only when the day-first formats above do not match (USD numeric dates are tried month-first earlier)
294
  "%m-%d-%Y", # 01-15-2025
295
  "%m/%d/%Y", # 01/15/2025
296
  "%m.%d.%Y", # 01.15.2025
 
319
  "%d%m%y", # 150125
320
  "%m%d%y", # 011525
321
 
322
+ # European formats with abbreviated month (2-digit year) - UNAMBIGUOUS
323
+ "%d %B, %y", # 15 December, 25 (with comma)
324
+ "%d %b, %y", # 15 Dec, 25 (with comma)
325
  "%d-%b-%y", # 15-Jan-25
326
  "%d/%b/%y", # 15/Jan/25
327
  "%d.%b.%y", # 15.Jan.25
 
329
  "%d-%B-%y", # 15-January-25
330
  "%d/%B/%y", # 15/January/25
331
 
332
+ # US formats with abbreviated month (2-digit year) - UNAMBIGUOUS
333
  "%b %d, %y", # Jan 15, 25
334
  "%b %d %y", # Jan 15 25
335
  "%B %d, %y", # January 15, 25
 
343
  "%Y%d%m", # 20251501
344
  ]
345
 
346
+ # Try full date formats → output as dd-MMM-yyyy
347
  for fmt in full_date_formats:
348
  try:
349
+ parsed_date = datetime.strptime(cleaned_date, fmt)
350
  return parsed_date.strftime("%d-%b-%Y")
351
  except (ValueError, TypeError):
352
  continue
353
 
 
 
 
 
 
 
 
 
 
 
 
354
  # MONTH-YEAR ONLY FORMATS - output as MMM-yyyy
355
  month_year_formats = [
356
  # Full month name with year
 
391
  # Try month-year formats → output as MMM-yyyy (no day)
392
  for fmt in month_year_formats:
393
  try:
394
+ parsed_date = datetime.strptime(cleaned_date, fmt)
395
  return parsed_date.strftime("%b-%Y") # Aug-2025 format
396
  except (ValueError, TypeError):
397
  continue
 
399
  # If no format matched, return empty string
400
  return ""
401
 
402
+ def parse_date_to_object(date_str, currency=None):
403
  """
404
  Parse a date string to a datetime.date object for date_input widget
405
+ Currency-aware: If USD and numeric format, treat as MM/DD/YYYY
406
  Returns None if date cannot be parsed
407
  """
408
  if not date_str or date_str == "":
 
412
  date_str = date_str.strip()
413
  if date_str == "":
414
  return None
415
+
416
+ # EXTRA CLEANING: Replace various unicode spaces and clean up
417
+ date_str = re.sub(r'[\u00A0\u2000-\u200B\u202F\u205F\u3000]', ' ', date_str)
418
+ date_str = re.sub(r'[\u200B-\u200D\uFEFF]', '', date_str)
419
+ date_str = re.sub(r'\s+', ' ', date_str).strip()
420
+
421
+ # Clean ordinal suffixes FIRST (1st, 2nd, 3rd, 4th, 06th, etc.)
422
+ cleaned_date = str(date_str)
423
+ if isinstance(date_str, str):
424
+ cleaned_date = re.sub(r'(\d+)(st|nd|rd|th)\b', r'\1', date_str, flags=re.IGNORECASE)
425
 
426
+ # Check if date is NUMERIC format (contains only digits and separators)
427
+ is_numeric_format = bool(re.match(r'^\d{1,2}[/\-\.]\d{1,2}[/\-\.]\d{2,4}$', cleaned_date))
428
+
429
+ # US FORMAT PRIORITY: If currency is USD and date is numeric, try MM/DD/YYYY first
430
+ if currency and currency.upper() == 'USD' and is_numeric_format:
431
+ us_formats = [
432
+ "%m/%d/%Y", # 01/15/2025
433
+ "%m-%d-%Y", # 01-15-2025
434
+ "%m.%d.%Y", # 01.15.2025
435
+ "%m/%d/%y", # 01/15/25
436
+ "%m-%d-%y", # 01-15-25
437
+ "%m.%d.%y", # 01.15.25
438
+ ]
439
+ for fmt in us_formats:
440
+ try:
441
+ parsed_date = datetime.strptime(cleaned_date, fmt)
442
+ return parsed_date.date()
443
+ except (ValueError, TypeError):
444
+ continue
445
+
446
+ # Standard formats
447
  formats = [
448
  # ISO formats (4-digit year)
449
+ "%Y-%m-%d", "%Y/%m/%d", "%Y.%m.%d", "%Y %m %d", "%Y%m%d",
 
 
 
 
450
 
451
+ # Text month formats with a comma after the month name, e.g. "06 December, 2025"
452
+ "%d %B, %Y", "%d %b, %Y", # 06 December, 2025 / 06 Dec, 2025
 
 
 
 
 
 
 
453
 
454
+ # Text month formats - UNAMBIGUOUS
455
+ "%d %B %Y", "%d %b %Y", "%d-%B-%Y", "%d-%b-%Y",
456
+ "%d.%B.%Y", "%d.%b.%Y", "%d/%B/%Y", "%d/%b/%Y",
457
+ "%B %d, %Y", "%b %d, %Y", "%B %d %Y", "%b %d %Y",
458
+ "%B-%d-%Y", "%b-%d-%Y", "%B %d,%Y", "%b %d,%Y",
 
 
459
 
460
+ # European formats - Day first
461
+ "%d-%m-%Y", "%d/%m/%Y", "%d.%m.%Y", "%d %m %Y",
462
+ "%d-%m-%y", "%d/%m/%y", "%d.%m.%y", "%d %m %y",
 
 
463
 
464
+ # US formats - Month first
465
+ "%m-%d-%Y", "%m/%d/%Y", "%m.%d.%Y", "%m %d %Y",
466
+ "%m-%d-%y", "%m/%d/%y", "%m.%d.%y", "%m %d %y",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
467
 
468
  # ISO with 2-digit year
469
+ "%y-%m-%d", "%y/%m/%d", "%y.%m.%d", "%y %m %d",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
470
 
471
+ # Compact formats
472
+ "%y%m%d", "%d%m%y", "%m%d%y", "%d%m%Y", "%m%d%Y", "%Y%d%m",
 
 
473
 
474
+ # Text month with 2-digit year (with comma)
475
+ "%d %B, %y", "%d %b, %y", # 06 December, 25 / 06 Dec, 25
476
+ "%d-%b-%y", "%d/%b/%y", "%d.%b.%y", "%d %b %y",
477
+ "%d-%B-%y", "%d/%B/%y",
478
+ "%b %d, %y", "%b %d %y", "%B %d, %y", "%B %d %y",
479
+ "%b-%d-%y", "%B-%d-%y",
 
 
 
 
480
 
481
+ # Month-year only
482
+ "%B %Y", "%b %Y", "%B, %Y", "%b, %Y",
483
+ "%B-%Y", "%b-%Y", "%B/%Y", "%b/%Y",
484
+ "%m/%Y", "%m-%Y", "%m.%Y", "%m %Y",
485
+ "%Y-%m", "%Y/%m", "%Y.%m", "%Y %m",
486
+ "%m/%y", "%m-%y", "%m.%y", "%m %y",
487
+ "%y-%m", "%y/%m",
488
+ "%B %y", "%b %y", "%B-%y", "%b-%y",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
489
  ]
490
 
 
491
  for fmt in formats:
492
  try:
493
+ parsed_date = datetime.strptime(cleaned_date, fmt)
494
  return parsed_date.date()
495
  except (ValueError, TypeError):
496
  continue
497
 
 
 
 
 
 
 
 
 
 
 
 
 
 
498
  return None
499
 
500
 
 
741
  header = data.get("header", {})
742
  summary = data.get("summary", {})
743
  items = data.get("items", [])
744
+
745
+ # Get currency first for date parsing (USD uses MM/DD/YYYY for numeric dates)
746
+ currency = summary.get("currency", "")
747
 
748
  result = {
749
  "Invoice Number": header.get("invoice_no", ""),
750
+ "Invoice Date": normalize_date(header.get("invoice_date", ""), currency),
751
+ "Due Date": normalize_date(header.get("due_date", ""), currency),
752
  "Sender Name": header.get("sender_name", ""),
753
  "Sender Address": header.get("sender_addr", ""),
754
  "Sender": {
 
774
  "Tax Percentage": clean_amount(summary.get("tax_rate", "0")),
775
  "Total Tax": clean_amount(summary.get("tax_amount", "0")),
776
  "Total Amount": clean_amount(summary.get("total_amount", "0")),
777
+ "Currency": currency,
778
  "Itemized Data": []
779
  }
780
 
 
1175
  return s
1176
  return None
1177
 
1178
+ # Get currency first for date parsing (USD uses MM/DD/YYYY for numeric dates)
1179
+ currency = (pick_first("currency") or "").strip()
1180
+
1181
  ui["Invoice Number"] = pick_first("invoice_no", "invoice_number", "invoiceid", "invoice id") or ""
1182
+ ui["Invoice Date"] = normalize_date(pick_first("invoice_date", "date", "invoice date") or "", currency)
1183
+ ui["Due Date"] = normalize_date(pick_first("due_date", "due_date", "due") or "", currency)
1184
  ui["Sender Name"] = pick_first("sender_name", "sender") or ""
1185
  ui["Sender Address"] = pick_first("sender_addr", "sender_address", "sender addr") or ""
1186
  ui["Recipient Name"] = pick_first("rcpt_name", "recipient_name", "recipient", "rcpt") or ""
 
1200
  ui["Tax Percentage"] = clean_number(pick_first("tax_rate", "tax_percentage", "tax pct", "tax percentage") or 0.0)
1201
  ui["Total Tax"] = clean_number(pick_first("tax_amount", "tax", "total_tax") or 0.0)
1202
  ui["Total Amount"] = clean_number(pick_first("total_amount", "grand_total", "total", "amount") or 0.0)
1203
+ ui["Currency"] = currency
1204
 
1205
  items_rows = []
1206
 
 
1520
 
1521
  # --------- Initialize widget state - ONLY IF NOT EXISTS (avoid overwriting user edits) ----------
1522
  bank = form_data.get("Bank Details", {}) if isinstance(form_data.get("Bank Details", {}), dict) else {}
1523
+
1524
+ # Get currency for date parsing (USD uses MM/DD/YYYY for numeric dates)
1525
+ form_currency = form_data.get('Currency', '')
1526
 
1527
  # Only initialize if key doesn't exist - this preserves user edits between reruns
1528
  if f"Invoice Number_{selected_hash}" not in st.session_state:
1529
  st.session_state[f"Invoice Number_{selected_hash}"] = form_data.get('Invoice Number', '')
1530
 
1531
+ # Parse dates to date objects for date_input widgets (pass currency for US date handling)
1532
  if f"Invoice Date_{selected_hash}" not in st.session_state:
1533
+ invoice_date_obj = parse_date_to_object(form_data.get('Invoice Date', ''), form_currency)
1534
  st.session_state[f"Invoice Date_{selected_hash}"] = invoice_date_obj
1535
 
1536
  if f"Due Date_{selected_hash}" not in st.session_state:
1537
+ due_date_obj = parse_date_to_object(form_data.get('Due Date', ''), form_currency)
1538
  st.session_state[f"Due Date_{selected_hash}"] = due_date_obj
1539
 
1540
  if f"Currency_{selected_hash}" not in st.session_state: