Abhisesh7 committed on
Commit
aa78cd2
·
verified ·
1 Parent(s): bf24caa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -11
app.py CHANGED
@@ -286,6 +286,7 @@ def extract_entities(pdf_file, text):
286
  primary_invoice_number = "Unknown"
287
  vendor_name = "Unknown"
288
  invoice_date = datetime.now().date()
 
289
  total_amount = 0.0
290
 
291
  # Extract items first to use as a filter for NER
@@ -296,6 +297,7 @@ def extract_entities(pdf_file, text):
296
  invoice_num_pattern = r"(?:Invoice\s*(?:Number|No\.?|#)|Advice\s*(?:No\.?)|Order\s*(?:Number|No\.?))\s*[:\-\s#]*([\w-]+)|(?:INV-|ORD-|Z\d{2}APOT\d{9})([\w-]+)"
297
  vendor_pattern = r"(?:Vendor\s*(?:Name|Company)?|Supplier|Company\s*Name|From|Sold\s*By|Restaurant\s*Name|Vendor)\s*[:\-\s]*([A-Za-z\s&\.\-]+)(?=\s*(?:Address|Invoice\s*(?:No|Number)|Date|Phone|Email|\n|$))"
298
  invoice_date_pattern = r"(?:Invoice\s*Date|Date|Issue\s*Date)\s*[:\-\s]*(\d{4}-\d{2}-\d{2}|\d{2}/\d{2}/\d{4}|\d{2}-\d{2}-\d{4}|[A-Za-z]+\s*\d{1,2},\s*\d{4})"
 
299
  total_amount_pattern = r"(?:Total\s*(?:Amount|Due|Value))\s*[:\-\s]*[₹$£€]?\s*([\d,]+\.?\d*)\s*(?:USD|GBP|EUR|INR)?"
300
 
301
  # Invoice Numbers (capture all, then prioritize)
@@ -362,6 +364,24 @@ def extract_entities(pdf_file, text):
362
  except ValueError as e:
363
  print(f"Failed to parse Invoice Date '{date_str}': {str(e)}") # Debug
364
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
365
  # Total Amount (prioritize the final total after taxes and fees)
366
  total_amount_matches = re.finditer(total_amount_pattern, text, re.IGNORECASE)
367
  total_amounts = []
@@ -394,7 +414,7 @@ def extract_entities(pdf_file, text):
394
  break
395
  print(f"Calculated Total Amount: {total_amount}") # Debug
396
 
397
- return primary_invoice_number, vendor_name, invoice_date, total_amount
398
 
399
  def fetch_vendor_history(vendor_name, invoice_number, time_window_days=30):
400
  """Fetch historical invoices for the vendor from Salesforce."""
@@ -506,7 +526,7 @@ def calculate_fraud_score(amount, is_amount_anomaly, is_frequency_anomaly, is_ve
506
  score += 10
507
  reasoning.append("Invoice date is in the future.")
508
 
509
- if due_date < today and invoice_date < today:
510
  score += 15
511
  reasoning.append("Due date has passed, indicating potential payment delay.")
512
 
@@ -536,13 +556,10 @@ def process_invoice(pdf_file):
536
  if "Error" in text:
537
  return f"**Error**: {text}"
538
 
539
- invoice_number, vendor_name, invoice_date, total_amount = extract_entities(pdf_file, text)
540
  items = extract_items(pdf_file, text)
541
  text_length = len(text)
542
 
543
- # Calculate Due Date (Invoice Date + 30 days)
544
- due_date = invoice_date + timedelta(days=30)
545
-
546
  history_df = fetch_vendor_history(vendor_name, invoice_number)
547
  consistency_issues = check_data_consistency(invoice_number, vendor_name, invoice_date, history_df)
548
 
@@ -586,19 +603,26 @@ def process_invoice(pdf_file):
586
  items_str = "; ".join(item['description'] for item in items) # Fallback to raw descriptions
587
  print(f"Fallback items_str: {items_str}")
588
 
589
- # Format the invoice date and due date as DD-MM-YYYY
590
  formatted_invoice_date = invoice_date.strftime("%d-%m-%Y")
591
- formatted_due_date = due_date.strftime("%d-%m-%Y")
 
592
 
593
  output = [
594
  "## Fraud Detection Summary",
595
  f"- **Invoice Number**: {invoice_number}",
596
  f"- **Vendor Name**: {vendor_name}",
597
  f"- **Invoice Date**: {formatted_invoice_date}",
598
- f"- **Due Date**: {formatted_due_date}",
599
- f"- **Invoice Amount**: ${total_amount:,.2f}" if '$' in text else f"- **Invoice Amount**: ₹{total_amount:,.2f}",
600
  ]
601
 
 
 
 
 
 
 
 
 
602
  # Add items section
603
  output.append("- **Items Selected**:")
604
  if items:
@@ -629,13 +653,15 @@ def process_invoice(pdf_file):
629
  "Vendor_Name__c": vendor_name,
630
  "Invoice_Amount__c": total_amount,
631
  "Invoice_Date__c": str(invoice_date),
632
- "Due_Date__c": str(due_date),
633
  "Fraud_Score__c": fraud_score,
634
  "Fraud_Reason__c": "; ".join(fraud_reasoning),
635
  "Flagged__c": fraud_score > 50,
636
  "Status__c": "Flagged" if fraud_score > 50 else "Cleared",
637
  "Items_Selected__c": items_str
638
  }
 
 
 
639
  print(f"Record data being sent to Salesforce: {record_data}") # Debug
640
  sf.Invoice_Record__c.create(record_data)
641
  print(f"Successfully created Salesforce record with Items_Selected__c: {items_str}") # Debug
 
286
  primary_invoice_number = "Unknown"
287
  vendor_name = "Unknown"
288
  invoice_date = datetime.now().date()
289
+ due_date = None # Due Date will be None unless explicitly found in the invoice
290
  total_amount = 0.0
291
 
292
  # Extract items first to use as a filter for NER
 
297
  invoice_num_pattern = r"(?:Invoice\s*(?:Number|No\.?|#)|Advice\s*(?:No\.?)|Order\s*(?:Number|No\.?))\s*[:\-\s#]*([\w-]+)|(?:INV-|ORD-|Z\d{2}APOT\d{9})([\w-]+)"
298
  vendor_pattern = r"(?:Vendor\s*(?:Name|Company)?|Supplier|Company\s*Name|From|Sold\s*By|Restaurant\s*Name|Vendor)\s*[:\-\s]*([A-Za-z\s&\.\-]+)(?=\s*(?:Address|Invoice\s*(?:No|Number)|Date|Phone|Email|\n|$))"
299
  invoice_date_pattern = r"(?:Invoice\s*Date|Date|Issue\s*Date)\s*[:\-\s]*(\d{4}-\d{2}-\d{2}|\d{2}/\d{2}/\d{4}|\d{2}-\d{2}-\d{4}|[A-Za-z]+\s*\d{1,2},\s*\d{4})"
300
+ due_date_pattern = r"(?:Due\s*Date|Payment\s*Due\s*(?:Date)?)\s*[:\-\s]*(\d{4}-\d{2}-\d{2}|\d{2}/\d{2}/\d{4}|\d{2}-\d{2}-\d{4}|[A-Za-z]+\s*\d{1,2},\s*\d{4})"
301
  total_amount_pattern = r"(?:Total\s*(?:Amount|Due|Value))\s*[:\-\s]*[₹$£€]?\s*([\d,]+\.?\d*)\s*(?:USD|GBP|EUR|INR)?"
302
 
303
  # Invoice Numbers (capture all, then prioritize)
 
364
  except ValueError as e:
365
  print(f"Failed to parse Invoice Date '{date_str}': {str(e)}") # Debug
366
 
367
+ # Due Date (only extract if explicitly present in the invoice)
368
+ due_date_match = re.search(due_date_pattern, text, re.IGNORECASE)
369
+ if due_date_match:
370
+ date_str = due_date_match.group(1)
371
+ try:
372
+ if "/" in date_str:
373
+ due_date = datetime.strptime(date_str, "%m/%d/%Y").date()
374
+ elif "," in date_str:
375
+ due_date = datetime.strptime(date_str, "%B %d, %Y").date()
376
+ elif "-" in date_str:
377
+ try:
378
+ due_date = datetime.strptime(date_str, "%Y-%m-%d").date()
379
+ except ValueError:
380
+ due_date = datetime.strptime(date_str, "%d-%m-%Y").date()
381
+ print(f"Matched Due Date: {due_date}") # Debug
382
+ except ValueError as e:
383
+ print(f"Failed to parse Due Date '{date_str}': {str(e)}") # Debug
384
+
385
  # Total Amount (prioritize the final total after taxes and fees)
386
  total_amount_matches = re.finditer(total_amount_pattern, text, re.IGNORECASE)
387
  total_amounts = []
 
414
  break
415
  print(f"Calculated Total Amount: {total_amount}") # Debug
416
 
417
+ return primary_invoice_number, vendor_name, invoice_date, due_date, total_amount
418
 
419
  def fetch_vendor_history(vendor_name, invoice_number, time_window_days=30):
420
  """Fetch historical invoices for the vendor from Salesforce."""
 
526
  score += 10
527
  reasoning.append("Invoice date is in the future.")
528
 
529
+ if due_date and due_date < today and invoice_date < today:
530
  score += 15
531
  reasoning.append("Due date has passed, indicating potential payment delay.")
532
 
 
556
  if "Error" in text:
557
  return f"**Error**: {text}"
558
 
559
+ invoice_number, vendor_name, invoice_date, due_date, total_amount = extract_entities(pdf_file, text)
560
  items = extract_items(pdf_file, text)
561
  text_length = len(text)
562
 
 
 
 
563
  history_df = fetch_vendor_history(vendor_name, invoice_number)
564
  consistency_issues = check_data_consistency(invoice_number, vendor_name, invoice_date, history_df)
565
 
 
603
  items_str = "; ".join(item['description'] for item in items) # Fallback to raw descriptions
604
  print(f"Fallback items_str: {items_str}")
605
 
606
+ # Format the invoice date as DD-MM-YYYY
607
  formatted_invoice_date = invoice_date.strftime("%d-%m-%Y")
608
+ # Format the due date as DD-MM-YYYY only if it exists
609
+ formatted_due_date = due_date.strftime("%d-%m-%Y") if due_date else None
610
 
611
  output = [
612
  "## Fraud Detection Summary",
613
  f"- **Invoice Number**: {invoice_number}",
614
  f"- **Vendor Name**: {vendor_name}",
615
  f"- **Invoice Date**: {formatted_invoice_date}",
 
 
616
  ]
617
 
618
+ # Only include Due Date in the output if it was extracted from the invoice
619
+ if formatted_due_date:
620
+ output.append(f"- **Due Date**: {formatted_due_date}")
621
+
622
+ output.append(
623
+ f"- **Invoice Amount**: ${total_amount:,.2f}" if '$' in text else f"- **Invoice Amount**: ₹{total_amount:,.2f}"
624
+ )
625
+
626
  # Add items section
627
  output.append("- **Items Selected**:")
628
  if items:
 
653
  "Vendor_Name__c": vendor_name,
654
  "Invoice_Amount__c": total_amount,
655
  "Invoice_Date__c": str(invoice_date),
 
656
  "Fraud_Score__c": fraud_score,
657
  "Fraud_Reason__c": "; ".join(fraud_reasoning),
658
  "Flagged__c": fraud_score > 50,
659
  "Status__c": "Flagged" if fraud_score > 50 else "Cleared",
660
  "Items_Selected__c": items_str
661
  }
662
+ # Only include Due_Date__c if a due date was extracted
663
+ if due_date:
664
+ record_data["Due_Date__c"] = str(due_date)
665
  print(f"Record data being sent to Salesforce: {record_data}") # Debug
666
  sf.Invoice_Record__c.create(record_data)
667
  print(f"Successfully created Salesforce record with Items_Selected__c: {items_str}") # Debug