Spaces: Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -254,7 +254,6 @@ def extract_items(pdf_file, text):
|
|
| 254 |
if platform_fee_start != -1:
|
| 255 |
platform_fee_end = len(lines)
|
| 256 |
for i in range(platform_fee_start, len(lines)):
|
| 257 |
-
locom = lines[i]
|
| 258 |
if "Total" in lines[i] and "Sr.No" not in lines[i]:
|
| 259 |
platform_fee_end = i + 1
|
| 260 |
break
|
|
@@ -490,8 +489,8 @@ def detect_anomalies(df, history_df):
|
|
| 490 |
|
| 491 |
return df
|
| 492 |
|
| 493 |
-
def calculate_fraud_score(amount, is_amount_anomaly, is_frequency_anomaly, is_vendor_pattern_anomaly, text_length, consistency_issues, invoice_date):
|
| 494 |
-
"""Calculate fraud score based on amount, anomalies, text length, consistency issues, and invoice date."""
|
| 495 |
score = 0.0
|
| 496 |
reasoning = []
|
| 497 |
today = datetime.now().date()
|
|
@@ -507,6 +506,10 @@ def calculate_fraud_score(amount, is_amount_anomaly, is_frequency_anomaly, is_ve
|
|
| 507 |
score += 10
|
| 508 |
reasoning.append("Invoice date is in the future.")
|
| 509 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 510 |
if is_amount_anomaly == -1:
|
| 511 |
score += 30
|
| 512 |
reasoning.append("Amount flagged as an anomaly.")
|
|
@@ -537,6 +540,9 @@ def process_invoice(pdf_file):
|
|
| 537 |
items = extract_items(pdf_file, text)
|
| 538 |
text_length = len(text)
|
| 539 |
|
|
|
|
|
|
|
|
|
|
| 540 |
history_df = fetch_vendor_history(vendor_name, invoice_number)
|
| 541 |
consistency_issues = check_data_consistency(invoice_number, vendor_name, invoice_date, history_df)
|
| 542 |
|
|
@@ -559,7 +565,8 @@ def process_invoice(pdf_file):
|
|
| 559 |
df["is_vendor_pattern_anomaly"].iloc[0],
|
| 560 |
text_length,
|
| 561 |
consistency_issues,
|
| 562 |
-
invoice_date
|
|
|
|
| 563 |
)
|
| 564 |
|
| 565 |
# Format items for Salesforce (only include item descriptions)
|
|
@@ -579,14 +586,16 @@ def process_invoice(pdf_file):
|
|
| 579 |
items_str = "; ".join(item['description'] for item in items) # Fallback to raw descriptions
|
| 580 |
print(f"Fallback items_str: {items_str}")
|
| 581 |
|
| 582 |
-
# Format the invoice date as DD-MM-YYYY
|
| 583 |
formatted_invoice_date = invoice_date.strftime("%d-%m-%Y")
|
|
|
|
| 584 |
|
| 585 |
output = [
|
| 586 |
"## Fraud Detection Summary",
|
| 587 |
f"- **Invoice Number**: {invoice_number}",
|
| 588 |
f"- **Vendor Name**: {vendor_name}",
|
| 589 |
f"- **Invoice Date**: {formatted_invoice_date}",
|
|
|
|
| 590 |
f"- **Invoice Amount**: ${total_amount:,.2f}" if '$' in text else f"- **Invoice Amount**: ₹{total_amount:,.2f}",
|
| 591 |
]
|
| 592 |
|
|
@@ -620,6 +629,7 @@ def process_invoice(pdf_file):
|
|
| 620 |
"Vendor_Name__c": vendor_name,
|
| 621 |
"Invoice_Amount__c": total_amount,
|
| 622 |
"Invoice_Date__c": str(invoice_date),
|
|
|
|
| 623 |
"Fraud_Score__c": fraud_score,
|
| 624 |
"Fraud_Reason__c": "; ".join(fraud_reasoning),
|
| 625 |
"Flagged__c": fraud_score > 50,
|
|
|
|
| 254 |
if platform_fee_start != -1:
|
| 255 |
platform_fee_end = len(lines)
|
| 256 |
for i in range(platform_fee_start, len(lines)):
|
|
|
|
| 257 |
if "Total" in lines[i] and "Sr.No" not in lines[i]:
|
| 258 |
platform_fee_end = i + 1
|
| 259 |
break
|
|
|
|
| 489 |
|
| 490 |
return df
|
| 491 |
|
| 492 |
+
def calculate_fraud_score(amount, is_amount_anomaly, is_frequency_anomaly, is_vendor_pattern_anomaly, text_length, consistency_issues, invoice_date, due_date):
|
| 493 |
+
"""Calculate fraud score based on amount, anomalies, text length, consistency issues, invoice date, and due date."""
|
| 494 |
score = 0.0
|
| 495 |
reasoning = []
|
| 496 |
today = datetime.now().date()
|
|
|
|
| 506 |
score += 10
|
| 507 |
reasoning.append("Invoice date is in the future.")
|
| 508 |
|
| 509 |
+
if due_date < today and invoice_date < today:
|
| 510 |
+
score += 15
|
| 511 |
+
reasoning.append("Due date has passed, indicating potential payment delay.")
|
| 512 |
+
|
| 513 |
if is_amount_anomaly == -1:
|
| 514 |
score += 30
|
| 515 |
reasoning.append("Amount flagged as an anomaly.")
|
|
|
|
| 540 |
items = extract_items(pdf_file, text)
|
| 541 |
text_length = len(text)
|
| 542 |
|
| 543 |
+
# Calculate Due Date (Invoice Date + 30 days)
|
| 544 |
+
due_date = invoice_date + timedelta(days=30)
|
| 545 |
+
|
| 546 |
history_df = fetch_vendor_history(vendor_name, invoice_number)
|
| 547 |
consistency_issues = check_data_consistency(invoice_number, vendor_name, invoice_date, history_df)
|
| 548 |
|
|
|
|
| 565 |
df["is_vendor_pattern_anomaly"].iloc[0],
|
| 566 |
text_length,
|
| 567 |
consistency_issues,
|
| 568 |
+
invoice_date,
|
| 569 |
+
due_date
|
| 570 |
)
|
| 571 |
|
| 572 |
# Format items for Salesforce (only include item descriptions)
|
|
|
|
| 586 |
items_str = "; ".join(item['description'] for item in items) # Fallback to raw descriptions
|
| 587 |
print(f"Fallback items_str: {items_str}")
|
| 588 |
|
| 589 |
+
# Format the invoice date and due date as DD-MM-YYYY
|
| 590 |
formatted_invoice_date = invoice_date.strftime("%d-%m-%Y")
|
| 591 |
+
formatted_due_date = due_date.strftime("%d-%m-%Y")
|
| 592 |
|
| 593 |
output = [
|
| 594 |
"## Fraud Detection Summary",
|
| 595 |
f"- **Invoice Number**: {invoice_number}",
|
| 596 |
f"- **Vendor Name**: {vendor_name}",
|
| 597 |
f"- **Invoice Date**: {formatted_invoice_date}",
|
| 598 |
+
f"- **Due Date**: {formatted_due_date}",
|
| 599 |
f"- **Invoice Amount**: ${total_amount:,.2f}" if '$' in text else f"- **Invoice Amount**: ₹{total_amount:,.2f}",
|
| 600 |
]
|
| 601 |
|
|
|
|
| 629 |
"Vendor_Name__c": vendor_name,
|
| 630 |
"Invoice_Amount__c": total_amount,
|
| 631 |
"Invoice_Date__c": str(invoice_date),
|
| 632 |
+
"Due_Date__c": str(due_date),
|
| 633 |
"Fraud_Score__c": fraud_score,
|
| 634 |
"Fraud_Reason__c": "; ".join(fraud_reasoning),
|
| 635 |
"Flagged__c": fraud_score > 50,
|