Abhisesh7 committed on
Commit
6ad9a18
·
verified ·
1 Parent(s): 0c25273

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +436 -22
app.py CHANGED
@@ -1,44 +1,458 @@
1
  import os
 
2
  import logging
 
 
 
 
 
 
 
 
 
3
  import gradio as gr
4
- from pdf_extraction import extract_text_from_pdf
5
- from image_extraction import extract_text_from_image
6
 
7
# Configure the root logger once at import time. DEBUG is the most verbose
# standard level, so every log record emitted by this module is shown.
logging.basicConfig(level=logging.DEBUG)  # Set to DEBUG for detailed logging
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
10
 
11
def process_invoice(file):
    """Return the raw text extracted from an uploaded invoice.

    ``file`` must expose the upload's path as ``file.name``. PDFs go through
    ``extract_text_from_pdf`` and PNG/JPG/JPEG images through
    ``extract_text_from_image``; any other extension yields an error string
    instead of extracted text.
    """
    source_path = file.name
    suffix = os.path.splitext(source_path)[1].lower()

    # Guard clause: reject anything that is neither a PDF nor a supported image.
    if suffix != '.pdf' and suffix not in ['.png', '.jpg', '.jpeg']:
        return f"Error: Unsupported file type '{suffix}'. Please upload a PDF, PNG, or JPG file."

    if suffix == '.pdf':
        text = extract_text_from_pdf(source_path)
    else:
        text = extract_text_from_image(source_path)

    # Error messages and successful extractions are both returned unchanged;
    # the membership test is kept so a non-string result still raises here.
    if "Error" in text:
        return text
    return text
27
 
28
def gradio_interface(file):
    """Gradio callback: prompt for an upload, or return the extracted text.

    Returns a fixed prompt when nothing has been uploaded; otherwise returns
    whatever ``process_invoice`` produces for the uploaded file.
    """
    return "Please upload a PDF or image file." if file is None else process_invoice(file)
34
 
35
# Build the UI. The CSS rule passed to Blocks hides links whose href contains
# "share" and that wrap an SVG icon.
with gr.Blocks(css=".prose a[href*='share']:has(svg) {display:none !important;}") as iface:
    gr.Markdown("# Invoice Text Extraction")
    with gr.Row():
        # Upload widget on one side, extracted text on the other.
        file_input = gr.File(label="Upload Invoice (PDF, PNG, JPG)")
        result_output = gr.Textbox(label="Extracted Text")
    # Re-run the extraction whenever the uploaded file changes (or is cleared).
    file_input.change(fn=gradio_interface, inputs=file_input, outputs=result_output)

# Start the app only when this file is executed directly.
if __name__ == "__main__":
    logger.info("Starting the application...")
    iface.launch(share=True)
 
1
  import os
2
+ from dotenv import load_dotenv
3
  import logging
4
+ import pdfplumber
5
+ import pandas as pd
6
+ import numpy as np
7
+ from transformers import pipeline
8
+ from sklearn.ensemble import IsolationForest
9
+ from sklearn.preprocessing import StandardScaler
10
+ import uuid
11
+ from datetime import datetime, timedelta
12
+ import re
13
  import gradio as gr
14
+ from simple_salesforce import Salesforce, SalesforceAuthenticationFailed
15
+ from image_ocr import extract_text_from_image # Import the image OCR function
16
 
17
# Load environment variables from .env file
load_dotenv()

# Configure environment for CPU usage
os.environ["CUDA_VISIBLE_DEVICES"] = ""  # Disable GPU usage
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"  # Disable oneDNN optimizations

# Set up logging to suppress transformers warnings
logging.getLogger("transformers").setLevel(logging.ERROR)

# Read Salesforce credentials from environment variables
SF_USERNAME = os.getenv("SF_USERNAME")
SF_PASSWORD = os.getenv("SF_PASSWORD")
SF_SECURITY_TOKEN = os.getenv("SF_SECURITY_TOKEN")

print(f"Salesforce login info: username={SF_USERNAME}")

# Salesforce connection with error handling.
# `sf` stays None whenever no session could be established; the functions in
# this file that talk to Salesforce check for None and skip the call, so the
# app keeps working without a Salesforce org.
sf = None
if None in (SF_USERNAME, SF_PASSWORD, SF_SECURITY_TOKEN):
    # Constructing the client without complete login information raises
    # instead of returning, which would abort the import of this module.
    print("Salesforce credentials are not fully configured; continuing without Salesforce.")
else:
    try:
        sf = Salesforce(
            username=SF_USERNAME,
            password=SF_PASSWORD,
            security_token=SF_SECURITY_TOKEN
        )
        print("Salesforce login successful.")
    except SalesforceAuthenticationFailed as e:
        print(f"Salesforce authentication failed: {e}")
    except Exception as e:
        # Start-up boundary: a network or configuration problem must not stop
        # the app from launching, so report it and run without Salesforce.
        print(f"Salesforce connection failed: {e}")

# Initialize Hugging Face NER pipeline (force CPU)
ner_pipeline = pipeline("ner", model="dslim/bert-base-NER", tokenizer="dslim/bert-base-NER", device=-1)
48
+
49
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, one newline after each page.

    Pages for which no text is extracted contribute only their newline. Any
    failure while opening or reading the file is reported as a string that
    starts with "Error extracting text:" instead of being raised.
    """
    try:
        with pdfplumber.open(pdf_file) as document:
            page_chunks = [(page.extract_text() or "") + "\n" for page in document.pages]
            text = "".join(page_chunks)
            print("Extracted text:\n", text)  # Debug: Print extracted text
            return text
    except Exception as e:
        return f"Error extracting text: {str(e)}"
61
+
62
def extract_items(text):
    """Extract line items from the invoice's item table.

    The table is located by a header line containing all of "Item Description",
    "Quantity", "Unit Price" and "Total Price". Rows are read from the line
    after the header up to (not including) the first line mentioning
    "Total Amount", "Total Due" or "Promo Code", or to the end of the text.

    Rows may be pipe-delimited (``| Widget | 2 | 10.00 | 20.00 |``) or
    whitespace-delimited (``Widget 2 10.00 20.00``). Prices may carry a leading
    ``$`` and thousands separators.

    Args:
        text: Full invoice text; ``None`` or an empty string yields no items.

    Returns:
        A list of dicts with keys ``description`` (str), ``quantity`` (int),
        ``unit_price`` (float) and ``total_price`` (float). Empty when the
        header is missing or no row matches.
    """
    items = []
    if not text:
        return items

    # Replace escaped dollar signs
    text = text.replace(r'\$', '$')

    # Split text into lines
    lines = text.split('\n')
    print("Text split into lines:", lines)  # Debug

    # Find the table header; data starts on the following line.
    header_columns = ("Item Description", "Quantity", "Unit Price", "Total Price")
    table_start = next(
        (i + 1 for i, line in enumerate(lines) if all(col in line for col in header_columns)),
        -1,
    )
    if table_start == -1:
        print("Table header not found.")
        return items

    # Find the end of the table (before "Total Amount", "Total Due", "Promo Code", or end of text)
    end_markers = ("Total Amount", "Total Due", "Promo Code")
    table_end = next(
        (i for i in range(table_start, len(lines)) if any(marker in lines[i] for marker in end_markers)),
        len(lines),
    )

    print(f"Table section: lines {table_start} to {table_end-1}")  # Debug
    table_lines = lines[table_start:table_end]
    print("Table lines:", table_lines)  # Debug

    # Row pattern. Three deliberate choices:
    #  * the description is matched lazily and the row is anchored at both
    #    ends, so the three numeric columns are always the LAST three fields
    #    and digits inside a description ("USB Cable 3m") stay in it;
    #  * columns must be separated by a pipe or by whitespace, so one number
    #    can never be split across two columns;
    #  * the description is a single character class (no nested repetition),
    #    which keeps matching fast on long lines that are not table rows.
    column_sep = r"(?:\s*\|\s*|\s+)"
    money = r"\$?\s*(\d[\d,]*\.?\d*|\.\d+)"
    table_row_pattern = re.compile(
        r"^\|?\s*([A-Za-z\d\s-]+?)"
        + column_sep + r"(\d+)"
        + column_sep + money
        + column_sep + money
        + r"\s*\|?\s*$"
    )
    # Markdown alignment rows such as "|---|:---:|"; the WHOLE line must match,
    # otherwise a data row that merely starts with "-" would be discarded.
    alignment_row_pattern = re.compile(r"\|?\s*[-:]+(?:\s*\|\s*[-:]+)*\s*\|?")

    for line in table_lines:
        line = line.strip()
        if not line:
            continue
        if alignment_row_pattern.fullmatch(line):
            print(f"Skipping alignment row: {line}")
            continue
        # Replace alignment markers in the row (e.g., "|---|") with "|"
        line = re.sub(r'\|\s*---\s*\|', '|', line)
        print(f"Processing table row: {line}")  # Debug
        match = table_row_pattern.match(line)
        if not match:
            print(f"Failed to match row: {line}")
            continue

        # No trailing-number clean-up is needed: the anchored pattern already
        # keeps quantity and prices out of the description.
        description = match.group(1).strip()
        # Skip lines that look like promo codes
        if "Promo Code" in description:
            print(f"Skipping promo code line: {line}")
            continue
        quantity = int(match.group(2))
        unit_price = float(match.group(3).replace(",", ""))
        total_price = float(match.group(4).replace(",", ""))
        items.append({
            "description": description,
            "quantity": quantity,
            "unit_price": unit_price,
            "total_price": total_price
        })
        print(f"Extracted Item: {description}, Qty: {quantity}, Unit Price: {unit_price}, Total Price: {total_price}")  # Debug

    return items
132
+
133
# Date layouts accepted for the invoice date, tried in order. Month-first is
# tried before day-first for slash dates; day-first only applies when the
# month-first reading is not a valid date (e.g. "25/12/2023").
_INVOICE_DATE_FORMATS = (
    "%Y-%m-%d",
    "%m/%d/%Y",
    "%d/%m/%Y",
    "%d-%m-%Y",
    "%B %d, %Y",
    "%b %d, %Y",
)


def _parse_invoice_date(date_str):
    """Return a date parsed from date_str, or None when no known layout fits."""
    # Normalise "March5,2024" / "March 5,2024" to "March 5, 2024" so the
    # month-name layouts can match regardless of spacing.
    normalized = re.sub(r"^([A-Za-z]+)\s*(\d{1,2}),\s*(\d{4})$", r"\1 \2, \3", date_str.strip())
    for fmt in _INVOICE_DATE_FORMATS:
        try:
            return datetime.strptime(normalized, fmt).date()
        except ValueError:
            continue
    return None


def _vendor_from_ner(text):
    """Return the last organisation name tagged by the NER pipeline, or None."""
    try:
        ner_results = ner_pipeline(text)
    except Exception as e:
        # Vendor detection is best effort: a model failure must not abort the
        # whole invoice, so report it and leave the vendor unresolved.
        print(f"NER vendor lookup failed: {str(e)}")
        return None

    org_name_parts = []
    for entity in ner_results:
        label = entity['entity']
        word = entity['word']
        if not (label.startswith('B-ORG') or label.startswith('I-ORG')):
            continue
        if word.startswith("##") and org_name_parts:
            # "##" marks a continuation piece of the previous word, so glue it
            # on without a space ("Ac" + "##me" -> "Acme").
            org_name_parts[-1] += word[2:]
        elif label.startswith('B-ORG'):
            # A new organisation starts; a later one replaces an earlier one.
            org_name_parts = [word]
        elif org_name_parts:
            org_name_parts.append(word)

    if not org_name_parts:
        return None
    return " ".join(part.replace("##", "") for part in org_name_parts)


def extract_entities(text):
    """Extract invoice number, vendor name, invoice date and total amount.

    Each field is located with a regular expression. When no vendor label is
    found, the NER pipeline is used as a fallback and a candidate is rejected
    if it equals one of the invoice's item descriptions.

    Args:
        text: Full invoice text; ``None`` or an empty string yields defaults.

    Returns:
        A tuple ``(invoice_number, vendor_name, invoice_date, total_amount)``.
        Fields that cannot be found default to "Unknown", "Unknown", today's
        date and 0.0 respectively.
    """
    invoice_number = "Unknown"
    vendor_name = "Unknown"
    invoice_date = datetime.now().date()
    total_amount = 0.0

    if not text:
        return invoice_number, vendor_name, invoice_date, total_amount

    # Flexible regex patterns to handle various invoice formats
    invoice_num_pattern = r"(?:Invoice\s*(?:Number|No\.?|#)|Order\s*(?:Number|No\.?))\s*[:\-\s#]*([\w-]+)|(?:INV-|ORD-)([\w-]+)"
    # \b keeps the labels from matching inside other words, and the name is
    # limited to one line so it cannot absorb the lines that follow it.
    vendor_pattern = (
        r"\b(?:Vendor\s*(?:Name|Company)?|Supplier|Company\s*Name|From|Sold\s*By)\b"
        r"\s*[:\-\s]*([A-Za-z &.\-\t]+)"
        r"(?=\s*(?:Address|Invoice\s*(?:No|Number)|Date|Phone|Email|\n|$))"
    )
    invoice_date_pattern = r"\b(?:Invoice\s*Date|Date|Issue\s*Date)\s*[:\-\s]*(\d{4}-\d{2}-\d{2}|\d{2}/\d{2}/\d{4}|\d{2}-\d{2}-\d{4}|[A-Za-z]+\s*\d{1,2},\s*\d{4})"
    # \b stops "Subtotal" from being read as the total; the amount must start
    # with a digit so a stray comma can never reach float().
    total_amount_pattern = r"\b(?:Total\s*(?:Amount|Due)?|Amount\s*Due)\s*[:\-\s]*[$£€]?\s*(\d[\d,]*(?:\.\d+)?)\s*(?:USD|GBP|EUR)?"

    # Invoice Number
    invoice_num_match = re.search(invoice_num_pattern, text, re.IGNORECASE)
    if invoice_num_match:
        invoice_number = invoice_num_match.group(1) if invoice_num_match.group(1) else invoice_num_match.group(2)
        print(f"Matched Invoice Number: {invoice_number}")  # Debug

    # Vendor Name
    vendor_match = re.search(vendor_pattern, text, re.IGNORECASE)
    if vendor_match and vendor_match.group(1).strip():
        vendor_name = vendor_match.group(1).strip()
        print(f"Matched Vendor Name (Regex): {vendor_name}")  # Debug
    else:
        candidate_vendor_name = _vendor_from_ner(text)
        if candidate_vendor_name:
            # Items are only needed to filter NER candidates, so they are
            # parsed here rather than on every call.
            item_descriptions = {item["description"].lower() for item in extract_items(text)}
            if candidate_vendor_name.lower() not in item_descriptions:
                vendor_name = candidate_vendor_name
                print(f"NER Matched Vendor Name: {vendor_name}")  # Debug

    # Invoice Date
    invoice_date_match = re.search(invoice_date_pattern, text, re.IGNORECASE)
    if invoice_date_match:
        date_str = invoice_date_match.group(1)
        parsed_date = _parse_invoice_date(date_str)
        if parsed_date is not None:
            invoice_date = parsed_date
            print(f"Matched Invoice Date: {invoice_date}")  # Debug
        else:
            print(f"Failed to parse Invoice Date '{date_str}'")  # Debug

    # Total Amount
    total_amount_match = re.search(total_amount_pattern, text, re.IGNORECASE)
    if total_amount_match:
        total_amount = float(total_amount_match.group(1).replace(",", ""))
        print(f"Matched Total Amount: {total_amount}")  # Debug

    return invoice_number, vendor_name, invoice_date, total_amount
201
+
202
def fetch_vendor_history(vendor_name, invoice_number, time_window_days=30):
    """Fetch the vendor's recent invoice records from Salesforce.

    Args:
        vendor_name: Vendor to look up; matched exactly against Vendor_Name__c.
        invoice_number: Accepted for interface compatibility; not used here.
        time_window_days: How many days back from today to search.

    Returns:
        A DataFrame of up to 100 Invoice_Record__c rows whose invoice date lies
        in the window, with Invoice_Date__c converted to ``date`` objects. An
        empty DataFrame is returned when there is no Salesforce connection, no
        matching record, or the query fails.
    """
    if sf is None:
        return pd.DataFrame()

    try:
        end_date = datetime.now().date()
        start_date = end_date - timedelta(days=time_window_days)

        # The vendor name comes from the uploaded document, so it must be
        # escaped before it is placed inside a quoted SOQL string; otherwise a
        # name containing an apostrophe breaks (or alters) the query.
        safe_vendor_name = (
            str(vendor_name)
            .replace("\\", "\\\\")
            .replace("'", "\\'")
            .replace("\n", "\\n")
            .replace("\r", "\\r")
        )

        query = f"""
            SELECT Invoice_Number__c, Invoice_Amount__c, Invoice_Date__c, Vendor_Name__c
            FROM Invoice_Record__c
            WHERE Invoice_Date__c >= {start_date} AND Invoice_Date__c <= {end_date}
            AND Vendor_Name__c = '{safe_vendor_name}'
            LIMIT 100
        """
        result = sf.query(query)
        records = result['records']

        history_df = pd.DataFrame(records)
        if not history_df.empty:
            history_df['Invoice_Date__c'] = pd.to_datetime(history_df['Invoice_Date__c']).dt.date
        return history_df
    except Exception as e:
        # History is optional input for scoring: report the failure and let
        # the caller continue with no history.
        print(f"Failed to fetch vendor history: {str(e)}")
        return pd.DataFrame()
228
+
229
def check_data_consistency(invoice_number, vendor_name, invoice_date, history_df):
    """Report consistency problems between this invoice and the vendor history.

    The only check performed is whether ``invoice_number`` already appears in
    the history's Invoice_Number__c column. ``invoice_date`` is accepted but
    not inspected. Returns a list of human-readable issue strings, empty when
    nothing is wrong or there is no history.
    """
    if history_df.empty:
        return []

    already_seen = (history_df['Invoice_Number__c'] == invoice_number).any()
    if not already_seen:
        return []

    return [f"Duplicate invoice number '{invoice_number}' found for vendor '{vendor_name}'."]
239
+
240
def detect_anomalies(df, history_df):
    """Add amount, frequency and vendor-pattern anomaly flags to ``df``.

    Args:
        df: Invoices to score; must have an "amount" column. The three flag
            columns are added to this frame in place.
        history_df: The vendor's historical records with Invoice_Date__c and
            Invoice_Amount__c columns, or an empty frame. It is not modified.

    Returns:
        ``df`` with the columns is_amount_anomaly, is_frequency_anomaly and
        is_vendor_pattern_anomaly. Values follow the IsolationForest
        convention used by the scorer: -1 means anomalous, 1 means normal.
        Without history the frequency and vendor-pattern flags are set to 1.
    """
    df["is_amount_anomaly"] = 0
    df["is_frequency_anomaly"] = 0
    df["is_vendor_pattern_anomaly"] = 0

    # Nothing to score: return the (empty) frame with the flag columns present
    # instead of failing later on df["amount"].iloc[0].
    if df.empty:
        return df

    # NOTE(review): every model below is fitted on the same rows it then
    # judges, and the frequency / vendor-pattern frames always hold exactly
    # one row. A model with nothing to compare against cannot single a row
    # out, so these flags likely never become -1 for a single invoice.
    # Confirm whether the intent was to fit on historical records and score
    # the new invoice against them.
    amount_scaled = StandardScaler().fit_transform(df[["amount"]])
    amount_model = IsolationForest(contamination=0.05, random_state=42)
    df["is_amount_anomaly"] = amount_model.fit_predict(amount_scaled)

    if history_df.empty:
        df["is_frequency_anomaly"] = 1
        df["is_vendor_pattern_anomaly"] = 1
        return df

    # Work on a converted copy of the dates so the caller's frame is untouched.
    invoice_dates = pd.to_datetime(history_df['Invoice_Date__c'])
    first_date = invoice_dates.min()
    date_range = (invoice_dates.max() - first_date).days + 1
    frequency = len(history_df) / max(date_range, 1)

    date_diffs = [(d - first_date).days for d in invoice_dates]
    date_clustering = np.std(date_diffs) if len(date_diffs) > 1 else 0

    frequency_df = pd.DataFrame({
        "frequency": [frequency],
        "date_clustering": [date_clustering]
    })
    frequency_scaled = StandardScaler().fit_transform(frequency_df[["frequency", "date_clustering"]])
    frequency_model = IsolationForest(contamination=0.05, random_state=42)
    df["is_frequency_anomaly"] = frequency_model.fit_predict(frequency_scaled)[0]

    if len(history_df) > 1:
        historical_amounts = history_df["Invoice_Amount__c"].astype(float)
        mean_amount = historical_amounts.mean()
        std_amount = historical_amounts.std()
        amount_variance = historical_amounts.var()

        current_amount = df["amount"].iloc[0]
        # A zero (or NaN) spread would make the ratio meaningless; fall back to 1.
        deviation = abs(current_amount - mean_amount) / (std_amount if std_amount > 0 else 1)
        invoice_count = len(history_df)

        vendor_pattern_df = pd.DataFrame({
            "amount_deviation": [deviation],
            "invoice_count": [invoice_count],
            "amount_variance": [amount_variance]
        })
        pattern_scaled = StandardScaler().fit_transform(
            vendor_pattern_df[["amount_deviation", "invoice_count", "amount_variance"]]
        )
        pattern_model = IsolationForest(contamination=0.05, random_state=42)
        df["is_vendor_pattern_anomaly"] = pattern_model.fit_predict(pattern_scaled)[0]
    else:
        df["is_vendor_pattern_anomaly"] = 1

    return df
294
+
295
def calculate_fraud_score(amount, is_amount_anomaly, is_frequency_anomaly, is_vendor_pattern_anomaly, text_length, consistency_issues, invoice_date):
    """Combine the individual fraud signals into a capped score with reasons.

    Points are added for a very high (> 5000) or very low (< 10) amount, a
    future invoice date, each anomaly flag equal to -1, text longer than 500
    characters, and 15 points per consistency issue.

    Returns:
        ``(score, reasoning)`` where score is capped at 100 and reasoning lists
        the triggered messages in evaluation order, followed by the
        consistency issues themselves.
    """
    today = datetime.now().date()
    findings = []  # (points, message) pairs, in the order they are evaluated

    if amount > 5000:
        findings.append((40, "High invoice amount detected."))
    elif amount < 10:
        findings.append((20, "Unusually low invoice amount."))

    if invoice_date > today:
        findings.append((10, "Invoice date is in the future."))

    anomaly_rules = (
        (is_amount_anomaly, 30, "Amount flagged as an anomaly."),
        (is_frequency_anomaly, 25, "Unusual invoice submission frequency or clustering detected."),
        (is_vendor_pattern_anomaly, 25, "Unusual vendor pattern detected (amount deviation, frequency, or variance)."),
    )
    for flag, points, message in anomaly_rules:
        if flag == -1:
            findings.append((points, message))

    if text_length > 500:
        findings.append((10, "Excessive text length in invoice."))

    score = 0.0
    for points, _ in findings:
        score += points
    reasoning = [message for _, message in findings]

    if consistency_issues:
        score += 15 * len(consistency_issues)
        reasoning.extend(consistency_issues)

    return min(score, 100), reasoning
331
+
332
def _clean_item_descriptions(items):
    """Return item descriptions with any quantity/price fragments removed."""
    cleaned = []
    for item in items:
        desc = item['description']
        desc = re.sub(r'\s*Quantity\s*\d+', '', desc, flags=re.IGNORECASE).strip()
        desc = re.sub(r'\s*Unit\s*Price\s*\$\d+\.\d+', '', desc, flags=re.IGNORECASE).strip()
        desc = re.sub(r'\s*Total\s*Price\s*\$\d+\.\d+', '', desc, flags=re.IGNORECASE).strip()
        cleaned.append(desc)
    return cleaned


def process_invoice(file_path):
    """Process a single invoice (PDF or image) and return a markdown report.

    The invoice text is extracted, parsed into number / vendor / date / total
    and line items, compared with the vendor's Salesforce history, scored for
    fraud, and (when a Salesforce connection exists) stored as an
    Invoice_Record__c record.

    Args:
        file_path: Path of the uploaded file as a string. ``.pdf``, ``.png``,
            ``.jpg`` and ``.jpeg`` (any letter case) are supported.

    Returns:
        A markdown string with the fraud detection summary and reasoning, or a
        string starting with "**Error**" when the file type is unsupported or
        no usable text could be extracted.
    """
    lowered_path = file_path.lower()
    # Determine file type and extract text accordingly
    if lowered_path.endswith('.pdf'):
        text = extract_text_from_pdf(file_path)
        # The PDF extractor reports failure with this exact prefix; checking
        # the prefix keeps invoices that merely contain the word "Error".
        extraction_failed = text.startswith("Error extracting text:")
    elif lowered_path.endswith(('.png', '.jpg', '.jpeg')):
        text = extract_text_from_image(file_path)
        # NOTE(review): the OCR helper's failure format is not visible from
        # this file, so the original substring check is kept for images.
        extraction_failed = "Error" in text
    else:
        return "**Error**: Unsupported file type. Please upload a PDF or image (PNG/JPG/JPEG)."

    if extraction_failed:
        return f"**Error**: {text}"

    # A document with no text (e.g. a scan without a text layer) cannot be
    # scored; without this guard it would be reported and stored as a real
    # invoice with unknown fields and a zero amount.
    if not text.strip():
        return "**Error**: No text could be extracted from the uploaded file."

    invoice_number, vendor_name, invoice_date, total_amount = extract_entities(text)
    items = extract_items(text)
    text_length = len(text)

    history_df = fetch_vendor_history(vendor_name, invoice_number)
    consistency_issues = check_data_consistency(invoice_number, vendor_name, invoice_date, history_df)

    data = {
        "invoice_id": str(uuid.uuid4()),
        "invoice_number": invoice_number,
        "vendor_name": vendor_name,
        "amount": total_amount,
        "invoice_date": invoice_date,
        "text_length": text_length
    }
    df = detect_anomalies(pd.DataFrame([data]), history_df)

    fraud_score, fraud_reasoning = calculate_fraud_score(
        df["amount"].iloc[0],
        df["is_amount_anomaly"].iloc[0],
        df["is_frequency_anomaly"].iloc[0],
        df["is_vendor_pattern_anomaly"].iloc[0],
        text_length,
        consistency_issues,
        invoice_date
    )
    # Single source of truth for the flagging threshold used in the report
    # and in the Salesforce record.
    flagged = fraud_score > 50
    status = "Flagged" if flagged else "Cleared"

    # Format items for Salesforce (only include item descriptions)
    cleaned_items = _clean_item_descriptions(items)
    items_str = "; ".join(cleaned_items) if cleaned_items else "No items found"
    print(f"Items string for Salesforce (after cleaning): {items_str}")  # Debug

    # Validate items_str to ensure it contains no quantity or price data
    if re.search(r'Quantity|Unit Price|Total Price|\$\d+\.\d+', items_str, re.IGNORECASE):
        print(f"ERROR: items_str contains unexpected quantity or price data: {items_str}")
        items_str = "; ".join(item['description'] for item in items)  # Fallback to raw descriptions
        print(f"Fallback items_str: {items_str}")

    output = [
        "## Fraud Detection Summary",
        f"- **Invoice Number**: {invoice_number}",
        f"- **Vendor Name**: {vendor_name}",
        f"- **Invoice Date**: {invoice_date}",
        f"- **Invoice Amount**: ${total_amount:,.2f}",
        "- **Items Selected**:",
    ]

    if items:
        for item in items:
            # Trailing bare numbers are dropped from the displayed description.
            clean_description = re.sub(r'\s*\d+\s*\d*$', '', item['description']).strip()
            output.append(f"  - {clean_description}")
    else:
        output.append("  - No items found")

    output.extend([
        f"- **Fraud Score**: {fraud_score}",
        f"- **Status**: {status}",
        f"- **Flagged**: {flagged}",
        "",
        "## Fraud Reasoning"
    ])

    if fraud_reasoning:
        output.extend([f"- {reason}" for reason in fraud_reasoning])
    else:
        output.append("- No specific fraud indicators detected")

    if sf is not None:
        try:
            record_data = {
                "Invoice_Number__c": invoice_number,
                "Vendor_Name__c": vendor_name,
                "Invoice_Amount__c": total_amount,
                "Invoice_Date__c": str(invoice_date),
                "Fraud_Score__c": fraud_score,
                "Fraud_Reason__c": "; ".join(fraud_reasoning),
                "Flagged__c": flagged,
                "Status__c": status,
                "Items_Selected__c": items_str
            }
            print(f"Record data being sent to Salesforce: {record_data}")  # Debug
            sf.Invoice_Record__c.create(record_data)
            print(f"Successfully created Salesforce record with Items_Selected__c: {items_str}")  # Debug
        except Exception as e:
            # Storing the record is best effort; the report is still returned.
            print(f"Failed to create Salesforce record: {str(e)}")

    return "\n".join(output)
 
442
 
443
def gradio_interface(file):
    """Gradio callback: run fraud detection on the uploaded file.

    Args:
        file: The value of the file upload component. ``None`` when nothing is
            uploaded; otherwise either a path string or an object exposing the
            path as ``.name``.

    Returns:
        The markdown report from ``process_invoice``, or a prompt asking for an
        upload when ``file`` is ``None``.
    """
    if file is None:
        return "Please upload a PDF or image file."
    # process_invoice needs a path string; accept file-like upload objects as
    # well by falling back to their .name attribute.
    file_path = file if isinstance(file, str) else getattr(file, "name", file)
    return process_invoice(file_path)
449
 
450
# Build the UI. The CSS rule passed to Blocks hides links whose href contains
# "share" and that wrap an SVG icon.
with gr.Blocks(css=".prose a[href*='share']:has(svg) {display:none !important;}") as iface:
    gr.Markdown("# Invoice Fraud Detection")
    with gr.Row():
        # Upload widget on one side, rendered markdown report on the other.
        file_input = gr.File(label="Upload Invoice (PDF or Image)")
        result_output = gr.Markdown(label="Fraud Detection Results")
    # Re-run the analysis whenever the uploaded file changes (or is cleared).
    file_input.change(fn=gradio_interface, inputs=file_input, outputs=result_output)

# Start the app only when this file is executed directly.
if __name__ == "__main__":
    iface.launch()