Seth0330 committed on
Commit
0ee76cc
·
verified ·
1 Parent(s): 0f1d0fa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -63
app.py CHANGED
@@ -10,7 +10,6 @@ from langchain_community.chat_models import ChatOpenAI
10
  from langchain.agents import initialize_agent, Tool, AgentType
11
  from fuzzywuzzy import fuzz
12
 
13
- # --- CONFIGURATION ---
14
  st.set_page_config(page_title="Accounts Payable AI Agent", layout="wide")
15
 
16
  MODELS = {
@@ -21,27 +20,9 @@ MODELS = {
21
  "response_format": None,
22
  "extra_headers": {},
23
  },
24
- # Keep other models for extraction/fine-tune if you want, but only OpenAI used for agent
25
- "DeepSeek v3": {
26
- "api_url": "https://api.deepseek.com/v1/chat/completions",
27
- "model": "deepseek-chat",
28
- "key_env": "DEEPSEEK_API_KEY",
29
- "response_format": {"type": "json_object"},
30
- },
31
- "Mistral Small": {
32
- "api_url": "https://openrouter.ai/api/v1/chat/completions",
33
- "model": "mistralai/ministral-8b",
34
- "key_env": "OPENROUTER_API_KEY",
35
- "response_format": {"type": "json_object"},
36
- "extra_headers": {
37
- "HTTP-Referer": "https://huggingface.co",
38
- "X-Title": "Invoice Extractor",
39
- },
40
- },
41
  }
42
 
43
- # --- UTILITY FUNCTIONS ---
44
-
45
  def get_api_key(model_choice):
46
  key = os.getenv(MODELS[model_choice]["key_env"])
47
  if not key:
@@ -231,16 +212,14 @@ def find_po_number_in_json(po_number, invoice_json):
231
  fields.append(str(obj))
232
  return fields
233
 
234
- # Clean up PO number
235
  po_str = str(po_number).strip().replace(" ", "").replace(".0", "")
236
  try:
237
- po_int = str(int(float(po_number))) # also get int version
238
  except:
239
  po_int = po_str
240
 
241
  all_strs = [str(s).strip().replace(" ", "").replace(".0", "") for s in _flatten(invoice_json)]
242
 
243
- # Check for exact match, or substring match, with all variants
244
  for s in all_strs:
245
  if not s:
246
  continue
@@ -250,13 +229,10 @@ def find_po_number_in_json(po_number, invoice_json):
250
  return True
251
  return False
252
 
253
-
254
  def find_best_po_match(inv, po_df):
255
  inv_hdr = inv["invoice_header"]
256
  inv_supplier = inv_hdr.get("supplier_name") or ""
257
  inv_po_number = inv_hdr.get("purchase_order_number") or inv_hdr.get("po_number") or inv_hdr.get("order_number") or ""
258
- inv_ship_to = inv_hdr.get("ship_to_name") or ""
259
- inv_bill_to = inv_hdr.get("bill_to_name") or ""
260
  inv_currency = inv_hdr.get("currency") or ""
261
  inv_total_due = clean_num(inv_hdr.get("total_due"))
262
  inv_line_items = inv.get("line_items", [])
@@ -264,9 +240,7 @@ def find_best_po_match(inv, po_df):
264
  scores = []
265
  for idx, row in po_df.iterrows():
266
  po_supplier = row.get("Supplier Name", "")
267
- po_po_number = str(row.get("PO Number", "")) # Make sure string
268
- po_ship_to = row.get("Ship To", "")
269
- po_bill_to = row.get("Bill To", "")
270
  po_currency = row.get("Currency", "")
271
  po_total = clean_num(row.get("PO Total Value", ""))
272
  po_desc = row.get("Item Description", "")
@@ -284,7 +258,6 @@ def find_best_po_match(inv, po_df):
284
  "score": s_supplier
285
  })
286
 
287
- # PO Number scoring: anywhere in JSON
288
  s_po_number = 100 if find_po_number_in_json(po_po_number, inv) else 0
289
  field_details.append({
290
  "field": "PO Number (anywhere in JSON)",
@@ -293,22 +266,6 @@ def find_best_po_match(inv, po_df):
293
  "score": s_po_number
294
  })
295
 
296
- s_ship_to = weighted_fuzzy_score(inv_ship_to, po_ship_to)
297
- field_details.append({
298
- "field": "Ship To",
299
- "invoice": inv_ship_to,
300
- "po": po_ship_to,
301
- "score": s_ship_to
302
- })
303
-
304
- s_bill_to = weighted_fuzzy_score(inv_bill_to, po_bill_to)
305
- field_details.append({
306
- "field": "Bill To",
307
- "invoice": inv_bill_to,
308
- "po": po_bill_to,
309
- "score": s_bill_to
310
- })
311
-
312
  s_currency = weighted_fuzzy_score(inv_currency, po_currency)
313
  field_details.append({
314
  "field": "Currency",
@@ -325,7 +282,7 @@ def find_best_po_match(inv, po_df):
325
  "score": s_total
326
  })
327
 
328
- # Check for at least one line item strong match
329
  line_item_score = 0
330
  line_reason = ""
331
  best_line_detail = None
@@ -363,22 +320,18 @@ def find_best_po_match(inv, po_df):
363
  f"unit_score={unit_score}, amount_score={amount_score}"
364
  )
365
 
366
- # Adjust scoring weights: Supplier 20%, PO Number 15%, Ship To 10%, Bill To 10%, Currency 10%, Total Due 20%, Line Item 15%
367
  total_score = (
368
- s_supplier * 0.20 +
369
- s_po_number * 0.15 +
370
- s_ship_to * 0.10 +
371
- s_bill_to * 0.10 +
372
  s_currency * 0.10 +
373
  s_total * 0.20 +
374
- line_item_score * 0.15
375
  )
376
 
377
  reason = (
378
  f"Supplier match: {s_supplier}/100 (invoice: '{inv_supplier}' vs PO: '{po_supplier}'), "
379
  f"PO Number: {s_po_number}/100 ({'found anywhere in JSON' if s_po_number else 'not found'}), "
380
- f"Ship To: {s_ship_to}/100 (invoice: '{inv_ship_to}' vs PO: '{po_ship_to}'), "
381
- f"Bill To: {s_bill_to}/100 (invoice: '{inv_bill_to}' vs PO: '{po_bill_to}'), "
382
  f"Currency: {s_currency}/100 (invoice: '{inv_currency}' vs PO: '{po_currency}'), "
383
  f"Total Due: {'match' if s_total else 'no match'} (invoice: {inv_total_due} vs PO: {po_total}), "
384
  f"Line item best match: {int(line_item_score)}/100. {line_reason}"
@@ -388,8 +341,6 @@ def find_best_po_match(inv, po_df):
388
  "po_idx": idx,
389
  "po_supplier": po_supplier,
390
  "po_po_number": po_po_number,
391
- "po_ship_to": po_ship_to,
392
- "po_bill_to": po_bill_to,
393
  "po_total": po_total,
394
  "scores": field_details,
395
  "line_item_score": line_item_score,
@@ -407,7 +358,6 @@ def find_best_po_match(inv, po_df):
407
  best_row, best_score, reason, debug = scores[0]
408
  return best_row, best_score, reason, debug
409
 
410
-
411
  def extract_invoice_info(model_choice, text):
412
  prompt = get_extraction_prompt(model_choice, text)
413
  raw = query_llm(model_choice, prompt)
@@ -533,6 +483,11 @@ if st.button("Extract") and inv_file:
533
  extracted_info = st.session_state.get('last_extracted_info', None)
534
  po_df = st.session_state.get('last_po_df', None)
535
 
 
 
 
 
 
536
  def po_match_tool_func(input_text):
537
  invoice = st.session_state.get("last_extracted_info")
538
  po_df = st.session_state.get("last_po_df")
@@ -545,9 +500,9 @@ def po_match_tool_func(input_text):
545
 
546
  best_row, best_score, reason, debug = find_best_po_match(invoice, po_df)
547
 
548
- if best_score > 85:
549
  status = "APPROVED"
550
- elif best_score > 70:
551
  status = "PARTIALLY APPROVED"
552
  else:
553
  status = "REJECTED"
@@ -564,7 +519,7 @@ if po_df is not None:
564
 
565
  if extracted_info is not None and po_df is not None:
566
  st.markdown("---")
567
- st.subheader("EZOFIS AP AGENT Decision")
568
  if st.button("Make a decision (EZOFIS AP AGENT)"):
569
  tools = [
570
  Tool(
@@ -573,7 +528,6 @@ if extracted_info is not None and po_df is not None:
573
  description="Smartly match invoice to PO using all possible fields.",
574
  )
575
  ]
576
- # Always use OpenAI GPT-4.1 for agent reasoning
577
  decision_llm = ChatOpenAI(
578
  openai_api_key=get_api_key("OpenAI GPT-4.1"),
579
  model=MODELS["OpenAI GPT-4.1"]["model"],
@@ -588,7 +542,7 @@ if extracted_info is not None and po_df is not None:
588
  )
589
  prompt = (
590
  "You are an expert accounts payable agent. "
591
- "Use po_match_tool to check for the best possible match using supplier, PO number (which may appear anywhere in the invoice JSON, even within other fields), ship to, bill to, currency, line items, and total value. "
592
  "Weigh the importance of each field as an expert would. "
593
  "Return a JSON with decision (APPROVED, PARTIALLY APPROVED, REJECTED), reason (include field scores and reasoning), debug, and the best matched PO row.\n"
594
  f"Invoice JSON:\n{json.dumps(extracted_info, indent=2)}"
 
10
  from langchain.agents import initialize_agent, Tool, AgentType
11
  from fuzzywuzzy import fuzz
12
 
 
13
  st.set_page_config(page_title="Accounts Payable AI Agent", layout="wide")
14
 
15
  MODELS = {
 
20
  "response_format": None,
21
  "extra_headers": {},
22
  },
23
+ # You can add other models here for extraction/fine-tune if desired.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  }
25
 
 
 
26
  def get_api_key(model_choice):
27
  key = os.getenv(MODELS[model_choice]["key_env"])
28
  if not key:
 
212
  fields.append(str(obj))
213
  return fields
214
 
 
215
  po_str = str(po_number).strip().replace(" ", "").replace(".0", "")
216
  try:
217
+ po_int = str(int(float(po_number)))
218
  except:
219
  po_int = po_str
220
 
221
  all_strs = [str(s).strip().replace(" ", "").replace(".0", "") for s in _flatten(invoice_json)]
222
 
 
223
  for s in all_strs:
224
  if not s:
225
  continue
 
229
  return True
230
  return False
231
 
 
232
  def find_best_po_match(inv, po_df):
233
  inv_hdr = inv["invoice_header"]
234
  inv_supplier = inv_hdr.get("supplier_name") or ""
235
  inv_po_number = inv_hdr.get("purchase_order_number") or inv_hdr.get("po_number") or inv_hdr.get("order_number") or ""
 
 
236
  inv_currency = inv_hdr.get("currency") or ""
237
  inv_total_due = clean_num(inv_hdr.get("total_due"))
238
  inv_line_items = inv.get("line_items", [])
 
240
  scores = []
241
  for idx, row in po_df.iterrows():
242
  po_supplier = row.get("Supplier Name", "")
243
+ po_po_number = str(row.get("PO Number", ""))
 
 
244
  po_currency = row.get("Currency", "")
245
  po_total = clean_num(row.get("PO Total Value", ""))
246
  po_desc = row.get("Item Description", "")
 
258
  "score": s_supplier
259
  })
260
 
 
261
  s_po_number = 100 if find_po_number_in_json(po_po_number, inv) else 0
262
  field_details.append({
263
  "field": "PO Number (anywhere in JSON)",
 
266
  "score": s_po_number
267
  })
268
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
  s_currency = weighted_fuzzy_score(inv_currency, po_currency)
270
  field_details.append({
271
  "field": "Currency",
 
282
  "score": s_total
283
  })
284
 
285
+ # Line item logic as before
286
  line_item_score = 0
287
  line_reason = ""
288
  best_line_detail = None
 
320
  f"unit_score={unit_score}, amount_score={amount_score}"
321
  )
322
 
323
+ # Adjust scoring weights: Supplier 25%, PO Number 25%, Currency 10%, Total Due 20%, Line Item 20%
324
  total_score = (
325
+ s_supplier * 0.25 +
326
+ s_po_number * 0.25 +
 
 
327
  s_currency * 0.10 +
328
  s_total * 0.20 +
329
+ line_item_score * 0.20
330
  )
331
 
332
  reason = (
333
  f"Supplier match: {s_supplier}/100 (invoice: '{inv_supplier}' vs PO: '{po_supplier}'), "
334
  f"PO Number: {s_po_number}/100 ({'found anywhere in JSON' if s_po_number else 'not found'}), "
 
 
335
  f"Currency: {s_currency}/100 (invoice: '{inv_currency}' vs PO: '{po_currency}'), "
336
  f"Total Due: {'match' if s_total else 'no match'} (invoice: {inv_total_due} vs PO: {po_total}), "
337
  f"Line item best match: {int(line_item_score)}/100. {line_reason}"
 
341
  "po_idx": idx,
342
  "po_supplier": po_supplier,
343
  "po_po_number": po_po_number,
 
 
344
  "po_total": po_total,
345
  "scores": field_details,
346
  "line_item_score": line_item_score,
 
358
  best_row, best_score, reason, debug = scores[0]
359
  return best_row, best_score, reason, debug
360
 
 
361
  def extract_invoice_info(model_choice, text):
362
  prompt = get_extraction_prompt(model_choice, text)
363
  raw = query_llm(model_choice, prompt)
 
483
  extracted_info = st.session_state.get('last_extracted_info', None)
484
  po_df = st.session_state.get('last_po_df', None)
485
 
486
+ # UI for scoring thresholds
487
+ st.sidebar.header("Set Decision Thresholds")
488
+ approved_threshold = st.sidebar.slider("Threshold for 'APPROVED'", min_value=0, max_value=100, value=85)
489
+ partial_threshold = st.sidebar.slider("Threshold for 'PARTIALLY APPROVED'", min_value=0, max_value=approved_threshold-1, value=70)
490
+
491
  def po_match_tool_func(input_text):
492
  invoice = st.session_state.get("last_extracted_info")
493
  po_df = st.session_state.get("last_po_df")
 
500
 
501
  best_row, best_score, reason, debug = find_best_po_match(invoice, po_df)
502
 
503
+ if best_score > approved_threshold:
504
  status = "APPROVED"
505
+ elif best_score > partial_threshold:
506
  status = "PARTIALLY APPROVED"
507
  else:
508
  status = "REJECTED"
 
519
 
520
  if extracted_info is not None and po_df is not None:
521
  st.markdown("---")
522
+ st.subheader("EZOFIS AP AGENT Decision (OpenAI Only)")
523
  if st.button("Make a decision (EZOFIS AP AGENT)"):
524
  tools = [
525
  Tool(
 
528
  description="Smartly match invoice to PO using all possible fields.",
529
  )
530
  ]
 
531
  decision_llm = ChatOpenAI(
532
  openai_api_key=get_api_key("OpenAI GPT-4.1"),
533
  model=MODELS["OpenAI GPT-4.1"]["model"],
 
542
  )
543
  prompt = (
544
  "You are an expert accounts payable agent. "
545
+ "Use po_match_tool to check for the best possible match using supplier, PO number (which may appear anywhere in the invoice JSON, even within other fields), currency, line items, and total value. "
546
  "Weigh the importance of each field as an expert would. "
547
  "Return a JSON with decision (APPROVED, PARTIALLY APPROVED, REJECTED), reason (include field scores and reasoning), debug, and the best matched PO row.\n"
548
  f"Invoice JSON:\n{json.dumps(extracted_info, indent=2)}"