Seth0330 committed on
Commit
a34f52b
·
verified ·
1 Parent(s): 1d61d87

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -19
app.py CHANGED
@@ -226,9 +226,9 @@ def weighted_fuzzy_score(s1, s2):
226
  def find_best_po_match(inv, po_df):
227
  inv_hdr = inv["invoice_header"]
228
  inv_supplier = inv_hdr.get("supplier_name") or ""
 
229
  inv_ship_to = inv_hdr.get("ship_to_name") or ""
230
  inv_bill_to = inv_hdr.get("bill_to_name") or ""
231
- inv_payment_terms = inv_hdr.get("payment_terms") or ""
232
  inv_currency = inv_hdr.get("currency") or ""
233
  inv_total_due = clean_num(inv_hdr.get("total_due"))
234
  inv_line_items = inv.get("line_items", [])
@@ -236,9 +236,9 @@ def find_best_po_match(inv, po_df):
236
  scores = []
237
  for idx, row in po_df.iterrows():
238
  po_supplier = row.get("Supplier Name", "")
 
239
  po_ship_to = row.get("Ship To", "")
240
  po_bill_to = row.get("Bill To", "")
241
- po_payment_terms = row.get("Payment Terms", "")
242
  po_currency = row.get("Currency", "")
243
  po_total = clean_num(row.get("PO Total Value", ""))
244
  po_desc = row.get("Item Description", "")
@@ -256,6 +256,14 @@ def find_best_po_match(inv, po_df):
256
  "score": s_supplier
257
  })
258
 
 
 
 
 
 
 
 
 
259
  s_ship_to = weighted_fuzzy_score(inv_ship_to, po_ship_to)
260
  field_details.append({
261
  "field": "Ship To",
@@ -272,14 +280,6 @@ def find_best_po_match(inv, po_df):
272
  "score": s_bill_to
273
  })
274
 
275
- s_terms = weighted_fuzzy_score(inv_payment_terms, po_payment_terms)
276
- field_details.append({
277
- "field": "Payment Terms",
278
- "invoice": inv_payment_terms,
279
- "po": po_payment_terms,
280
- "score": s_terms
281
- })
282
-
283
  s_currency = weighted_fuzzy_score(inv_currency, po_currency)
284
  field_details.append({
285
  "field": "Currency",
@@ -334,21 +334,22 @@ def find_best_po_match(inv, po_df):
334
  f"unit_score={unit_score}, amount_score={amount_score}"
335
  )
336
 
 
337
  total_score = (
338
- s_supplier * 0.25 +
339
- s_ship_to * 0.1 +
340
- s_bill_to * 0.1 +
341
- s_terms * 0.1 +
342
- s_currency * 0.05 +
343
- s_total * 0.2 +
344
- line_item_score * 0.2
345
  )
346
 
347
  reason = (
348
  f"Supplier match: {s_supplier}/100 (invoice: '{inv_supplier}' vs PO: '{po_supplier}'), "
 
349
  f"Ship To: {s_ship_to}/100 (invoice: '{inv_ship_to}' vs PO: '{po_ship_to}'), "
350
  f"Bill To: {s_bill_to}/100 (invoice: '{inv_bill_to}' vs PO: '{po_bill_to}'), "
351
- f"Payment Terms: {s_terms}/100 (invoice: '{inv_payment_terms}' vs PO: '{po_payment_terms}'), "
352
  f"Currency: {s_currency}/100 (invoice: '{inv_currency}' vs PO: '{po_currency}'), "
353
  f"Total Due: {'match' if s_total else 'no match'} (invoice: {inv_total_due} vs PO: {po_total}), "
354
  f"Line item best match: {int(line_item_score)}/100. {line_reason}"
@@ -357,6 +358,7 @@ def find_best_po_match(inv, po_df):
357
  debug = {
358
  "po_idx": idx,
359
  "po_supplier": po_supplier,
 
360
  "po_ship_to": po_ship_to,
361
  "po_bill_to": po_bill_to,
362
  "po_total": po_total,
@@ -445,7 +447,7 @@ def extract_text_from_unstract(uploaded_file):
445
  if status == "processed":
446
  status_placeholder.info("EZOFIS AI OCR AGENT STATUS: processed! 🎉")
447
  break
448
- status_placeholder.info(f"Unstract status: {status or 'waiting'}... ({i+1})")
449
  time.sleep(2)
450
  else:
451
  status_placeholder.error("Unstract: Timeout waiting for OCR to finish.")
 
226
  def find_best_po_match(inv, po_df):
227
  inv_hdr = inv["invoice_header"]
228
  inv_supplier = inv_hdr.get("supplier_name") or ""
229
+ inv_po_number = inv_hdr.get("purchase_order_number") or inv_hdr.get("po_number") or inv_hdr.get("order_number") or ""
230
  inv_ship_to = inv_hdr.get("ship_to_name") or ""
231
  inv_bill_to = inv_hdr.get("bill_to_name") or ""
 
232
  inv_currency = inv_hdr.get("currency") or ""
233
  inv_total_due = clean_num(inv_hdr.get("total_due"))
234
  inv_line_items = inv.get("line_items", [])
 
236
  scores = []
237
  for idx, row in po_df.iterrows():
238
  po_supplier = row.get("Supplier Name", "")
239
+ po_po_number = str(row.get("PO Number", "")) # Make sure string
240
  po_ship_to = row.get("Ship To", "")
241
  po_bill_to = row.get("Bill To", "")
 
242
  po_currency = row.get("Currency", "")
243
  po_total = clean_num(row.get("PO Total Value", ""))
244
  po_desc = row.get("Item Description", "")
 
256
  "score": s_supplier
257
  })
258
 
259
+ s_po_number = weighted_fuzzy_score(inv_po_number, po_po_number)
260
+ field_details.append({
261
+ "field": "PO Number",
262
+ "invoice": inv_po_number,
263
+ "po": po_po_number,
264
+ "score": s_po_number
265
+ })
266
+
267
  s_ship_to = weighted_fuzzy_score(inv_ship_to, po_ship_to)
268
  field_details.append({
269
  "field": "Ship To",
 
280
  "score": s_bill_to
281
  })
282
 
 
 
 
 
 
 
 
 
283
  s_currency = weighted_fuzzy_score(inv_currency, po_currency)
284
  field_details.append({
285
  "field": "Currency",
 
334
  f"unit_score={unit_score}, amount_score={amount_score}"
335
  )
336
 
337
+ # Adjust scoring weights: Supplier 20%, PO Number 15%, Ship To 10%, Bill To 10%, Currency 10%, Total Due 20%, Line Item 15%
338
  total_score = (
339
+ s_supplier * 0.20 +
340
+ s_po_number * 0.15 +
341
+ s_ship_to * 0.10 +
342
+ s_bill_to * 0.10 +
343
+ s_currency * 0.10 +
344
+ s_total * 0.20 +
345
+ line_item_score * 0.15
346
  )
347
 
348
  reason = (
349
  f"Supplier match: {s_supplier}/100 (invoice: '{inv_supplier}' vs PO: '{po_supplier}'), "
350
+ f"PO Number: {s_po_number}/100 (invoice: '{inv_po_number}' vs PO: '{po_po_number}'), "
351
  f"Ship To: {s_ship_to}/100 (invoice: '{inv_ship_to}' vs PO: '{po_ship_to}'), "
352
  f"Bill To: {s_bill_to}/100 (invoice: '{inv_bill_to}' vs PO: '{po_bill_to}'), "
 
353
  f"Currency: {s_currency}/100 (invoice: '{inv_currency}' vs PO: '{po_currency}'), "
354
  f"Total Due: {'match' if s_total else 'no match'} (invoice: {inv_total_due} vs PO: {po_total}), "
355
  f"Line item best match: {int(line_item_score)}/100. {line_reason}"
 
358
  debug = {
359
  "po_idx": idx,
360
  "po_supplier": po_supplier,
361
+ "po_po_number": po_po_number,
362
  "po_ship_to": po_ship_to,
363
  "po_bill_to": po_bill_to,
364
  "po_total": po_total,
 
447
  if status == "processed":
448
  status_placeholder.info("EZOFIS AI OCR AGENT STATUS: processed! 🎉")
449
  break
450
+ status_placeholder.info(f"EZOFIS AI OCR AGENT STATUS: {status or 'waiting'}... ({i+1})")
451
  time.sleep(2)
452
  else:
453
  status_placeholder.error("Unstract: Timeout waiting for OCR to finish.")