Update app.py
Browse files
app.py
CHANGED
|
@@ -226,9 +226,9 @@ def weighted_fuzzy_score(s1, s2):
|
|
| 226 |
def find_best_po_match(inv, po_df):
|
| 227 |
inv_hdr = inv["invoice_header"]
|
| 228 |
inv_supplier = inv_hdr.get("supplier_name") or ""
|
|
|
|
| 229 |
inv_ship_to = inv_hdr.get("ship_to_name") or ""
|
| 230 |
inv_bill_to = inv_hdr.get("bill_to_name") or ""
|
| 231 |
-
inv_payment_terms = inv_hdr.get("payment_terms") or ""
|
| 232 |
inv_currency = inv_hdr.get("currency") or ""
|
| 233 |
inv_total_due = clean_num(inv_hdr.get("total_due"))
|
| 234 |
inv_line_items = inv.get("line_items", [])
|
|
@@ -236,9 +236,9 @@ def find_best_po_match(inv, po_df):
|
|
| 236 |
scores = []
|
| 237 |
for idx, row in po_df.iterrows():
|
| 238 |
po_supplier = row.get("Supplier Name", "")
|
|
|
|
| 239 |
po_ship_to = row.get("Ship To", "")
|
| 240 |
po_bill_to = row.get("Bill To", "")
|
| 241 |
-
po_payment_terms = row.get("Payment Terms", "")
|
| 242 |
po_currency = row.get("Currency", "")
|
| 243 |
po_total = clean_num(row.get("PO Total Value", ""))
|
| 244 |
po_desc = row.get("Item Description", "")
|
|
@@ -256,6 +256,14 @@ def find_best_po_match(inv, po_df):
|
|
| 256 |
"score": s_supplier
|
| 257 |
})
|
| 258 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
s_ship_to = weighted_fuzzy_score(inv_ship_to, po_ship_to)
|
| 260 |
field_details.append({
|
| 261 |
"field": "Ship To",
|
|
@@ -272,14 +280,6 @@ def find_best_po_match(inv, po_df):
|
|
| 272 |
"score": s_bill_to
|
| 273 |
})
|
| 274 |
|
| 275 |
-
s_terms = weighted_fuzzy_score(inv_payment_terms, po_payment_terms)
|
| 276 |
-
field_details.append({
|
| 277 |
-
"field": "Payment Terms",
|
| 278 |
-
"invoice": inv_payment_terms,
|
| 279 |
-
"po": po_payment_terms,
|
| 280 |
-
"score": s_terms
|
| 281 |
-
})
|
| 282 |
-
|
| 283 |
s_currency = weighted_fuzzy_score(inv_currency, po_currency)
|
| 284 |
field_details.append({
|
| 285 |
"field": "Currency",
|
|
@@ -334,21 +334,22 @@ def find_best_po_match(inv, po_df):
|
|
| 334 |
f"unit_score={unit_score}, amount_score={amount_score}"
|
| 335 |
)
|
| 336 |
|
|
|
|
| 337 |
total_score = (
|
| 338 |
-
s_supplier * 0.
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
s_currency * 0.
|
| 343 |
-
s_total * 0.
|
| 344 |
-
line_item_score * 0.
|
| 345 |
)
|
| 346 |
|
| 347 |
reason = (
|
| 348 |
f"Supplier match: {s_supplier}/100 (invoice: '{inv_supplier}' vs PO: '{po_supplier}'), "
|
|
|
|
| 349 |
f"Ship To: {s_ship_to}/100 (invoice: '{inv_ship_to}' vs PO: '{po_ship_to}'), "
|
| 350 |
f"Bill To: {s_bill_to}/100 (invoice: '{inv_bill_to}' vs PO: '{po_bill_to}'), "
|
| 351 |
-
f"Payment Terms: {s_terms}/100 (invoice: '{inv_payment_terms}' vs PO: '{po_payment_terms}'), "
|
| 352 |
f"Currency: {s_currency}/100 (invoice: '{inv_currency}' vs PO: '{po_currency}'), "
|
| 353 |
f"Total Due: {'match' if s_total else 'no match'} (invoice: {inv_total_due} vs PO: {po_total}), "
|
| 354 |
f"Line item best match: {int(line_item_score)}/100. {line_reason}"
|
|
@@ -357,6 +358,7 @@ def find_best_po_match(inv, po_df):
|
|
| 357 |
debug = {
|
| 358 |
"po_idx": idx,
|
| 359 |
"po_supplier": po_supplier,
|
|
|
|
| 360 |
"po_ship_to": po_ship_to,
|
| 361 |
"po_bill_to": po_bill_to,
|
| 362 |
"po_total": po_total,
|
|
@@ -445,7 +447,7 @@ def extract_text_from_unstract(uploaded_file):
|
|
| 445 |
if status == "processed":
|
| 446 |
status_placeholder.info("EZOFIS AI OCR AGENT STATUS: processed! 🎉")
|
| 447 |
break
|
| 448 |
-
status_placeholder.info(f"
|
| 449 |
time.sleep(2)
|
| 450 |
else:
|
| 451 |
status_placeholder.error("Unstract: Timeout waiting for OCR to finish.")
|
|
|
|
| 226 |
def find_best_po_match(inv, po_df):
|
| 227 |
inv_hdr = inv["invoice_header"]
|
| 228 |
inv_supplier = inv_hdr.get("supplier_name") or ""
|
| 229 |
+
inv_po_number = inv_hdr.get("purchase_order_number") or inv_hdr.get("po_number") or inv_hdr.get("order_number") or ""
|
| 230 |
inv_ship_to = inv_hdr.get("ship_to_name") or ""
|
| 231 |
inv_bill_to = inv_hdr.get("bill_to_name") or ""
|
|
|
|
| 232 |
inv_currency = inv_hdr.get("currency") or ""
|
| 233 |
inv_total_due = clean_num(inv_hdr.get("total_due"))
|
| 234 |
inv_line_items = inv.get("line_items", [])
|
|
|
|
| 236 |
scores = []
|
| 237 |
for idx, row in po_df.iterrows():
|
| 238 |
po_supplier = row.get("Supplier Name", "")
|
| 239 |
+
po_po_number = str(row.get("PO Number", "")) # Make sure string
|
| 240 |
po_ship_to = row.get("Ship To", "")
|
| 241 |
po_bill_to = row.get("Bill To", "")
|
|
|
|
| 242 |
po_currency = row.get("Currency", "")
|
| 243 |
po_total = clean_num(row.get("PO Total Value", ""))
|
| 244 |
po_desc = row.get("Item Description", "")
|
|
|
|
| 256 |
"score": s_supplier
|
| 257 |
})
|
| 258 |
|
| 259 |
+
s_po_number = weighted_fuzzy_score(inv_po_number, po_po_number)
|
| 260 |
+
field_details.append({
|
| 261 |
+
"field": "PO Number",
|
| 262 |
+
"invoice": inv_po_number,
|
| 263 |
+
"po": po_po_number,
|
| 264 |
+
"score": s_po_number
|
| 265 |
+
})
|
| 266 |
+
|
| 267 |
s_ship_to = weighted_fuzzy_score(inv_ship_to, po_ship_to)
|
| 268 |
field_details.append({
|
| 269 |
"field": "Ship To",
|
|
|
|
| 280 |
"score": s_bill_to
|
| 281 |
})
|
| 282 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
s_currency = weighted_fuzzy_score(inv_currency, po_currency)
|
| 284 |
field_details.append({
|
| 285 |
"field": "Currency",
|
|
|
|
| 334 |
f"unit_score={unit_score}, amount_score={amount_score}"
|
| 335 |
)
|
| 336 |
|
| 337 |
+
# Adjust scoring weights: Supplier 20%, PO Number 15%, Ship To 10%, Bill To 10%, Currency 10%, Total Due 20%, Line Item 15%
|
| 338 |
total_score = (
|
| 339 |
+
s_supplier * 0.20 +
|
| 340 |
+
s_po_number * 0.15 +
|
| 341 |
+
s_ship_to * 0.10 +
|
| 342 |
+
s_bill_to * 0.10 +
|
| 343 |
+
s_currency * 0.10 +
|
| 344 |
+
s_total * 0.20 +
|
| 345 |
+
line_item_score * 0.15
|
| 346 |
)
|
| 347 |
|
| 348 |
reason = (
|
| 349 |
f"Supplier match: {s_supplier}/100 (invoice: '{inv_supplier}' vs PO: '{po_supplier}'), "
|
| 350 |
+
f"PO Number: {s_po_number}/100 (invoice: '{inv_po_number}' vs PO: '{po_po_number}'), "
|
| 351 |
f"Ship To: {s_ship_to}/100 (invoice: '{inv_ship_to}' vs PO: '{po_ship_to}'), "
|
| 352 |
f"Bill To: {s_bill_to}/100 (invoice: '{inv_bill_to}' vs PO: '{po_bill_to}'), "
|
|
|
|
| 353 |
f"Currency: {s_currency}/100 (invoice: '{inv_currency}' vs PO: '{po_currency}'), "
|
| 354 |
f"Total Due: {'match' if s_total else 'no match'} (invoice: {inv_total_due} vs PO: {po_total}), "
|
| 355 |
f"Line item best match: {int(line_item_score)}/100. {line_reason}"
|
|
|
|
| 358 |
debug = {
|
| 359 |
"po_idx": idx,
|
| 360 |
"po_supplier": po_supplier,
|
| 361 |
+
"po_po_number": po_po_number,
|
| 362 |
"po_ship_to": po_ship_to,
|
| 363 |
"po_bill_to": po_bill_to,
|
| 364 |
"po_total": po_total,
|
|
|
|
| 447 |
if status == "processed":
|
| 448 |
status_placeholder.info("EZOFIS AI OCR AGENT STATUS: processed! 🎉")
|
| 449 |
break
|
| 450 |
+
status_placeholder.info(f"EZOFIS AI OCR AGENT STATUS: {status or 'waiting'}... ({i+1})")
|
| 451 |
time.sleep(2)
|
| 452 |
else:
|
| 453 |
status_placeholder.error("Unstract: Timeout waiting for OCR to finish.")
|