Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -195,7 +195,6 @@ def get_extraction_prompt(model_choice, txt):
|
|
| 195 |
)
|
| 196 |
|
| 197 |
def ensure_total_due(invoice_header):
|
| 198 |
-
# If total_due is missing, try to find a close equivalent
|
| 199 |
if invoice_header.get("total_due") in [None, ""]:
|
| 200 |
for field in ["invoice_total", "invoice_value", "total_before_tax", "balance_due", "amount_paid"]:
|
| 201 |
if field in invoice_header and invoice_header[field]:
|
|
@@ -203,33 +202,6 @@ def ensure_total_due(invoice_header):
|
|
| 203 |
break
|
| 204 |
return invoice_header
|
| 205 |
|
| 206 |
-
def extract_invoice_info(model_choice, text):
    """Extract a normalised invoice header and line items from document text.

    Builds the extraction prompt, queries the selected LLM, parses the JSON
    reply and fills in the keys the rest of the app reads.

    Args:
        model_choice: Model identifier forwarded to ``get_extraction_prompt``
            and ``query_llm``.
        text: Raw text of the invoice document.

    Returns:
        ``{"invoice_header": dict, "line_items": list[dict]}``, or ``None``
        when the LLM returns nothing or its reply is not a usable JSON object.
    """
    prompt = get_extraction_prompt(model_choice, text)
    raw = query_llm(model_choice, prompt)
    if not raw:
        return None

    data = clean_json_response(raw)
    # A JSON array or scalar has no header to read; treat it as a failed parse.
    if not data or not isinstance(data, dict):
        return None

    hdr = data.get("invoice_header")
    if not isinstance(hdr, dict):
        # Covers an explicit null as well as a list/string in the header slot.
        hdr = {}
    # Some replies are flat: header fields sit at the top level of the object.
    if not hdr and any(k in data for k in ("invoice_number", "supplier_name", "customer_name")):
        hdr = data

    for key in ("invoice_number", "invoice_date", "po_number", "invoice_value", "supplier_name", "customer_name"):
        hdr.setdefault(key, None)
    if not hdr.get("supplier_name"):
        hdr["supplier_name"] = fallback_supplier(text)

    # Guarantee total_due is always present (if at all possible)
    hdr = ensure_total_due(hdr)

    raw_items = data.get("line_items", [])
    if not isinstance(raw_items, list):
        raw_items = []
    # Keep only dict entries so consumers can safely call ``.get`` on each item.
    items = [itm for itm in raw_items if isinstance(itm, dict)]
    for itm in items:
        for key in ("item_number", "description", "quantity", "unit_price", "total_price"):
            itm.setdefault(key, None)

    return {"invoice_header": hdr, "line_items": items}
|
| 232 |
-
|
| 233 |
def get_content_type(filename):
|
| 234 |
mime, _ = mimetypes.guess_type(filename)
|
| 235 |
ext = filename.lower().split('.')[-1]
|
|
@@ -289,36 +261,96 @@ def extract_text_from_unstract(uploaded_file):
|
|
| 289 |
except Exception:
|
| 290 |
return r.text
|
| 291 |
|
| 292 |
-
def
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
- 'Invoice Total USD 9,070.26' -> 9070.26
|
| 297 |
-
- '$194.41' -> 194.41
|
| 298 |
-
- 194.41 -> 194.41
|
| 299 |
-
"""
|
| 300 |
-
if val is None:
|
| 301 |
-
return None
|
| 302 |
-
if isinstance(val, (int, float)):
|
| 303 |
-
return float(val)
|
| 304 |
-
# Find *all* numbers in the string (with commas, decimals, etc.)
|
| 305 |
-
matches = re.findall(r"[-+]?\d[\d,]*\.?\d*", str(val))
|
| 306 |
-
if matches:
|
| 307 |
-
# Pick the number with the most digits after removing commas
|
| 308 |
-
cleaned = [m.replace(',', '') for m in matches if m]
|
| 309 |
-
if cleaned:
|
| 310 |
-
# Return the largest float (usually the total)
|
| 311 |
-
as_floats = [float(c) for c in cleaned if c.replace('.', '', 1).isdigit()]
|
| 312 |
-
if as_floats:
|
| 313 |
-
# Pick the biggest one (most likely to be the invoice total)
|
| 314 |
-
return max(as_floats)
|
| 315 |
-
return None
|
| 316 |
-
|
| 317 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 318 |
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
|
|
|
|
|
|
|
|
|
| 322 |
|
| 323 |
st.sidebar.header("Step 1: Upload Active Purchase Orders (POs)")
|
| 324 |
po_file = st.sidebar.file_uploader(
|
|
@@ -344,6 +376,7 @@ if st.button("Extract") and inv_file:
|
|
| 344 |
with st.spinner("Extracting text from document using Unstract..."):
|
| 345 |
text = extract_text_from_unstract(inv_file)
|
| 346 |
if text:
|
|
|
|
| 347 |
extracted_info = extract_invoice_info(mdl, text)
|
| 348 |
if extracted_info:
|
| 349 |
if "invoice_header" in extracted_info:
|
|
@@ -355,126 +388,60 @@ if st.button("Extract") and inv_file:
|
|
| 355 |
st.table(extracted_info["line_items"])
|
| 356 |
st.session_state['last_extracted_info'] = extracted_info
|
| 357 |
|
| 358 |
-
# Always retrieve latest extracted info and PO df from session state!
|
| 359 |
extracted_info = st.session_state.get('last_extracted_info', None)
|
| 360 |
po_df = st.session_state.get('last_po_df', None)
|
| 361 |
|
| 362 |
def po_match_tool_func(input_text):
|
| 363 |
invoice = st.session_state.get("last_extracted_info")
|
| 364 |
po_df = st.session_state.get("last_po_df")
|
| 365 |
-
debug = {}
|
| 366 |
if invoice is None or po_df is None:
|
| 367 |
return json.dumps({
|
| 368 |
"decision": "REJECTED",
|
| 369 |
"reason": "Invoice or PO data not found.",
|
| 370 |
-
"debug":
|
| 371 |
})
|
| 372 |
|
| 373 |
-
|
| 374 |
-
inv_po_number = (inv_hdr.get("purchase_order_number") or
|
| 375 |
-
inv_hdr.get("order_number") or
|
| 376 |
-
inv_hdr.get("our_order_number") or "")
|
| 377 |
-
inv_supplier = inv_hdr.get("supplier_name") or ""
|
| 378 |
-
inv_total = inv_hdr.get("total_due") # <<--- ALWAYS USE total_due
|
| 379 |
-
inv_total = clean_num(inv_total)
|
| 380 |
-
inv_line_items = invoice.get("line_items", [])
|
| 381 |
-
|
| 382 |
-
debug["inv_po_number"] = inv_po_number
|
| 383 |
-
debug["inv_supplier"] = inv_supplier
|
| 384 |
-
debug["inv_total"] = inv_total
|
| 385 |
-
|
| 386 |
-
explanation = []
|
| 387 |
-
best_match = None
|
| 388 |
-
best_match_type = None
|
| 389 |
-
match_row_debug = None
|
| 390 |
-
|
| 391 |
-
for idx, row in po_df.iterrows():
|
| 392 |
-
po_number = str(row.get("PO Number", ""))
|
| 393 |
-
po_number_clean = normalize(po_number)
|
| 394 |
-
inv_po_number_clean = normalize(inv_po_number)
|
| 395 |
-
supplier = str(row.get("Supplier Name", ""))
|
| 396 |
-
supplier_clean = normalize(supplier)
|
| 397 |
-
inv_supplier_clean = normalize(inv_supplier)
|
| 398 |
-
po_total = clean_num(row.get("Total PO Value", ""))
|
| 399 |
-
po_desc = str(row.get("Description", "")).lower()
|
| 400 |
-
|
| 401 |
-
po_match = (po_number_clean in inv_po_number_clean or inv_po_number_clean in po_number_clean) and po_number_clean
|
| 402 |
-
supplier_score = fuzz.token_set_ratio(supplier, inv_supplier)
|
| 403 |
-
supplier_match = supplier_score >= 90
|
| 404 |
-
total_match = False
|
| 405 |
-
if po_total is not None and inv_total is not None:
|
| 406 |
-
total_match = abs(po_total - inv_total) < 1 # $1 tolerance
|
| 407 |
-
|
| 408 |
-
debug_row = {
|
| 409 |
-
"row_po_number": po_number,
|
| 410 |
-
"row_supplier": supplier,
|
| 411 |
-
"row_total": po_total,
|
| 412 |
-
"po_match": po_match,
|
| 413 |
-
"supplier_score": supplier_score,
|
| 414 |
-
"supplier_match": supplier_match,
|
| 415 |
-
"total_match": total_match,
|
| 416 |
-
"row_desc": po_desc,
|
| 417 |
-
}
|
| 418 |
-
|
| 419 |
-
if po_match and supplier_match and total_match:
|
| 420 |
-
best_match = row
|
| 421 |
-
best_match_type = "APPROVED"
|
| 422 |
-
explanation.append(f"PO Number, Supplier Name, and Total Due all matched. PO: {row.to_dict()}")
|
| 423 |
-
match_row_debug = debug_row
|
| 424 |
-
break
|
| 425 |
-
elif (po_match or supplier_match) and not total_match:
|
| 426 |
-
best_match = row
|
| 427 |
-
best_match_type = "PARTIALLY APPROVED"
|
| 428 |
-
fields = []
|
| 429 |
-
if po_match:
|
| 430 |
-
fields.append("PO Number matched")
|
| 431 |
-
if supplier_match:
|
| 432 |
-
fields.append("Supplier Name matched (fuzzy)")
|
| 433 |
-
explanation.append(f"{' and '.join(fields)}, but Total Due did not match. PO: {row.to_dict()}")
|
| 434 |
-
match_row_debug = debug_row
|
| 435 |
-
break
|
| 436 |
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
for line in inv_line_items:
|
| 444 |
-
desc = (line.get("description") or "").lower()
|
| 445 |
-
if not desc: continue
|
| 446 |
-
score = fuzz.token_set_ratio(desc, po_desc)
|
| 447 |
-
if (desc and po_desc and score >= 80):
|
| 448 |
-
line_item_matched = True
|
| 449 |
-
explanation.append(f"Line item '{desc}' matched PO description '{po_desc}' with score {score}. PO: {row.to_dict()}")
|
| 450 |
-
break
|
| 451 |
-
if line_item_matched and po_total is not None and inv_total is not None and abs(po_total - inv_total) < 1:
|
| 452 |
-
best_match = row
|
| 453 |
-
best_match_type = "APPROVED"
|
| 454 |
-
match_row_debug = {
|
| 455 |
-
"row_desc": po_desc,
|
| 456 |
-
"line_item_desc": desc,
|
| 457 |
-
"fuzzy_score": score,
|
| 458 |
-
"po_total": po_total,
|
| 459 |
-
"inv_total": inv_total,
|
| 460 |
-
"total_match": abs(po_total - inv_total) < 1,
|
| 461 |
-
}
|
| 462 |
-
break
|
| 463 |
|
| 464 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 465 |
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
return
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 478 |
|
| 479 |
if po_df is not None:
|
| 480 |
st.session_state["last_po_df"] = po_df
|
|
@@ -492,7 +459,7 @@ if extracted_info is not None and po_df is not None:
|
|
| 492 |
Tool(
|
| 493 |
name="po_match_tool",
|
| 494 |
func=po_match_tool_func,
|
| 495 |
-
description="
|
| 496 |
)
|
| 497 |
]
|
| 498 |
decision_llm = ChatOpenAI(
|
|
@@ -509,12 +476,9 @@ if extracted_info is not None and po_df is not None:
|
|
| 509 |
)
|
| 510 |
prompt = (
|
| 511 |
"You are an expert accounts payable agent. "
|
| 512 |
-
"Use po_match_tool to check
|
| 513 |
-
"
|
| 514 |
-
"
|
| 515 |
-
"- If neither, try matching at least one line item (by fuzzy description, quantity, or price) and require total to match for APPROVED.\n"
|
| 516 |
-
"- Otherwise, REJECTED.\n"
|
| 517 |
-
"Call the tool and return its result as-is. Do not invent or guess the answer, do not add any comments outside the JSON.\n"
|
| 518 |
f"Invoice JSON:\n{json.dumps(extracted_info, indent=2)}"
|
| 519 |
)
|
| 520 |
with st.spinner("AI is reasoning and making a decision..."):
|
|
@@ -523,10 +487,12 @@ if extracted_info is not None and po_df is not None:
|
|
| 523 |
result_json = json.loads(result)
|
| 524 |
st.write(f"**Decision:** {result_json.get('decision', 'N/A')}")
|
| 525 |
st.write(f"**Reason:** {result_json.get('reason', 'N/A')}")
|
| 526 |
-
with st.expander("Debug"):
|
| 527 |
st.json(result_json.get('debug'))
|
| 528 |
st.subheader("Extracted Invoice JSON")
|
| 529 |
st.json(extracted_info)
|
|
|
|
|
|
|
| 530 |
except Exception:
|
| 531 |
st.subheader("AI Decision & Reason")
|
| 532 |
st.write(result)
|
|
|
|
| 195 |
)
|
| 196 |
|
| 197 |
def ensure_total_due(invoice_header):
|
|
|
|
| 198 |
if invoice_header.get("total_due") in [None, ""]:
|
| 199 |
for field in ["invoice_total", "invoice_value", "total_before_tax", "balance_due", "amount_paid"]:
|
| 200 |
if field in invoice_header and invoice_header[field]:
|
|
|
|
| 202 |
break
|
| 203 |
return invoice_header
|
| 204 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
def get_content_type(filename):
|
| 206 |
mime, _ = mimetypes.guess_type(filename)
|
| 207 |
ext = filename.lower().split('.')[-1]
|
|
|
|
| 261 |
except Exception:
|
| 262 |
return r.text
|
| 263 |
|
| 264 |
+
def weighted_fuzzy_score(s1, s2):
    """Return a 0-100 similarity score between two field values.

    ``None``, float NaN (what pandas yields for an empty cell) and blank
    strings are all treated as "no value", so they are never fuzzy-matched
    as the literal text "none" / "nan".

    Args:
        s1: First value to compare.
        s2: Second value to compare.

    Returns:
        100 when both values are empty, 0 when exactly one is empty,
        otherwise ``fuzz.token_set_ratio`` of the lower-cased strings.
    """
    def _as_text(value):
        # NaN is the only float that is not equal to itself.
        if value is None or (isinstance(value, float) and value != value):
            return ""
        return str(value).strip().lower()

    left = _as_text(s1)
    right = _as_text(s2)
    if not left and not right:
        return 100
    if not left or not right:
        # One side is missing: there is nothing to compare against.
        return 0
    return fuzz.token_set_ratio(left, right)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 268 |
|
| 269 |
+
def _po_cell(row, column):
    """Return ``row[column]`` with a missing, None or NaN cell normalised to ''."""
    value = row.get(column, "")
    if value is None or (isinstance(value, float) and value != value):
        return ""
    return value


def _line_value(line, *keys):
    """Return the first non-None value found under any of *keys* in *line*."""
    for key in keys:
        value = line.get(key)
        if value is not None:
            return value
    return None


def _numbers_match(left, right, tolerance=0.005):
    """Return True only when both numbers are known and equal within *tolerance*."""
    return left is not None and right is not None and abs(left - right) <= tolerance


def find_best_po_match(inv, po_df):
    """Score every PO row against the invoice and return the best candidate.

    Each row gets a weighted 0-100 score built from fuzzy text matches
    (supplier, ship-to, bill-to, payment terms, currency), a total-value
    check and the best-matching invoice line item.

    Args:
        inv: Extracted invoice dict with ``invoice_header`` and ``line_items``.
        po_df: DataFrame of purchase orders, iterated with ``iterrows()``.

    Returns:
        A 4-tuple ``(best_row, best_score, reason, debug)``. When ``po_df``
        has no rows the result is ``(None, 0, "No POs found.", {})``.
    """
    inv_hdr = inv.get("invoice_header") or {}
    inv_supplier = inv_hdr.get("supplier_name") or ""
    inv_ship_to = inv_hdr.get("ship_to_name") or ""
    inv_bill_to = inv_hdr.get("bill_to_name") or ""
    inv_payment_terms = inv_hdr.get("payment_terms") or ""
    inv_currency = inv_hdr.get("currency") or ""
    # clean_num is defined elsewhere in the file; presumably returns a float
    # or None -- TODO confirm against its current implementation.
    inv_total_due = clean_num(inv_hdr.get("total_due"))

    # Parse the invoice lines once instead of once per PO row. Non-dict
    # entries are ignored because they carry no fields to compare.
    # extract_invoice_info defaults the keys unit_price/total_price, so those
    # are read first; price/amount are kept as fallbacks.
    inv_lines = [
        (
            line.get("description") or "",
            clean_num(line.get("quantity")),
            clean_num(_line_value(line, "unit_price", "price")),
            clean_num(_line_value(line, "total_price", "amount")),
        )
        for line in (inv.get("line_items") or [])
        if isinstance(line, dict)
    ]

    best = None
    for idx, row in po_df.iterrows():
        po_supplier = _po_cell(row, "Supplier Name")
        po_ship_to = _po_cell(row, "Ship To")
        po_bill_to = _po_cell(row, "Bill To")
        po_payment_terms = _po_cell(row, "Payment Terms")
        po_currency = _po_cell(row, "Currency")
        po_total = clean_num(_po_cell(row, "PO Total Value"))
        po_desc = _po_cell(row, "Item Description")
        po_qty = clean_num(str(_po_cell(row, "Item Quantity")))
        po_unit = clean_num(str(_po_cell(row, "Item Unit Price")))
        po_line_total = clean_num(_po_cell(row, "Line Item Total"))

        # Weighted fuzzy scores
        s_supplier = weighted_fuzzy_score(inv_supplier, po_supplier)
        s_ship_to = weighted_fuzzy_score(inv_ship_to, po_ship_to)
        s_bill_to = weighted_fuzzy_score(inv_bill_to, po_bill_to)
        s_terms = weighted_fuzzy_score(inv_payment_terms, po_payment_terms)
        s_currency = weighted_fuzzy_score(inv_currency, po_currency)
        # Totals count as matching when they differ by less than 2 currency units.
        totals_known = inv_total_due is not None and po_total is not None
        s_total = 100 if totals_known and abs(inv_total_due - po_total) < 2 else 0

        # Check for at least one line item strong match
        line_item_score = 0
        line_reason = ""
        for description, quantity, unit_price, amount in inv_lines:
            desc_score = weighted_fuzzy_score(description, po_desc)
            # A value missing on both sides is not evidence of a match.
            qty_score = 100 if _numbers_match(quantity, po_qty) else 0
            unit_score = 100 if _numbers_match(unit_price, po_unit) else 0
            amount_score = 100 if _numbers_match(amount, po_line_total) else 0
            total = desc_score * 0.5 + qty_score * 0.2 + unit_score * 0.15 + amount_score * 0.15
            if total > line_item_score:
                line_item_score = total
                line_reason = (f"Best line item: desc_score={desc_score}, qty_score={qty_score}, "
                               f"unit_score={unit_score}, amount_score={amount_score}")

        # Score weights (tune as needed)
        total_score = (
            s_supplier * 0.25 +
            s_ship_to * 0.1 +
            s_bill_to * 0.1 +
            s_terms * 0.1 +
            s_currency * 0.05 +
            s_total * 0.2 +
            line_item_score * 0.2
        )
        reason = (
            f"Supplier match: {s_supplier}/100, Ship To: {s_ship_to}/100, "
            f"Bill To: {s_bill_to}/100, Payment Terms: {s_terms}/100, Currency: {s_currency}/100, "
            f"Total Due: {'match' if s_total else 'no match'}, "
            f"Line item best match: {int(line_item_score)}/100. {line_reason}"
        )
        debug = {
            "po_idx": idx,
            "po_supplier": po_supplier,
            "po_ship_to": po_ship_to,
            "po_bill_to": po_bill_to,
            "po_total": po_total,
            "s_supplier": s_supplier,
            "s_ship_to": s_ship_to,
            "s_bill_to": s_bill_to,
            "s_terms": s_terms,
            "s_currency": s_currency,
            "s_total": s_total,
            "line_item_score": line_item_score,
            "total_score": total_score,
            "line_reason": line_reason,
            "inv_total_due": inv_total_due,
        }

        # Pick the highest; on a tie the earlier row wins.
        if best is None or total_score > best[1]:
            best = (row, total_score, reason, debug)

    if best is None:
        return None, 0, "No POs found.", {}
    return best
|
| 354 |
|
| 355 |
st.sidebar.header("Step 1: Upload Active Purchase Orders (POs)")
|
| 356 |
po_file = st.sidebar.file_uploader(
|
|
|
|
| 376 |
with st.spinner("Extracting text from document using Unstract..."):
|
| 377 |
text = extract_text_from_unstract(inv_file)
|
| 378 |
if text:
|
| 379 |
+
prompt = get_extraction_prompt(mdl, text)
|
| 380 |
extracted_info = extract_invoice_info(mdl, text)
|
| 381 |
if extracted_info:
|
| 382 |
if "invoice_header" in extracted_info:
|
|
|
|
| 388 |
st.table(extracted_info["line_items"])
|
| 389 |
st.session_state['last_extracted_info'] = extracted_info
|
| 390 |
|
|
|
|
| 391 |
extracted_info = st.session_state.get('last_extracted_info', None)
|
| 392 |
po_df = st.session_state.get('last_po_df', None)
|
| 393 |
|
| 394 |
def po_match_tool_func(input_text):
    """Match the last extracted invoice against the uploaded POs.

    Reads ``last_extracted_info`` and ``last_po_df`` from
    ``st.session_state``, scores the PO rows with ``find_best_po_match`` and
    maps the best score to a decision.

    Args:
        input_text: Argument passed by the tool caller; it is not read here
            because the inputs come from session state.

    Returns:
        A JSON string with ``decision``, ``reason`` and ``debug``, plus
        ``po_row`` when a match was attempted. The decision is ``APPROVED``
        for a score above 85, ``PARTIALLY APPROVED`` above 70, and
        ``REJECTED`` otherwise or when the inputs are missing.
    """
    invoice = st.session_state.get("last_extracted_info")
    po_df = st.session_state.get("last_po_df")
    if invoice is None or po_df is None:
        return json.dumps({
            "decision": "REJECTED",
            "reason": "Invoice or PO data not found.",
            "debug": {},
        })

    best_row, best_score, reason, debug = find_best_po_match(invoice, po_df)

    if best_score > 85:
        status = "APPROVED"
    elif best_score > 70:
        status = "PARTIALLY APPROVED"
    else:
        status = "REJECTED"

    payload = {
        "decision": status,
        "reason": f"Best match score: {int(best_score)}/100. {reason}",
        "debug": debug,
        "po_row": best_row.to_dict() if best_row is not None else None,
    }
    # PO cells and index labels can be values json cannot encode natively
    # (e.g. timestamps or numpy scalars). Stringify those rather than letting
    # a display-only payload raise TypeError.
    return json.dumps(payload, default=str)
|
| 419 |
|
| 420 |
+
def extract_invoice_info(model_choice, text):
    """Extract a normalised invoice header and line items from document text.

    Builds the extraction prompt, queries the selected LLM, parses the JSON
    reply and fills in the keys the rest of the app reads.

    Args:
        model_choice: Model identifier forwarded to ``get_extraction_prompt``
            and ``query_llm``.
        text: Raw text of the invoice document.

    Returns:
        ``{"invoice_header": dict, "line_items": list[dict]}``, or ``None``
        when the LLM returns nothing or its reply is not a usable JSON object.
    """
    # NOTE(review): in this revision the definition sits below the "Extract"
    # button handler that calls it. A top-to-bottom script run would reach
    # the call before this ``def`` executes and raise NameError -- confirm,
    # and move this function above the handler.
    prompt = get_extraction_prompt(model_choice, text)
    raw = query_llm(model_choice, prompt)
    if not raw:
        return None

    data = clean_json_response(raw)
    # A JSON array or scalar has no header to read; treat it as a failed parse.
    if not data or not isinstance(data, dict):
        return None

    hdr = data.get("invoice_header")
    if not isinstance(hdr, dict):
        # Covers an explicit null as well as a list/string in the header slot.
        hdr = {}
    # Some replies are flat: header fields sit at the top level of the object.
    if not hdr and any(k in data for k in ("invoice_number", "supplier_name", "customer_name")):
        hdr = data

    for key in ("invoice_number", "invoice_date", "po_number", "invoice_value", "supplier_name", "customer_name"):
        hdr.setdefault(key, None)
    if not hdr.get("supplier_name"):
        hdr["supplier_name"] = fallback_supplier(text)
    hdr = ensure_total_due(hdr)

    raw_items = data.get("line_items", [])
    if not isinstance(raw_items, list):
        raw_items = []
    # Keep only dict entries so consumers can safely call ``.get`` on each item.
    items = [itm for itm in raw_items if isinstance(itm, dict)]
    for itm in items:
        for key in ("item_number", "description", "quantity", "unit_price", "total_price"):
            itm.setdefault(key, None)

    return {"invoice_header": hdr, "line_items": items}
|
| 445 |
|
| 446 |
if po_df is not None:
|
| 447 |
st.session_state["last_po_df"] = po_df
|
|
|
|
| 459 |
Tool(
|
| 460 |
name="po_match_tool",
|
| 461 |
func=po_match_tool_func,
|
| 462 |
+
description="Smartly match invoice to PO using all possible fields.",
|
| 463 |
)
|
| 464 |
]
|
| 465 |
decision_llm = ChatOpenAI(
|
|
|
|
| 476 |
)
|
| 477 |
prompt = (
|
| 478 |
"You are an expert accounts payable agent. "
|
| 479 |
+
"Use po_match_tool to check for the best possible match using supplier, ship to, bill to, payment terms, currency, line items, and total value."
|
| 480 |
+
"Weigh the importance of each field as an expert would."
|
| 481 |
+
"Return a JSON with decision (APPROVED, PARTIALLY APPROVED, REJECTED), reason (include field scores and reasoning), debug, and the best matched PO row.\n"
|
|
|
|
|
|
|
|
|
|
| 482 |
f"Invoice JSON:\n{json.dumps(extracted_info, indent=2)}"
|
| 483 |
)
|
| 484 |
with st.spinner("AI is reasoning and making a decision..."):
|
|
|
|
| 487 |
result_json = json.loads(result)
|
| 488 |
st.write(f"**Decision:** {result_json.get('decision', 'N/A')}")
|
| 489 |
st.write(f"**Reason:** {result_json.get('reason', 'N/A')}")
|
| 490 |
+
with st.expander("Debug & Matching Details"):
|
| 491 |
st.json(result_json.get('debug'))
|
| 492 |
st.subheader("Extracted Invoice JSON")
|
| 493 |
st.json(extracted_info)
|
| 494 |
+
st.subheader("Matched PO Row")
|
| 495 |
+
st.json(result_json.get('po_row'))
|
| 496 |
except Exception:
|
| 497 |
st.subheader("AI Decision & Reason")
|
| 498 |
st.write(result)
|