Update app.py
Browse files
app.py
CHANGED
|
@@ -10,7 +10,6 @@ from langchain_community.chat_models import ChatOpenAI
|
|
| 10 |
from langchain.agents import initialize_agent, Tool, AgentType
|
| 11 |
from fuzzywuzzy import fuzz
|
| 12 |
|
| 13 |
-
# --- CONFIGURATION ---
|
| 14 |
st.set_page_config(page_title="Accounts Payable AI Agent", layout="wide")
|
| 15 |
|
| 16 |
MODELS = {
|
|
@@ -21,27 +20,9 @@ MODELS = {
|
|
| 21 |
"response_format": None,
|
| 22 |
"extra_headers": {},
|
| 23 |
},
|
| 24 |
-
#
|
| 25 |
-
"DeepSeek v3": {
|
| 26 |
-
"api_url": "https://api.deepseek.com/v1/chat/completions",
|
| 27 |
-
"model": "deepseek-chat",
|
| 28 |
-
"key_env": "DEEPSEEK_API_KEY",
|
| 29 |
-
"response_format": {"type": "json_object"},
|
| 30 |
-
},
|
| 31 |
-
"Mistral Small": {
|
| 32 |
-
"api_url": "https://openrouter.ai/api/v1/chat/completions",
|
| 33 |
-
"model": "mistralai/ministral-8b",
|
| 34 |
-
"key_env": "OPENROUTER_API_KEY",
|
| 35 |
-
"response_format": {"type": "json_object"},
|
| 36 |
-
"extra_headers": {
|
| 37 |
-
"HTTP-Referer": "https://huggingface.co",
|
| 38 |
-
"X-Title": "Invoice Extractor",
|
| 39 |
-
},
|
| 40 |
-
},
|
| 41 |
}
|
| 42 |
|
| 43 |
-
# --- UTILITY FUNCTIONS ---
|
| 44 |
-
|
| 45 |
def get_api_key(model_choice):
|
| 46 |
key = os.getenv(MODELS[model_choice]["key_env"])
|
| 47 |
if not key:
|
|
@@ -231,16 +212,14 @@ def find_po_number_in_json(po_number, invoice_json):
|
|
| 231 |
fields.append(str(obj))
|
| 232 |
return fields
|
| 233 |
|
| 234 |
-
# Clean up PO number
|
| 235 |
po_str = str(po_number).strip().replace(" ", "").replace(".0", "")
|
| 236 |
try:
|
| 237 |
-
po_int = str(int(float(po_number)))
|
| 238 |
except:
|
| 239 |
po_int = po_str
|
| 240 |
|
| 241 |
all_strs = [str(s).strip().replace(" ", "").replace(".0", "") for s in _flatten(invoice_json)]
|
| 242 |
|
| 243 |
-
# Check for exact match, or substring match, with all variants
|
| 244 |
for s in all_strs:
|
| 245 |
if not s:
|
| 246 |
continue
|
|
@@ -250,13 +229,10 @@ def find_po_number_in_json(po_number, invoice_json):
|
|
| 250 |
return True
|
| 251 |
return False
|
| 252 |
|
| 253 |
-
|
| 254 |
def find_best_po_match(inv, po_df):
|
| 255 |
inv_hdr = inv["invoice_header"]
|
| 256 |
inv_supplier = inv_hdr.get("supplier_name") or ""
|
| 257 |
inv_po_number = inv_hdr.get("purchase_order_number") or inv_hdr.get("po_number") or inv_hdr.get("order_number") or ""
|
| 258 |
-
inv_ship_to = inv_hdr.get("ship_to_name") or ""
|
| 259 |
-
inv_bill_to = inv_hdr.get("bill_to_name") or ""
|
| 260 |
inv_currency = inv_hdr.get("currency") or ""
|
| 261 |
inv_total_due = clean_num(inv_hdr.get("total_due"))
|
| 262 |
inv_line_items = inv.get("line_items", [])
|
|
@@ -264,9 +240,7 @@ def find_best_po_match(inv, po_df):
|
|
| 264 |
scores = []
|
| 265 |
for idx, row in po_df.iterrows():
|
| 266 |
po_supplier = row.get("Supplier Name", "")
|
| 267 |
-
po_po_number = str(row.get("PO Number", ""))
|
| 268 |
-
po_ship_to = row.get("Ship To", "")
|
| 269 |
-
po_bill_to = row.get("Bill To", "")
|
| 270 |
po_currency = row.get("Currency", "")
|
| 271 |
po_total = clean_num(row.get("PO Total Value", ""))
|
| 272 |
po_desc = row.get("Item Description", "")
|
|
@@ -284,7 +258,6 @@ def find_best_po_match(inv, po_df):
|
|
| 284 |
"score": s_supplier
|
| 285 |
})
|
| 286 |
|
| 287 |
-
# PO Number scoring: anywhere in JSON
|
| 288 |
s_po_number = 100 if find_po_number_in_json(po_po_number, inv) else 0
|
| 289 |
field_details.append({
|
| 290 |
"field": "PO Number (anywhere in JSON)",
|
|
@@ -293,22 +266,6 @@ def find_best_po_match(inv, po_df):
|
|
| 293 |
"score": s_po_number
|
| 294 |
})
|
| 295 |
|
| 296 |
-
s_ship_to = weighted_fuzzy_score(inv_ship_to, po_ship_to)
|
| 297 |
-
field_details.append({
|
| 298 |
-
"field": "Ship To",
|
| 299 |
-
"invoice": inv_ship_to,
|
| 300 |
-
"po": po_ship_to,
|
| 301 |
-
"score": s_ship_to
|
| 302 |
-
})
|
| 303 |
-
|
| 304 |
-
s_bill_to = weighted_fuzzy_score(inv_bill_to, po_bill_to)
|
| 305 |
-
field_details.append({
|
| 306 |
-
"field": "Bill To",
|
| 307 |
-
"invoice": inv_bill_to,
|
| 308 |
-
"po": po_bill_to,
|
| 309 |
-
"score": s_bill_to
|
| 310 |
-
})
|
| 311 |
-
|
| 312 |
s_currency = weighted_fuzzy_score(inv_currency, po_currency)
|
| 313 |
field_details.append({
|
| 314 |
"field": "Currency",
|
|
@@ -325,7 +282,7 @@ def find_best_po_match(inv, po_df):
|
|
| 325 |
"score": s_total
|
| 326 |
})
|
| 327 |
|
| 328 |
-
#
|
| 329 |
line_item_score = 0
|
| 330 |
line_reason = ""
|
| 331 |
best_line_detail = None
|
|
@@ -363,22 +320,18 @@ def find_best_po_match(inv, po_df):
|
|
| 363 |
f"unit_score={unit_score}, amount_score={amount_score}"
|
| 364 |
)
|
| 365 |
|
| 366 |
-
# Adjust scoring weights: Supplier
|
| 367 |
total_score = (
|
| 368 |
-
s_supplier * 0.
|
| 369 |
-
s_po_number * 0.
|
| 370 |
-
s_ship_to * 0.10 +
|
| 371 |
-
s_bill_to * 0.10 +
|
| 372 |
s_currency * 0.10 +
|
| 373 |
s_total * 0.20 +
|
| 374 |
-
line_item_score * 0.
|
| 375 |
)
|
| 376 |
|
| 377 |
reason = (
|
| 378 |
f"Supplier match: {s_supplier}/100 (invoice: '{inv_supplier}' vs PO: '{po_supplier}'), "
|
| 379 |
f"PO Number: {s_po_number}/100 ({'found anywhere in JSON' if s_po_number else 'not found'}), "
|
| 380 |
-
f"Ship To: {s_ship_to}/100 (invoice: '{inv_ship_to}' vs PO: '{po_ship_to}'), "
|
| 381 |
-
f"Bill To: {s_bill_to}/100 (invoice: '{inv_bill_to}' vs PO: '{po_bill_to}'), "
|
| 382 |
f"Currency: {s_currency}/100 (invoice: '{inv_currency}' vs PO: '{po_currency}'), "
|
| 383 |
f"Total Due: {'match' if s_total else 'no match'} (invoice: {inv_total_due} vs PO: {po_total}), "
|
| 384 |
f"Line item best match: {int(line_item_score)}/100. {line_reason}"
|
|
@@ -388,8 +341,6 @@ def find_best_po_match(inv, po_df):
|
|
| 388 |
"po_idx": idx,
|
| 389 |
"po_supplier": po_supplier,
|
| 390 |
"po_po_number": po_po_number,
|
| 391 |
-
"po_ship_to": po_ship_to,
|
| 392 |
-
"po_bill_to": po_bill_to,
|
| 393 |
"po_total": po_total,
|
| 394 |
"scores": field_details,
|
| 395 |
"line_item_score": line_item_score,
|
|
@@ -407,7 +358,6 @@ def find_best_po_match(inv, po_df):
|
|
| 407 |
best_row, best_score, reason, debug = scores[0]
|
| 408 |
return best_row, best_score, reason, debug
|
| 409 |
|
| 410 |
-
|
| 411 |
def extract_invoice_info(model_choice, text):
|
| 412 |
prompt = get_extraction_prompt(model_choice, text)
|
| 413 |
raw = query_llm(model_choice, prompt)
|
|
@@ -533,6 +483,11 @@ if st.button("Extract") and inv_file:
|
|
| 533 |
extracted_info = st.session_state.get('last_extracted_info', None)
|
| 534 |
po_df = st.session_state.get('last_po_df', None)
|
| 535 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 536 |
def po_match_tool_func(input_text):
|
| 537 |
invoice = st.session_state.get("last_extracted_info")
|
| 538 |
po_df = st.session_state.get("last_po_df")
|
|
@@ -545,9 +500,9 @@ def po_match_tool_func(input_text):
|
|
| 545 |
|
| 546 |
best_row, best_score, reason, debug = find_best_po_match(invoice, po_df)
|
| 547 |
|
| 548 |
-
if best_score >
|
| 549 |
status = "APPROVED"
|
| 550 |
-
elif best_score >
|
| 551 |
status = "PARTIALLY APPROVED"
|
| 552 |
else:
|
| 553 |
status = "REJECTED"
|
|
@@ -564,7 +519,7 @@ if po_df is not None:
|
|
| 564 |
|
| 565 |
if extracted_info is not None and po_df is not None:
|
| 566 |
st.markdown("---")
|
| 567 |
-
st.subheader("EZOFIS AP AGENT Decision")
|
| 568 |
if st.button("Make a decision (EZOFIS AP AGENT)"):
|
| 569 |
tools = [
|
| 570 |
Tool(
|
|
@@ -573,7 +528,6 @@ if extracted_info is not None and po_df is not None:
|
|
| 573 |
description="Smartly match invoice to PO using all possible fields.",
|
| 574 |
)
|
| 575 |
]
|
| 576 |
-
# Always use OpenAI GPT-4.1 for agent reasoning
|
| 577 |
decision_llm = ChatOpenAI(
|
| 578 |
openai_api_key=get_api_key("OpenAI GPT-4.1"),
|
| 579 |
model=MODELS["OpenAI GPT-4.1"]["model"],
|
|
@@ -588,7 +542,7 @@ if extracted_info is not None and po_df is not None:
|
|
| 588 |
)
|
| 589 |
prompt = (
|
| 590 |
"You are an expert accounts payable agent. "
|
| 591 |
-
"Use po_match_tool to check for the best possible match using supplier, PO number (which may appear anywhere in the invoice JSON, even within other fields),
|
| 592 |
"Weigh the importance of each field as an expert would. "
|
| 593 |
"Return a JSON with decision (APPROVED, PARTIALLY APPROVED, REJECTED), reason (include field scores and reasoning), debug, and the best matched PO row.\n"
|
| 594 |
f"Invoice JSON:\n{json.dumps(extracted_info, indent=2)}"
|
|
|
|
| 10 |
from langchain.agents import initialize_agent, Tool, AgentType
|
| 11 |
from fuzzywuzzy import fuzz
|
| 12 |
|
|
|
|
| 13 |
st.set_page_config(page_title="Accounts Payable AI Agent", layout="wide")
|
| 14 |
|
| 15 |
MODELS = {
|
|
|
|
| 20 |
"response_format": None,
|
| 21 |
"extra_headers": {},
|
| 22 |
},
|
| 23 |
+
# Add other models here (e.g. for extraction or fine-tuning) if desired.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
}
|
| 25 |
|
|
|
|
|
|
|
| 26 |
def get_api_key(model_choice):
|
| 27 |
key = os.getenv(MODELS[model_choice]["key_env"])
|
| 28 |
if not key:
|
|
|
|
| 212 |
fields.append(str(obj))
|
| 213 |
return fields
|
| 214 |
|
|
|
|
| 215 |
po_str = str(po_number).strip().replace(" ", "").replace(".0", "")
|
| 216 |
try:
|
| 217 |
+
po_int = str(int(float(po_number)))
|
| 218 |
except:
|
| 219 |
po_int = po_str
|
| 220 |
|
| 221 |
all_strs = [str(s).strip().replace(" ", "").replace(".0", "") for s in _flatten(invoice_json)]
|
| 222 |
|
|
|
|
| 223 |
for s in all_strs:
|
| 224 |
if not s:
|
| 225 |
continue
|
|
|
|
| 229 |
return True
|
| 230 |
return False
|
| 231 |
|
|
|
|
| 232 |
def find_best_po_match(inv, po_df):
|
| 233 |
inv_hdr = inv["invoice_header"]
|
| 234 |
inv_supplier = inv_hdr.get("supplier_name") or ""
|
| 235 |
inv_po_number = inv_hdr.get("purchase_order_number") or inv_hdr.get("po_number") or inv_hdr.get("order_number") or ""
|
|
|
|
|
|
|
| 236 |
inv_currency = inv_hdr.get("currency") or ""
|
| 237 |
inv_total_due = clean_num(inv_hdr.get("total_due"))
|
| 238 |
inv_line_items = inv.get("line_items", [])
|
|
|
|
| 240 |
scores = []
|
| 241 |
for idx, row in po_df.iterrows():
|
| 242 |
po_supplier = row.get("Supplier Name", "")
|
| 243 |
+
po_po_number = str(row.get("PO Number", ""))
|
|
|
|
|
|
|
| 244 |
po_currency = row.get("Currency", "")
|
| 245 |
po_total = clean_num(row.get("PO Total Value", ""))
|
| 246 |
po_desc = row.get("Item Description", "")
|
|
|
|
| 258 |
"score": s_supplier
|
| 259 |
})
|
| 260 |
|
|
|
|
| 261 |
s_po_number = 100 if find_po_number_in_json(po_po_number, inv) else 0
|
| 262 |
field_details.append({
|
| 263 |
"field": "PO Number (anywhere in JSON)",
|
|
|
|
| 266 |
"score": s_po_number
|
| 267 |
})
|
| 268 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
s_currency = weighted_fuzzy_score(inv_currency, po_currency)
|
| 270 |
field_details.append({
|
| 271 |
"field": "Currency",
|
|
|
|
| 282 |
"score": s_total
|
| 283 |
})
|
| 284 |
|
| 285 |
+
# Line-item scoring: track the best line-item score, its reason text, and its detail record
|
| 286 |
line_item_score = 0
|
| 287 |
line_reason = ""
|
| 288 |
best_line_detail = None
|
|
|
|
| 320 |
f"unit_score={unit_score}, amount_score={amount_score}"
|
| 321 |
)
|
| 322 |
|
| 323 |
+
# Weighted total score: Supplier 25%, PO Number 25%, Currency 10%, Total Due 20%, Line Item 20% (weights sum to 100%)
|
| 324 |
total_score = (
|
| 325 |
+
s_supplier * 0.25 +
|
| 326 |
+
s_po_number * 0.25 +
|
|
|
|
|
|
|
| 327 |
s_currency * 0.10 +
|
| 328 |
s_total * 0.20 +
|
| 329 |
+
line_item_score * 0.20
|
| 330 |
)
|
| 331 |
|
| 332 |
reason = (
|
| 333 |
f"Supplier match: {s_supplier}/100 (invoice: '{inv_supplier}' vs PO: '{po_supplier}'), "
|
| 334 |
f"PO Number: {s_po_number}/100 ({'found anywhere in JSON' if s_po_number else 'not found'}), "
|
|
|
|
|
|
|
| 335 |
f"Currency: {s_currency}/100 (invoice: '{inv_currency}' vs PO: '{po_currency}'), "
|
| 336 |
f"Total Due: {'match' if s_total else 'no match'} (invoice: {inv_total_due} vs PO: {po_total}), "
|
| 337 |
f"Line item best match: {int(line_item_score)}/100. {line_reason}"
|
|
|
|
| 341 |
"po_idx": idx,
|
| 342 |
"po_supplier": po_supplier,
|
| 343 |
"po_po_number": po_po_number,
|
|
|
|
|
|
|
| 344 |
"po_total": po_total,
|
| 345 |
"scores": field_details,
|
| 346 |
"line_item_score": line_item_score,
|
|
|
|
| 358 |
best_row, best_score, reason, debug = scores[0]
|
| 359 |
return best_row, best_score, reason, debug
|
| 360 |
|
|
|
|
| 361 |
def extract_invoice_info(model_choice, text):
|
| 362 |
prompt = get_extraction_prompt(model_choice, text)
|
| 363 |
raw = query_llm(model_choice, prompt)
|
|
|
|
| 483 |
extracted_info = st.session_state.get('last_extracted_info', None)
|
| 484 |
po_df = st.session_state.get('last_po_df', None)
|
| 485 |
|
| 486 |
+
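# Sidebar sliders for the decision thresholds. NOTE(review): the partial slider's default (70) exceeds its max_value (approved_threshold-1) whenever approved_threshold <= 70 — confirm how st.slider handles an out-of-range default.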
|
| 487 |
+
st.sidebar.header("Set Decision Thresholds")
|
| 488 |
+
approved_threshold = st.sidebar.slider("Threshold for 'APPROVED'", min_value=0, max_value=100, value=85)
|
| 489 |
+
partial_threshold = st.sidebar.slider("Threshold for 'PARTIALLY APPROVED'", min_value=0, max_value=approved_threshold-1, value=70)
|
| 490 |
+
|
| 491 |
def po_match_tool_func(input_text):
|
| 492 |
invoice = st.session_state.get("last_extracted_info")
|
| 493 |
po_df = st.session_state.get("last_po_df")
|
|
|
|
| 500 |
|
| 501 |
best_row, best_score, reason, debug = find_best_po_match(invoice, po_df)
|
| 502 |
|
| 503 |
+
if best_score > approved_threshold:
|
| 504 |
status = "APPROVED"
|
| 505 |
+
elif best_score > partial_threshold:
|
| 506 |
status = "PARTIALLY APPROVED"
|
| 507 |
else:
|
| 508 |
status = "REJECTED"
|
|
|
|
| 519 |
|
| 520 |
if extracted_info is not None and po_df is not None:
|
| 521 |
st.markdown("---")
|
| 522 |
+
st.subheader("EZOFIS AP AGENT Decision (OpenAI Only)")
|
| 523 |
if st.button("Make a decision (EZOFIS AP AGENT)"):
|
| 524 |
tools = [
|
| 525 |
Tool(
|
|
|
|
| 528 |
description="Smartly match invoice to PO using all possible fields.",
|
| 529 |
)
|
| 530 |
]
|
|
|
|
| 531 |
decision_llm = ChatOpenAI(
|
| 532 |
openai_api_key=get_api_key("OpenAI GPT-4.1"),
|
| 533 |
model=MODELS["OpenAI GPT-4.1"]["model"],
|
|
|
|
| 542 |
)
|
| 543 |
prompt = (
|
| 544 |
"You are an expert accounts payable agent. "
|
| 545 |
+
"Use po_match_tool to check for the best possible match using supplier, PO number (which may appear anywhere in the invoice JSON, even within other fields), currency, line items, and total value. "
|
| 546 |
"Weigh the importance of each field as an expert would. "
|
| 547 |
"Return a JSON with decision (APPROVED, PARTIALLY APPROVED, REJECTED), reason (include field scores and reasoning), debug, and the best matched PO row.\n"
|
| 548 |
f"Invoice JSON:\n{json.dumps(extracted_info, indent=2)}"
|