Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,6 +14,18 @@ st.set_page_config(page_title="Accounts Payable AI Agent", layout="wide")
|
|
| 14 |
|
| 15 |
# -------- LLM Model Setup --------
|
| 16 |
MODELS = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
"OpenAI GPT-4.1": {
|
| 18 |
"api_url": "https://api.openai.com/v1/chat/completions",
|
| 19 |
"model": "gpt-4-1106-preview",
|
|
@@ -21,6 +33,16 @@ MODELS = {
|
|
| 21 |
"response_format": None,
|
| 22 |
"extra_headers": {},
|
| 23 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
}
|
| 25 |
|
| 26 |
def get_api_key(model_choice):
|
|
@@ -99,74 +121,10 @@ def get_extraction_prompt(model_choice, txt):
|
|
| 99 |
"Shipment/invoice-level fields such as CAR NUMBER, SHIPPING POINT, SHIPMENT NUMBER, CURRENCY, etc., must go ONLY into the 'invoice_header', not as line item fields.\n"
|
| 100 |
"Use this schema:\n"
|
| 101 |
'{\n'
|
| 102 |
-
' "invoice_header": {
|
| 103 |
-
'
|
| 104 |
-
'
|
| 105 |
-
|
| 106 |
-
' "currency": "string or null",\n'
|
| 107 |
-
' "invoice_number": "string or null",\n'
|
| 108 |
-
' "invoice_date": "string or null",\n'
|
| 109 |
-
' "order_number": "string or null",\n'
|
| 110 |
-
' "customer_order_number": "string or null",\n'
|
| 111 |
-
' "our_order_number": "string or null",\n'
|
| 112 |
-
' "sales_order_number": "string or null",\n'
|
| 113 |
-
' "purchase_order_number": "string or null",\n'
|
| 114 |
-
' "order_date": "string or null",\n'
|
| 115 |
-
' "supplier_name": "string or null",\n'
|
| 116 |
-
' "supplier_address": "string or null",\n'
|
| 117 |
-
' "supplier_phone": "string or null",\n'
|
| 118 |
-
' "supplier_email": "string or null",\n'
|
| 119 |
-
' "supplier_tax_id": "string or null",\n'
|
| 120 |
-
' "customer_name": "string or null",\n'
|
| 121 |
-
' "customer_address": "string or null",\n'
|
| 122 |
-
' "customer_phone": "string or null",\n'
|
| 123 |
-
' "customer_email": "string or null",\n'
|
| 124 |
-
' "customer_tax_id": "string or null",\n'
|
| 125 |
-
' "ship_to_name": "string or null",\n'
|
| 126 |
-
' "ship_to_address": "string or null",\n'
|
| 127 |
-
' "bill_to_name": "string or null",\n'
|
| 128 |
-
' "bill_to_address": "string or null",\n'
|
| 129 |
-
' "remit_to_name": "string or null",\n'
|
| 130 |
-
' "remit_to_address": "string or null",\n'
|
| 131 |
-
' "tax_id": "string or null",\n'
|
| 132 |
-
' "tax_registration_number": "string or null",\n'
|
| 133 |
-
' "vat_number": "string or null",\n'
|
| 134 |
-
' "payment_terms": "string or null",\n'
|
| 135 |
-
' "payment_method": "string or null",\n'
|
| 136 |
-
' "payment_reference": "string or null",\n'
|
| 137 |
-
' "bank_account_number": "string or null",\n'
|
| 138 |
-
' "iban": "string or null",\n'
|
| 139 |
-
' "swift_code": "string or null",\n'
|
| 140 |
-
' "total_before_tax": "string or null",\n'
|
| 141 |
-
' "tax_amount": "string or null",\n'
|
| 142 |
-
' "tax_rate": "string or null",\n'
|
| 143 |
-
' "shipping_charges": "string or null",\n'
|
| 144 |
-
' "discount": "string or null",\n'
|
| 145 |
-
' "total_due": "string or null",\n'
|
| 146 |
-
' "amount_paid": "string or null",\n'
|
| 147 |
-
' "balance_due": "string or null",\n'
|
| 148 |
-
' "due_date": "string or null",\n'
|
| 149 |
-
' "invoice_status": "string or null",\n'
|
| 150 |
-
' "reference_number": "string or null",\n'
|
| 151 |
-
' "project_code": "string or null",\n'
|
| 152 |
-
' "department": "string or null",\n'
|
| 153 |
-
' "contact_person": "string or null",\n'
|
| 154 |
-
' "notes": "string or null",\n'
|
| 155 |
-
' "additional_info": "string or null"\n'
|
| 156 |
-
' },\n'
|
| 157 |
-
' "line_items": [\n'
|
| 158 |
-
' {\n'
|
| 159 |
-
' "quantity": "string or null",\n'
|
| 160 |
-
' "units": "string or null",\n'
|
| 161 |
-
' "description": "string or null",\n'
|
| 162 |
-
' "footage": "string or null",\n'
|
| 163 |
-
' "price": "string or null",\n'
|
| 164 |
-
' "amount": "string or null",\n'
|
| 165 |
-
' "notes": "string or null"\n'
|
| 166 |
-
' }\n'
|
| 167 |
-
' ]\n'
|
| 168 |
-
'}'
|
| 169 |
-
"\nIf a field is missing for a line item or header, use null. "
|
| 170 |
"Do not invent fields. Do not add any header or shipment data to any line item. Return ONLY the JSON object, no explanation.\n"
|
| 171 |
"\nInvoice Text:\n"
|
| 172 |
f"{txt}"
|
|
@@ -266,7 +224,7 @@ if po_file:
|
|
| 266 |
st.sidebar.dataframe(po_df.head())
|
| 267 |
|
| 268 |
st.title("Invoice/Document Extractor")
|
| 269 |
-
mdl = st.selectbox("Model", list(MODELS.keys()), key="extract_model")
|
| 270 |
inv_file = st.file_uploader(
|
| 271 |
"Step 2: Upload Invoice or Document File",
|
| 272 |
type=["pdf", "docx", "xlsx", "xls", "png", "jpg", "jpeg", "tiff"]
|
|
@@ -292,71 +250,94 @@ extracted_info = extracted_info or st.session_state.get("last_extracted_info", N
|
|
| 292 |
def po_match_tool_func(input_text):
|
| 293 |
invoice = st.session_state.get("last_extracted_info")
|
| 294 |
po_df = st.session_state.get("po_df")
|
| 295 |
-
|
| 296 |
if invoice is None or po_df is None:
|
| 297 |
return "Invoice or PO data not found."
|
| 298 |
|
| 299 |
inv_hdr = invoice["invoice_header"]
|
| 300 |
inv_po_number = inv_hdr.get("purchase_order_number") or inv_hdr.get("order_number") or inv_hdr.get("our_order_number")
|
| 301 |
inv_supplier = inv_hdr.get("supplier_name")
|
| 302 |
-
|
|
|
|
|
|
|
| 303 |
matched_po = None
|
| 304 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
for idx, row in po_df.iterrows():
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
|
|
|
|
|
|
| 311 |
break
|
| 312 |
-
|
| 313 |
-
potential_matches = po_df[po_df["Supplier Name"].str.lower().str.strip() == inv_supplier.lower().strip()]
|
| 314 |
-
if not potential_matches.empty:
|
| 315 |
-
matched_po = potential_matches.iloc[0]
|
| 316 |
-
explanation += f"Matched on Supplier Name: {inv_supplier}\n"
|
| 317 |
if matched_po is not None:
|
| 318 |
-
return f"PO matched: {matched_po.to_dict()}"
|
| 319 |
-
return "No matching PO found."
|
| 320 |
|
| 321 |
if po_df is not None:
|
| 322 |
st.session_state["po_df"] = po_df
|
| 323 |
|
|
|
|
| 324 |
if extracted_info is not None and po_df is not None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 325 |
if st.button("Make a decision (AI Agent)"):
|
| 326 |
tools = [
|
| 327 |
Tool(
|
| 328 |
name="po_match_tool",
|
| 329 |
func=po_match_tool_func,
|
| 330 |
-
description="Use this tool to check if the invoice matches any PO in the current PO list.",
|
| 331 |
)
|
| 332 |
]
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
model="gpt-4-1106-preview",
|
| 337 |
temperature=0,
|
| 338 |
streaming=False,
|
| 339 |
)
|
| 340 |
agent = initialize_agent(
|
| 341 |
tools,
|
| 342 |
-
|
| 343 |
agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
|
| 344 |
verbose=True,
|
| 345 |
)
|
| 346 |
prompt = (
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 353 |
f"Invoice JSON:\n{json.dumps(extracted_info, indent=2)}"
|
| 354 |
)
|
| 355 |
with st.spinner("AI is reasoning and making a decision..."):
|
| 356 |
result = agent.run(prompt)
|
| 357 |
try:
|
| 358 |
result_json = json.loads(result)
|
| 359 |
-
st.subheader("AI Decision")
|
| 360 |
st.write(f"**Decision:** {result_json.get('decision', 'N/A')}")
|
| 361 |
st.write(f"**Reason:** {result_json.get('reason', 'N/A')}")
|
| 362 |
except Exception:
|
|
|
|
| 14 |
|
| 15 |
# -------- LLM Model Setup --------
|
| 16 |
MODELS = {
|
| 17 |
+
"DeepSeek v3": {
|
| 18 |
+
"api_url": "https://api.deepseek.com/v1/chat/completions",
|
| 19 |
+
"model": "deepseek-chat",
|
| 20 |
+
"key_env": "DEEPSEEK_API_KEY",
|
| 21 |
+
"response_format": {"type": "json_object"},
|
| 22 |
+
},
|
| 23 |
+
"DeepSeek R1": {
|
| 24 |
+
"api_url": "https://api.deepseek.com/v1/chat/completions",
|
| 25 |
+
"model": "deepseek-reasoner",
|
| 26 |
+
"key_env": "DEEPSEEK_API_KEY",
|
| 27 |
+
"response_format": None,
|
| 28 |
+
},
|
| 29 |
"OpenAI GPT-4.1": {
|
| 30 |
"api_url": "https://api.openai.com/v1/chat/completions",
|
| 31 |
"model": "gpt-4-1106-preview",
|
|
|
|
| 33 |
"response_format": None,
|
| 34 |
"extra_headers": {},
|
| 35 |
},
|
| 36 |
+
"Mistral Small": {
|
| 37 |
+
"api_url": "https://openrouter.ai/api/v1/chat/completions",
|
| 38 |
+
"model": "mistralai/ministral-8b",
|
| 39 |
+
"key_env": "OPENROUTER_API_KEY",
|
| 40 |
+
"response_format": {"type": "json_object"},
|
| 41 |
+
"extra_headers": {
|
| 42 |
+
"HTTP-Referer": "https://huggingface.co",
|
| 43 |
+
"X-Title": "Invoice Extractor",
|
| 44 |
+
},
|
| 45 |
+
},
|
| 46 |
}
|
| 47 |
|
| 48 |
def get_api_key(model_choice):
|
|
|
|
| 121 |
"Shipment/invoice-level fields such as CAR NUMBER, SHIPPING POINT, SHIPMENT NUMBER, CURRENCY, etc., must go ONLY into the 'invoice_header', not as line item fields.\n"
|
| 122 |
"Use this schema:\n"
|
| 123 |
'{\n'
|
| 124 |
+
' "invoice_header": {...},\n'
|
| 125 |
+
' "line_items": [ {...} ]\n'
|
| 126 |
+
'}\n'
|
| 127 |
+
"If a field is missing for a line item or header, use null. "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
"Do not invent fields. Do not add any header or shipment data to any line item. Return ONLY the JSON object, no explanation.\n"
|
| 129 |
"\nInvoice Text:\n"
|
| 130 |
f"{txt}"
|
|
|
|
| 224 |
st.sidebar.dataframe(po_df.head())
|
| 225 |
|
| 226 |
st.title("Invoice/Document Extractor")
|
| 227 |
+
mdl = st.selectbox("Model for Extraction", list(MODELS.keys()), key="extract_model")
|
| 228 |
inv_file = st.file_uploader(
|
| 229 |
"Step 2: Upload Invoice or Document File",
|
| 230 |
type=["pdf", "docx", "xlsx", "xls", "png", "jpg", "jpeg", "tiff"]
|
|
|
|
| 250 |
def _cell_text(value):
    """Return *value* as lowercase text, mapping None and float NaN to ''."""
    # NaN is the only float that is not equal to itself; without this check an
    # empty cell would stringify to "nan" and take part in matching.
    if value is None or (isinstance(value, float) and value != value):
        return ""
    return str(value).lower()


def po_match_tool_func(input_text):
    """Find the purchase order that best matches the last extracted invoice.

    The invoice is read from ``st.session_state["last_extracted_info"]`` and
    the PO table from ``st.session_state["po_df"]``. Matching is attempted in
    strict priority order, each pass scanning every PO row before the next
    pass starts:

    1. PO number (case-insensitive, spaces ignored),
    2. supplier name (case-insensitive, surrounding whitespace ignored),
    3. an invoice line-item description contained in the PO "Description".

    Args:
        input_text: Text passed in by the agent; not used by the lookup.

    Returns:
        A human-readable string: the matched PO row as a dict followed by the
        reason it matched, or a message saying that no data / no match exists.
    """
    invoice = st.session_state.get("last_extracted_info")
    po_df = st.session_state.get("po_df")
    if invoice is None or po_df is None:
        return "Invoice or PO data not found."

    # The extractor may omit a section or emit JSON null for it; treat both
    # as "empty" instead of raising KeyError / TypeError.
    inv_hdr = invoice.get("invoice_header") or {}
    inv_line_items = invoice.get("line_items") or []

    inv_po_number = (
        inv_hdr.get("purchase_order_number")
        or inv_hdr.get("order_number")
        or inv_hdr.get("our_order_number")
    )
    inv_supplier = inv_hdr.get("supplier_name")

    wanted_po = _cell_text(inv_po_number).replace(" ", "")
    wanted_supplier = _cell_text(inv_supplier).strip()

    matched_po = None
    explanation = ""

    # Pass 1: exact PO number. Run over the whole table first so that a
    # supplier-name hit on an earlier row cannot shadow the exact PO.
    if wanted_po:
        for _, row in po_df.iterrows():
            if _cell_text(row.get("PO Number")).replace(" ", "") == wanted_po:
                matched_po = row
                explanation = f"Matched on PO Number: {inv_po_number}. "
                break

    # Pass 2: supplier name, only when no PO number matched.
    if matched_po is None and wanted_supplier:
        for _, row in po_df.iterrows():
            if _cell_text(row.get("Supplier Name")).strip() == wanted_supplier:
                matched_po = row
                explanation = f"Matched on Supplier Name: {inv_supplier}. "
                break

    # Pass 3: fall back to line-item descriptions.
    if matched_po is None:
        # Keep the raw description for the explanation and a normalized copy
        # for comparison; blank descriptions are dropped because an empty
        # string is a substring of every PO description.
        candidates = []
        for line in inv_line_items:
            if not isinstance(line, dict):
                continue
            raw_desc = line.get("description")
            norm_desc = _cell_text(raw_desc).strip()
            if norm_desc:
                candidates.append((raw_desc, norm_desc))

        for _, row in po_df.iterrows():
            po_desc = _cell_text(row.get("Description"))
            hit = next(
                (raw for raw, norm in candidates if norm in po_desc),
                None,
            )
            if hit is not None:
                matched_po = row
                explanation = f"Matched on line item description: '{hit}'. "
                break

    if matched_po is not None:
        return f"PO matched: {matched_po.to_dict()}. {explanation}"
    return "No matching PO found based on PO Number, Supplier, or Line Items."
|
| 291 |
|
| 292 |
if po_df is not None:
|
| 293 |
st.session_state["po_df"] = po_df
|
| 294 |
|
| 295 |
+
# -------------- DECISION SECTION --------------
|
| 296 |
if extracted_info is not None and po_df is not None:
|
| 297 |
+
st.markdown("---")
|
| 298 |
+
st.subheader("AI Agent Decision")
|
| 299 |
+
decision_model = st.selectbox(
|
| 300 |
+
"Model for AI Decision",
|
| 301 |
+
list(MODELS.keys()),
|
| 302 |
+
key="decision_model"
|
| 303 |
+
)
|
| 304 |
if st.button("Make a decision (AI Agent)"):
|
| 305 |
tools = [
|
| 306 |
Tool(
|
| 307 |
name="po_match_tool",
|
| 308 |
func=po_match_tool_func,
|
| 309 |
+
description="Use this tool to check if the invoice matches any PO in the current PO list, including by line items.",
|
| 310 |
)
|
| 311 |
]
|
| 312 |
+
decision_llm = ChatOpenAI(
|
| 313 |
+
openai_api_key=get_api_key(decision_model),
|
| 314 |
+
model=MODELS[decision_model]["model"],
|
|
|
|
| 315 |
temperature=0,
|
| 316 |
streaming=False,
|
| 317 |
)
|
| 318 |
agent = initialize_agent(
|
| 319 |
tools,
|
| 320 |
+
decision_llm,
|
| 321 |
agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
|
| 322 |
verbose=True,
|
| 323 |
)
|
| 324 |
prompt = (
|
| 325 |
+
"You are an expert accounts payable decision agent.\n"
|
| 326 |
+
"You are given an extracted invoice in JSON and have access to a tool called po_match_tool, which can check for matches with all available POs (including matching line items/descriptions between invoice and PO).\n"
|
| 327 |
+
"To approve an invoice, you must verify at least one of the following:\n"
|
| 328 |
+
"- The PO number matches a PO\n"
|
| 329 |
+
"- The supplier name matches a PO\n"
|
| 330 |
+
"- At least one line item description or quantity/price matches with a PO's item\n"
|
| 331 |
+
"If you can't match on PO number or supplier, do your best to match using the invoice's line items (description/quantity/unit price/etc) and the PO data, and explain your reasoning step by step."
|
| 332 |
+
"In your reasoning, list all fields and line items that matched, or say if nothing matched (be specific about what was compared)."
|
| 333 |
+
"At the end, respond in this JSON format ONLY:\n"
|
| 334 |
+
'{"decision": "APPROVED or REJECTED", "reason": "<detailed step-by-step explanation for your decision, showing what matched and what did not, including line item checks>"}\n'
|
| 335 |
f"Invoice JSON:\n{json.dumps(extracted_info, indent=2)}"
|
| 336 |
)
|
| 337 |
with st.spinner("AI is reasoning and making a decision..."):
|
| 338 |
result = agent.run(prompt)
|
| 339 |
try:
|
| 340 |
result_json = json.loads(result)
|
|
|
|
| 341 |
st.write(f"**Decision:** {result_json.get('decision', 'N/A')}")
|
| 342 |
st.write(f"**Reason:** {result_json.get('reason', 'N/A')}")
|
| 343 |
except Exception:
|