DOC_VALID_AGENT

Sleeping

App Files Files Community

Seth0330 committed on Jun 17, 2025

Commit

1541e64

verified ·

1 Parent(s): c8211fd

Update app.py

Browse files

Files changed (1) hide show

app.py +78 -97

app.py CHANGED Viewed

@@ -14,6 +14,18 @@ st.set_page_config(page_title="Accounts Payable AI Agent", layout="wide")
 # -------- LLM Model Setup --------
 MODELS = {
     "OpenAI GPT-4.1": {
         "api_url": "https://api.openai.com/v1/chat/completions",
         "model": "gpt-4-1106-preview",
@@ -21,6 +33,16 @@ MODELS = {
         "response_format": None,
         "extra_headers": {},
     },
 }
 def get_api_key(model_choice):
@@ -99,74 +121,10 @@ def get_extraction_prompt(model_choice, txt):
         "Shipment/invoice-level fields such as CAR NUMBER, SHIPPING POINT, SHIPMENT NUMBER, CURRENCY, etc., must go ONLY into the 'invoice_header', not as line item fields.\n"
         "Use this schema:\n"
         '{\n'
-        '  "invoice_header": {\n'
-        '    "car_number": "string or null",\n'
-        '    "shipment_number": "string or null",\n'
-        '    "shipping_point": "string or null",\n'
-        '    "currency": "string or null",\n'
-        '    "invoice_number": "string or null",\n'
-        '    "invoice_date": "string or null",\n'
-        '    "order_number": "string or null",\n'
-        '    "customer_order_number": "string or null",\n'
-        '    "our_order_number": "string or null",\n'
-        '    "sales_order_number": "string or null",\n'
-        '    "purchase_order_number": "string or null",\n'
-        '    "order_date": "string or null",\n'
-        '    "supplier_name": "string or null",\n'
-        '    "supplier_address": "string or null",\n'
-        '    "supplier_phone": "string or null",\n'
-        '    "supplier_email": "string or null",\n'
-        '    "supplier_tax_id": "string or null",\n'
-        '    "customer_name": "string or null",\n'
-        '    "customer_address": "string or null",\n'
-        '    "customer_phone": "string or null",\n'
-        '    "customer_email": "string or null",\n'
-        '    "customer_tax_id": "string or null",\n'
-        '    "ship_to_name": "string or null",\n'
-        '    "ship_to_address": "string or null",\n'
-        '    "bill_to_name": "string or null",\n'
-        '    "bill_to_address": "string or null",\n'
-        '    "remit_to_name": "string or null",\n'
-        '    "remit_to_address": "string or null",\n'
-        '    "tax_id": "string or null",\n'
-        '    "tax_registration_number": "string or null",\n'
-        '    "vat_number": "string or null",\n'
-        '    "payment_terms": "string or null",\n'
-        '    "payment_method": "string or null",\n'
-        '    "payment_reference": "string or null",\n'
-        '    "bank_account_number": "string or null",\n'
-        '    "iban": "string or null",\n'
-        '    "swift_code": "string or null",\n'
-        '    "total_before_tax": "string or null",\n'
-        '    "tax_amount": "string or null",\n'
-        '    "tax_rate": "string or null",\n'
-        '    "shipping_charges": "string or null",\n'
-        '    "discount": "string or null",\n'
-        '    "total_due": "string or null",\n'
-        '    "amount_paid": "string or null",\n'
-        '    "balance_due": "string or null",\n'
-        '    "due_date": "string or null",\n'
-        '    "invoice_status": "string or null",\n'
-        '    "reference_number": "string or null",\n'
-        '    "project_code": "string or null",\n'
-        '    "department": "string or null",\n'
-        '    "contact_person": "string or null",\n'
-        '    "notes": "string or null",\n'
-        '    "additional_info": "string or null"\n'
-        '  },\n'
-        '  "line_items": [\n'
-        '    {\n'
-        '      "quantity": "string or null",\n'
-        '      "units": "string or null",\n'
-        '      "description": "string or null",\n'
-        '      "footage": "string or null",\n'
-        '      "price": "string or null",\n'
-        '      "amount": "string or null",\n'
-        '      "notes": "string or null"\n'
-        '    }\n'
-        '  ]\n'
-        '}'
-        "\nIf a field is missing for a line item or header, use null. "
         "Do not invent fields. Do not add any header or shipment data to any line item. Return ONLY the JSON object, no explanation.\n"
         "\nInvoice Text:\n"
         f"{txt}"
@@ -266,7 +224,7 @@ if po_file:
     st.sidebar.dataframe(po_df.head())
 st.title("Invoice/Document Extractor")
-mdl = st.selectbox("Model", list(MODELS.keys()), key="extract_model")
 inv_file = st.file_uploader(
     "Step 2: Upload Invoice or Document File",
     type=["pdf", "docx", "xlsx", "xls", "png", "jpg", "jpeg", "tiff"]
@@ -292,71 +250,94 @@ extracted_info = extracted_info or st.session_state.get("last_extracted_info", N
 def po_match_tool_func(input_text):
     invoice = st.session_state.get("last_extracted_info")
     po_df = st.session_state.get("po_df")
     if invoice is None or po_df is None:
         return "Invoice or PO data not found."
     inv_hdr = invoice["invoice_header"]
     inv_po_number = inv_hdr.get("purchase_order_number") or inv_hdr.get("order_number") or inv_hdr.get("our_order_number")
     inv_supplier = inv_hdr.get("supplier_name")
-    explanation = ""
     matched_po = None
-    if inv_po_number:
         for idx, row in po_df.iterrows():
-            if (
-                str(row.get("PO Number", "")).lower().replace(" ", "") == str(inv_po_number).lower().replace(" ", "")
-            ):
-                matched_po = row
-                explanation += f"Matched on PO Number: {inv_po_number}\n"
                 break
-    if matched_po is None and inv_supplier:
-        potential_matches = po_df[po_df["Supplier Name"].str.lower().str.strip() == inv_supplier.lower().strip()]
-        if not potential_matches.empty:
-            matched_po = potential_matches.iloc[0]
-            explanation += f"Matched on Supplier Name: {inv_supplier}\n"
     if matched_po is not None:
-        return f"PO matched: {matched_po.to_dict()}"
-    return "No matching PO found."
 if po_df is not None:
     st.session_state["po_df"] = po_df
 if extracted_info is not None and po_df is not None:
     if st.button("Make a decision (AI Agent)"):
         tools = [
             Tool(
                 name="po_match_tool",
                 func=po_match_tool_func,
-                description="Use this tool to check if the invoice matches any PO in the current PO list.",
             )
         ]
-        openai_api_key = os.getenv("OPENAI_API_KEY")
-        llm = ChatOpenAI(
-            openai_api_key=openai_api_key,
-            model="gpt-4-1106-preview",
             temperature=0,
             streaming=False,
         )
         agent = initialize_agent(
             tools,
-            llm,
             agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
             verbose=True,
         )
         prompt = (
-            f"Below is an extracted invoice in JSON and a list of active POs is loaded in the system. "
-            f"Use po_match_tool to check if the invoice matches any PO in the current PO list. "
-            f"Step by step, reason whether the invoice matches an active PO and can be approved. "
-            f"If there is a match, state the matched PO, otherwise explain why not. "
-            f"At the end, respond with a JSON like this: "
-            f'{{"decision": "APPROVED or REJECTED", "reason": "<short explanation>"}}.\n'
             f"Invoice JSON:\n{json.dumps(extracted_info, indent=2)}"
         )
         with st.spinner("AI is reasoning and making a decision..."):
             result = agent.run(prompt)
         try:
             result_json = json.loads(result)
-            st.subheader("AI Decision")
             st.write(f"**Decision:** {result_json.get('decision', 'N/A')}")
             st.write(f"**Reason:** {result_json.get('reason', 'N/A')}")
         except Exception:

 # -------- LLM Model Setup --------
 MODELS = {
+    "DeepSeek v3": {
+        "api_url": "https://api.deepseek.com/v1/chat/completions",
+        "model": "deepseek-chat",
+        "key_env": "DEEPSEEK_API_KEY",
+        "response_format": {"type": "json_object"},
+    },
+    "DeepSeek R1": {
+        "api_url": "https://api.deepseek.com/v1/chat/completions",
+        "model": "deepseek-reasoner",
+        "key_env": "DEEPSEEK_API_KEY",
+        "response_format": None,
+    },
     "OpenAI GPT-4.1": {
         "api_url": "https://api.openai.com/v1/chat/completions",
         "model": "gpt-4-1106-preview",
         "response_format": None,
         "extra_headers": {},
     },
+    "Mistral Small": {
+        "api_url": "https://openrouter.ai/api/v1/chat/completions",
+        "model": "mistralai/ministral-8b",
+        "key_env": "OPENROUTER_API_KEY",
+        "response_format": {"type": "json_object"},
+        "extra_headers": {
+            "HTTP-Referer": "https://huggingface.co",
+            "X-Title": "Invoice Extractor",
+        },
+    },
 }
 def get_api_key(model_choice):
         "Shipment/invoice-level fields such as CAR NUMBER, SHIPPING POINT, SHIPMENT NUMBER, CURRENCY, etc., must go ONLY into the 'invoice_header', not as line item fields.\n"
         "Use this schema:\n"
         '{\n'
+        '  "invoice_header": {...},\n'
+        '  "line_items": [ {...} ]\n'
+        '}\n'
+        "If a field is missing for a line item or header, use null. "
         "Do not invent fields. Do not add any header or shipment data to any line item. Return ONLY the JSON object, no explanation.\n"
         "\nInvoice Text:\n"
         f"{txt}"
     st.sidebar.dataframe(po_df.head())
 st.title("Invoice/Document Extractor")
+mdl = st.selectbox("Model for Extraction", list(MODELS.keys()), key="extract_model")
 inv_file = st.file_uploader(
     "Step 2: Upload Invoice or Document File",
     type=["pdf", "docx", "xlsx", "xls", "png", "jpg", "jpeg", "tiff"]
 def _cell_text(value):
     """Return ``value`` as text, mapping None and float NaN to an empty string."""
     # NaN is the only float that is not equal to itself; str(NaN) would give
     # the literal "nan", which substring checks could match by accident.
     if value is None or (isinstance(value, float) and value != value):
         return ""
     return str(value)


 def po_match_tool_func(input_text):
     """Find the purchase order that best matches the last extracted invoice.

     Reads the invoice from ``st.session_state["last_extracted_info"]`` and the
     PO table from ``st.session_state["po_df"]``. Matching is attempted in
     strict priority order over the whole table, so a weaker criterion on an
     early row can never shadow a stronger one on a later row:

     1. PO number (case-insensitive, all spaces removed) against "PO Number".
     2. Supplier name (case-insensitive, trimmed) against "Supplier Name".
     3. Any invoice line-item description contained in the PO "Description".

     Args:
         input_text: Tool input supplied by the agent. It is not read; all
             data comes from the session state.

     Returns:
         A human-readable string: the matched PO row as a dict followed by the
         reason for the match, or a message saying nothing matched or that the
         invoice / PO data is missing.
     """
     invoice = st.session_state.get("last_extracted_info")
     po_df = st.session_state.get("po_df")
     if invoice is None or po_df is None:
         return "Invoice or PO data not found."

     # The invoice is model output, so the header or line items may be
     # missing or explicitly null; treat both as empty rather than crashing.
     inv_hdr = invoice.get("invoice_header") or {}
     inv_po_number = (
         inv_hdr.get("purchase_order_number")
         or inv_hdr.get("order_number")
         or inv_hdr.get("our_order_number")
     )
     inv_supplier = inv_hdr.get("supplier_name")
     inv_line_items = invoice.get("line_items") or []

     # Normalise once; _cell_text also copes with numeric values.
     wanted_po = _cell_text(inv_po_number).lower().replace(" ", "")
     wanted_supplier = _cell_text(inv_supplier).lower().strip()

     matched_po = None
     explanation = ""

     # Pass 1: exact PO number, checked against every row first.
     if wanted_po:
         for _, row in po_df.iterrows():
             po_number = _cell_text(row.get("PO Number", "")).lower().replace(" ", "")
             if po_number == wanted_po:
                 matched_po = row
                 explanation += f"Matched on PO Number: {inv_po_number}. "
                 break

     # Pass 2: supplier name, only when no PO number matched anywhere.
     if matched_po is None and wanted_supplier:
         for _, row in po_df.iterrows():
             supplier = _cell_text(row.get("Supplier Name", "")).lower().strip()
             if supplier == wanted_supplier:
                 matched_po = row
                 explanation += f"Matched on Supplier Name: {inv_supplier}. "
                 break

     # Pass 3: line-item descriptions contained in a PO description.
     if matched_po is None:
         # (original text, lowercase needle) pairs; blank descriptions are
         # dropped because an empty needle is contained in every string.
         candidates = []
         for line in inv_line_items:
             if not isinstance(line, dict):
                 continue
             raw = line.get("description")
             needle = _cell_text(raw).strip().lower()
             if needle:
                 candidates.append((raw, needle))

         if candidates:
             for _, row in po_df.iterrows():
                 po_desc = _cell_text(row.get("Description", "")).lower()
                 hit = next((raw for raw, needle in candidates if needle in po_desc), None)
                 if hit is not None:
                     matched_po = row
                     explanation += f"Matched on line item description: '{hit}'. "
                     break

     if matched_po is not None:
         return f"PO matched: {matched_po.to_dict()}. {explanation}"
     return "No matching PO found based on PO Number, Supplier, or Line Items."
 if po_df is not None:
     st.session_state["po_df"] = po_df
+# -------------- DECISION SECTION --------------
 if extracted_info is not None and po_df is not None:
+    st.markdown("---")
+    st.subheader("AI Agent Decision")
+    decision_model = st.selectbox(
+        "Model for AI Decision",
+        list(MODELS.keys()),
+        key="decision_model"
+    )
     if st.button("Make a decision (AI Agent)"):
         tools = [
             Tool(
                 name="po_match_tool",
                 func=po_match_tool_func,
+                description="Use this tool to check if the invoice matches any PO in the current PO list, including by line items.",
             )
         ]
+        decision_llm = ChatOpenAI(
+            openai_api_key=get_api_key(decision_model),
+            model=MODELS[decision_model]["model"],
             temperature=0,
             streaming=False,
         )
         agent = initialize_agent(
             tools,
+            decision_llm,
             agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
             verbose=True,
         )
         prompt = (
+            "You are an expert accounts payable decision agent.\n"
+            "You are given an extracted invoice in JSON and have access to a tool called po_match_tool, which can check for matches with all available POs (including matching line items/descriptions between invoice and PO).\n"
+            "To approve an invoice, you must verify at least one of the following:\n"
+            "- The PO number matches a PO\n"
+            "- The supplier name matches a PO\n"
+            "- At least one line item description or quantity/price matches with a PO's item\n"
+            "If you can't match on PO number or supplier, do your best to match using the invoice's line items (description/quantity/unit price/etc) and the PO data, and explain your reasoning step by step."
+            "In your reasoning, list all fields and line items that matched, or say if nothing matched (be specific about what was compared)."
+            "At the end, respond in this JSON format ONLY:\n"
+            '{"decision": "APPROVED or REJECTED", "reason": "<detailed step-by-step explanation for your decision, showing what matched and what did not, including line item checks>"}\n'
             f"Invoice JSON:\n{json.dumps(extracted_info, indent=2)}"
         )
         with st.spinner("AI is reasoning and making a decision..."):
             result = agent.run(prompt)
         try:
             result_json = json.loads(result)
             st.write(f"**Decision:** {result_json.get('decision', 'N/A')}")
             st.write(f"**Reason:** {result_json.get('reason', 'N/A')}")
         except Exception: