AP_AGENT

Sleeping

App Files Files Community

Seth0330 committed on Jun 17, 2025

Commit

1d73f48

verified ·

1 Parent(s): 042447e

Update app.py

Browse files

Files changed (1) hide show

app.py +89 -15

app.py CHANGED Viewed

@@ -121,15 +121,88 @@ def get_extraction_prompt(model_choice, txt):
         "Shipment/invoice-level fields such as CAR NUMBER, SHIPPING POINT, SHIPMENT NUMBER, CURRENCY, etc., must go ONLY into the 'invoice_header', not as line item fields.\n"
         "Use this schema:\n"
         '{\n'
-        '  "invoice_header": {...},\n'
-        '  "line_items": [ {...} ]\n'
-        '}\n'
-        "If a field is missing for a line item or header, use null. "
         "Do not invent fields. Do not add any header or shipment data to any line item. Return ONLY the JSON object, no explanation.\n"
         "\nInvoice Text:\n"
         f"{txt}"
     )
 def extract_invoice_info(model_choice, text):
     prompt = get_extraction_prompt(model_choice, text)
     raw = query_llm(model_choice, prompt)
@@ -145,6 +218,8 @@ def extract_invoice_info(model_choice, text):
         hdr.setdefault(k, None)
     if not hdr.get("supplier_name"):
         hdr["supplier_name"] = fallback_supplier(text)
     items = data.get("line_items", [])
     if not isinstance(items, list):
         items = []
@@ -188,7 +263,7 @@ def extract_text_from_unstract(uploaded_file):
     status_url = f"{UNSTRACT_BASE}/whisper-status?whisper_hash={whisper_hash}"
     status_placeholder = st.empty()
-    for i in range(30):  # Wait up to 60s (2s x 30)
         status_r = requests.get(status_url, headers={"unstract-key": UNSTRACT_API_KEY})
         if status_r.status_code != 200:
             st.error(f"Unstract: Error checking status: {status_r.status_code} - {status_r.text}")
@@ -214,8 +289,6 @@ def extract_text_from_unstract(uploaded_file):
     except Exception:
         return r.text
-# --- Utility functions for fuzzy and normalized matching ---
 def clean_num(val):
     if not val: return None
     val = re.sub(r"[^0-9.\-]", "", str(val))
@@ -228,7 +301,6 @@ def normalize(s):
     if not s: return ""
     return re.sub(r"\W+", "", str(s).lower().strip())
-# --------- Upload PO CSV ---------
 st.sidebar.header("Step 1: Upload Active Purchase Orders (POs)")
 po_file = st.sidebar.file_uploader(
     "Upload POs CSV (must include PO number, Supplier, Items, etc.)",
@@ -240,6 +312,7 @@ if po_file:
     po_df = pd.read_csv(po_file)
     st.sidebar.success(f"Loaded {len(po_df)} Purchase Orders.")
     st.sidebar.dataframe(po_df.head())
 st.title("Invoice/Document Extractor")
 mdl = st.selectbox("Model for Extraction", list(MODELS.keys()), key="extract_model")
@@ -247,7 +320,6 @@ inv_file = st.file_uploader(
     "Step 2: Upload Invoice or Document File",
     type=["pdf", "docx", "xlsx", "xls", "png", "jpg", "jpeg", "tiff"]
 )
-extracted_info = None
 if st.button("Extract") and inv_file:
     with st.spinner("Extracting text from document using Unstract..."):
@@ -255,19 +327,22 @@ if st.button("Extract") and inv_file:
     if text:
         extracted_info = extract_invoice_info(mdl, text)
         if extracted_info:
             st.success("Extraction Complete")
             st.subheader("Invoice Metadata")
             st.table([{k.replace("_", " ").title(): v for k, v in extracted_info["invoice_header"].items()}])
             st.subheader("Line Items")
             st.table(extracted_info["line_items"])
-            st.session_state["last_extracted_info"] = extracted_info  # store in session
-extracted_info = extracted_info or st.session_state.get("last_extracted_info", None)
-# --------- Classic ReAct AGENT ---------
 def po_match_tool_func(input_text):
     invoice = st.session_state.get("last_extracted_info")
-    po_df = st.session_state.get("po_df")
     debug = {}
     if invoice is None or po_df is None:
         return json.dumps({
@@ -383,9 +458,8 @@ def po_match_tool_func(input_text):
         })
 if po_df is not None:
-    st.session_state["po_df"] = po_df
-# -------------- DECISION SECTION --------------
 if extracted_info is not None and po_df is not None:
     st.markdown("---")
     st.subheader("AI Agent Decision")

         "Shipment/invoice-level fields such as CAR NUMBER, SHIPPING POINT, SHIPMENT NUMBER, CURRENCY, etc., must go ONLY into the 'invoice_header', not as line item fields.\n"
         "Use this schema:\n"
         '{\n'
+        '  "invoice_header": {\n'
+        '    "car_number": "string or null",\n'
+        '    "shipment_number": "string or null",\n'
+        '    "shipping_point": "string or null",\n'
+        '    "currency": "string or null",\n'
+        '    "invoice_number": "string or null",\n'
+        '    "invoice_date": "string or null",\n'
+        '    "order_number": "string or null",\n'
+        '    "customer_order_number": "string or null",\n'
+        '    "our_order_number": "string or null",\n'
+        '    "sales_order_number": "string or null",\n'
+        '    "purchase_order_number": "string or null",\n'
+        '    "order_date": "string or null",\n'
+        '    "supplier_name": "string or null",\n'
+        '    "supplier_address": "string or null",\n'
+        '    "supplier_phone": "string or null",\n'
+        '    "supplier_email": "string or null",\n'
+        '    "supplier_tax_id": "string or null",\n'
+        '    "customer_name": "string or null",\n'
+        '    "customer_address": "string or null",\n'
+        '    "customer_phone": "string or null",\n'
+        '    "customer_email": "string or null",\n'
+        '    "customer_tax_id": "string or null",\n'
+        '    "ship_to_name": "string or null",\n'
+        '    "ship_to_address": "string or null",\n'
+        '    "bill_to_name": "string or null",\n'
+        '    "bill_to_address": "string or null",\n'
+        '    "remit_to_name": "string or null",\n'
+        '    "remit_to_address": "string or null",\n'
+        '    "tax_id": "string or null",\n'
+        '    "tax_registration_number": "string or null",\n'
+        '    "vat_number": "string or null",\n'
+        '    "payment_terms": "string or null",\n'
+        '    "payment_method": "string or null",\n'
+        '    "payment_reference": "string or null",\n'
+        '    "bank_account_number": "string or null",\n'
+        '    "iban": "string or null",\n'
+        '    "swift_code": "string or null",\n'
+        '    "total_before_tax": "string or null",\n'
+        '    "tax_amount": "string or null",\n'
+        '    "tax_rate": "string or null",\n'
+        '    "shipping_charges": "string or null",\n'
+        '    "discount": "string or null",\n'
+        '    "total_due": "string or null",\n'
+        '    "amount_paid": "string or null",\n'
+        '    "balance_due": "string or null",\n'
+        '    "due_date": "string or null",\n'
+        '    "invoice_status": "string or null",\n'
+        '    "reference_number": "string or null",\n'
+        '    "project_code": "string or null",\n'
+        '    "department": "string or null",\n'
+        '    "contact_person": "string or null",\n'
+        '    "notes": "string or null",\n'
+        '    "additional_info": "string or null"\n'
+        '  },\n'
+        '  "line_items": [\n'
+        '    {\n'
+        '      "quantity": "string or null",\n'
+        '      "units": "string or null",\n'
+        '      "description": "string or null",\n'
+        '      "footage": "string or null",\n'
+        '      "price": "string or null",\n'
+        '      "amount": "string or null",\n'
+        '      "notes": "string or null"\n'
+        '    }\n'
+        '  ]\n'
+        '}'
+        "\nIf a field is missing for a line item or header, use null. "
         "Do not invent fields. Do not add any header or shipment data to any line item. Return ONLY the JSON object, no explanation.\n"
         "\nInvoice Text:\n"
         f"{txt}"
     )
+def ensure_total_due(invoice_header):
+    # Backfill invoice_header["total_due"] when it is None or "": copy the first truthy value among the fallback fields listed below; mutates and returns the same dict.
+    if invoice_header.get("total_due") in [None, ""]:
+        for field in ["invoice_total", "invoice_value", "total_before_tax", "balance_due", "amount_paid"]:
+            if field in invoice_header and invoice_header[field]:
+                invoice_header["total_due"] = invoice_header[field]
+                break
+    return invoice_header
 def extract_invoice_info(model_choice, text):
     prompt = get_extraction_prompt(model_choice, text)
     raw = query_llm(model_choice, prompt)
         hdr.setdefault(k, None)
     if not hdr.get("supplier_name"):
         hdr["supplier_name"] = fallback_supplier(text)
+    # Guarantee total_due is always present (if at all possible)
+    hdr = ensure_total_due(hdr)
     items = data.get("line_items", [])
     if not isinstance(items, list):
         items = []
     status_url = f"{UNSTRACT_BASE}/whisper-status?whisper_hash={whisper_hash}"
     status_placeholder = st.empty()
+    for i in range(30):
         status_r = requests.get(status_url, headers={"unstract-key": UNSTRACT_API_KEY})
         if status_r.status_code != 200:
             st.error(f"Unstract: Error checking status: {status_r.status_code} - {status_r.text}")
     except Exception:
         return r.text
 def clean_num(val):
     if not val: return None
     val = re.sub(r"[^0-9.\-]", "", str(val))
     if not s: return ""
     return re.sub(r"\W+", "", str(s).lower().strip())
 st.sidebar.header("Step 1: Upload Active Purchase Orders (POs)")
 po_file = st.sidebar.file_uploader(
     "Upload POs CSV (must include PO number, Supplier, Items, etc.)",
     po_df = pd.read_csv(po_file)
     st.sidebar.success(f"Loaded {len(po_df)} Purchase Orders.")
     st.sidebar.dataframe(po_df.head())
+    st.session_state['last_po_df'] = po_df  # Save PO to session
 st.title("Invoice/Document Extractor")
 mdl = st.selectbox("Model for Extraction", list(MODELS.keys()), key="extract_model")
     "Step 2: Upload Invoice or Document File",
     type=["pdf", "docx", "xlsx", "xls", "png", "jpg", "jpeg", "tiff"]
 )
 if st.button("Extract") and inv_file:
     with st.spinner("Extracting text from document using Unstract..."):
     if text:
         extracted_info = extract_invoice_info(mdl, text)
         if extracted_info:
+            if "invoice_header" in extracted_info:
+                extracted_info["invoice_header"] = ensure_total_due(extracted_info["invoice_header"])
             st.success("Extraction Complete")
             st.subheader("Invoice Metadata")
             st.table([{k.replace("_", " ").title(): v for k, v in extracted_info["invoice_header"].items()}])
             st.subheader("Line Items")
             st.table(extracted_info["line_items"])
+            st.session_state['last_extracted_info'] = extracted_info
+# Always retrieve latest extracted info and PO df from session state!
+extracted_info = st.session_state.get('last_extracted_info', None)
+po_df = st.session_state.get('last_po_df', None)
 def po_match_tool_func(input_text):
     invoice = st.session_state.get("last_extracted_info")
+    po_df = st.session_state.get("last_po_df")
     debug = {}
     if invoice is None or po_df is None:
         return json.dumps({
         })
 if po_df is not None:
+    st.session_state["last_po_df"] = po_df
 if extracted_info is not None and po_df is not None:
     st.markdown("---")
     st.subheader("AI Agent Decision")