DOC_VALID_AGENT

Sleeping

App Files Files Community

Seth0330 committed on Jun 17, 2025

Commit

2cf6487

verified ·

1 Parent(s): fc63d23

Update app.py

Browse files

Files changed (1) hide show

app.py +195 -421

app.py CHANGED Viewed

@@ -6,58 +6,122 @@ import os
 import time
 import mimetypes
 import pandas as pd
 from langchain_community.chat_models import ChatOpenAI
 from langchain.agents import initialize_agent, Tool, AgentType
 from fuzzywuzzy import fuzz
-st.set_page_config(page_title="Accounts Payable AI Agent", layout="wide")
-MODELS = {
-    "OpenAI GPT-4.1": {
-        "api_url": "https://api.openai.com/v1/chat/completions",
-        "model": "gpt-4-1106-preview",
-        "key_env": "OPENAI_API_KEY",
-        "response_format": None,
-        "extra_headers": {},
-    },
 }
-def get_api_key(model_choice):
-    key = os.getenv(MODELS[model_choice]["key_env"])
     if not key:
-        st.error(f"❌ {MODELS[model_choice]['key_env']} not set")
         st.stop()
     return key
-def query_llm(model_choice, prompt):
-    cfg = MODELS[model_choice]
     headers = {
-        "Authorization": f"Bearer {get_api_key(model_choice)}",
         "Content-Type": "application/json",
     }
-    if cfg.get("extra_headers"):
-        headers.update(cfg["extra_headers"])
     payload = {
-        "model": cfg["model"],
         "messages": [{"role": "user", "content": prompt}],
         "temperature": 0.1,
         "max_tokens": 2000,
     }
-    if cfg.get("response_format"):
-        payload["response_format"] = cfg["response_format"]
-    try:
-        with st.spinner(f"🔍 Fine Tuning The Extracted Data..."):
-            r = requests.post(cfg["api_url"], headers=headers, json=payload, timeout=90)
-        if r.status_code != 200:
-            st.error(f"🚨 API Error {r.status_code}: {r.text}")
-            return None
-        content = r.json()["choices"][0]["message"]["content"]
-        st.session_state.last_api = content
-        st.session_state.last_raw = r.text
-        return content
-    except Exception as e:
-        st.error(f"Connection error: {e}")
         return None
 def clean_json_response(text):
     if not text:
@@ -82,102 +146,28 @@ def clean_json_response(text):
             st.code(frag)
             return None
-def fallback_supplier(text):
-    for line in text.splitlines():
-        line = line.strip()
-        if line:
-            return line
-    return None
-def get_extraction_prompt(model_choice, txt):
     return (
-        "You are an expert invoice parser. "
-        "Extract data according to the visible table structure and column headers in the invoice. "
-        "For every line item, only extract fields that correspond to the table columns for that row (do not include header/shipment fields in line items). "
-        "Merge all multi-line content within a single cell into that field (especially for the 'description' and 'notes'). "
-        "Shipment/invoice-level fields such as CAR NUMBER, SHIPPING POINT, SHIPMENT NUMBER, CURRENCY, etc., must go ONLY into the 'invoice_header', not as line item fields.\n"
         "Use this schema:\n"
-        '{\n'
-        '  "invoice_header": {\n'
-        '    "car_number": "string or null",\n'
-        '    "shipment_number": "string or null",\n'
-        '    "shipping_point": "string or null",\n'
-        '    "currency": "string or null",\n'
-        '    "invoice_number": "string or null",\n'
-        '    "invoice_date": "string or null",\n'
-        '    "order_number": "string or null",\n'
-        '    "customer_order_number": "string or null",\n'
-        '    "our_order_number": "string or null",\n'
-        '    "sales_order_number": "string or null",\n'
-        '    "purchase_order_number": "string or null",\n'
-        '    "order_date": "string or null",\n'
-        '    "supplier_name": "string or null",\n'
-        '    "supplier_address": "string or null",\n'
-        '    "supplier_phone": "string or null",\n'
-        '    "supplier_email": "string or null",\n'
-        '    "supplier_tax_id": "string or null",\n'
-        '    "customer_name": "string or null",\n'
-        '    "customer_address": "string or null",\n'
-        '    "customer_phone": "string or null",\n'
-        '    "customer_email": "string or null",\n'
-        '    "customer_tax_id": "string or null",\n'
-        '    "ship_to_name": "string or null",\n'
-        '    "ship_to_address": "string or null",\n'
-        '    "bill_to_name": "string or null",\n'
-        '    "bill_to_address": "string or null",\n'
-        '    "remit_to_name": "string or null",\n'
-        '    "remit_to_address": "string or null",\n'
-        '    "tax_id": "string or null",\n'
-        '    "tax_registration_number": "string or null",\n'
-        '    "vat_number": "string or null",\n'
-        '    "payment_terms": "string or null",\n'
-        '    "payment_method": "string or null",\n'
-        '    "payment_reference": "string or null",\n'
-        '    "bank_account_number": "string or null",\n'
-        '    "iban": "string or null",\n'
-        '    "swift_code": "string or null",\n'
-        '    "total_before_tax": "string or null",\n'
-        '    "tax_amount": "string or null",\n'
-        '    "tax_rate": "string or null",\n'
-        '    "shipping_charges": "string or null",\n'
-        '    "discount": "string or null",\n'
-        '    "total_due": "string or null",\n'
-        '    "amount_paid": "string or null",\n'
-        '    "balance_due": "string or null",\n'
-        '    "due_date": "string or null",\n'
-        '    "invoice_status": "string or null",\n'
-        '    "reference_number": "string or null",\n'
-        '    "project_code": "string or null",\n'
-        '    "department": "string or null",\n'
-        '    "contact_person": "string or null",\n'
-        '    "notes": "string or null",\n'
-        '    "additional_info": "string or null"\n'
-        '  },\n'
-        '  "line_items": [\n'
-        '    {\n'
-        '      "quantity": "string or null",\n'
-        '      "units": "string or null",\n'
-        '      "description": "string or null",\n'
-        '      "footage": "string or null",\n'
-        '      "price": "string or null",\n'
-        '      "amount": "string or null",\n'
-        '      "notes": "string or null"\n'
-        '    }\n'
-        '  ]\n'
-        '}'
-        "\nIf a field is missing for a line item or header, use null. "
-        "Do not invent fields. Do not add any header or shipment data to any line item. Return ONLY the JSON object, no explanation.\n"
         "\nInvoice Text:\n"
         f"{txt}"
     )
-def ensure_total_due(invoice_header):
-    if invoice_header.get("total_due") in [None, ""]:
-        for field in ["invoice_total", "invoice_value", "total_before_tax", "balance_due", "amount_paid"]:
-            if field in invoice_header and invoice_header[field]:
-                invoice_header["total_due"] = invoice_header[field]
-                break
-    return invoice_header
 def clean_num(val):
     if val is None:
@@ -197,246 +187,85 @@ def weighted_fuzzy_score(s1, s2):
         return 100
     return fuzz.token_set_ratio(str(s1).lower(), str(s2).lower())
-def find_po_number_in_json(po_number, invoice_json):
-    def _flatten(obj):
-        fields = []
-        if isinstance(obj, dict):
-            for v in obj.values():
-                fields.extend(_flatten(v))
-        elif isinstance(obj, list):
-            for item in obj:
-                fields.extend(_flatten(item))
-        elif obj is not None:
-            fields.append(str(obj))
-        return fields
-    po_str = str(po_number).strip().replace(" ", "").replace(".0", "")
-    try:
-        po_int = str(int(float(po_number)))
-    except:
-        po_int = po_str
-    all_strs = [str(s).strip().replace(" ", "").replace(".0", "") for s in _flatten(invoice_json)]
-    for s in all_strs:
-        if not s:
-            continue
-        if po_str and (po_str in s or s in po_str):
-            return True
-        if po_int and (po_int in s or s in po_int):
-            return True
-    return False
-# --- Step 1: Upload POs CSV (very top) ---
-st.sidebar.header("Step 1: Upload Active Purchase Orders (POs)")
-po_file = st.sidebar.file_uploader(
-    "Upload POs CSV (must include PO number, Supplier, Items, etc.)",
-    type=["csv"],
-    key="po_csv"
-)
-po_df = None
-if po_file:
-    po_df = pd.read_csv(po_file)
-    st.sidebar.success(f"Loaded {len(po_df)} rows from uploaded CSV.")
-    st.sidebar.dataframe(po_df.head())
-    st.session_state['last_po_df'] = po_df  # Save PO to session
-# --- Set Scoring Weights (Total = 100%) ---
-st.sidebar.header("Set Scoring Weights (Total = 100%)")
-def int_slider(label, value, key):
-    # A slider with number input
-    return st.sidebar.slider(label, 0, 100, value, 1, key=key, format="%d")
-weight_supplier = int_slider("Supplier Name Weight (%)", 25, "w_supplier")
-weight_po_number = int_slider("PO Number Weight (%)", 25, "w_po")
-weight_currency = int_slider("Currency Weight (%)", 10, "w_curr")
-weight_total_due = int_slider("Total Due Weight (%)", 20, "w_due")
-weight_line_item = int_slider("Line Item Weight (%)", 20, "w_line")
-weight_sum = weight_supplier + weight_po_number + weight_currency + weight_total_due + weight_line_item
-if weight_sum != 100:
-    st.sidebar.warning(f"Sum of weights is {weight_sum}%. Adjust so it equals 100%.")
-# --- Thresholds for decision ---
-st.sidebar.header("Set Decision Thresholds")
-approved_threshold = st.sidebar.slider("Threshold for 'APPROVED'", min_value=0, max_value=100, value=85, format="%d")
-partial_threshold = st.sidebar.slider("Threshold for 'PARTIALLY APPROVED'", min_value=0, max_value=approved_threshold-1, value=70, format="%d")
-def find_best_po_match(inv, po_df):
     inv_hdr = inv["invoice_header"]
     inv_supplier = inv_hdr.get("supplier_name") or ""
-    inv_po_number = inv_hdr.get("purchase_order_number") or inv_hdr.get("po_number") or inv_hdr.get("order_number") or ""
     inv_currency = inv_hdr.get("currency") or ""
     inv_total_due = clean_num(inv_hdr.get("total_due"))
     inv_line_items = inv.get("line_items", [])
     scores = []
     for idx, row in po_df.iterrows():
         po_supplier = row.get("Supplier Name", "")
         po_po_number = str(row.get("PO Number", ""))
         po_currency = row.get("Currency", "")
         po_total = clean_num(row.get("PO Total Value", ""))
-        po_desc = row.get("Item Description", "")
-        po_qty = str(row.get("Item Quantity", ""))
-        po_unit = str(row.get("Item Unit Price", ""))
-        po_line_total = clean_num(row.get("Line Item Total", ""))
-        field_details = []
         s_supplier = weighted_fuzzy_score(inv_supplier, po_supplier)
-        field_details.append({
-            "field": "Supplier Name",
-            "invoice": inv_supplier,
-            "po": po_supplier,
-            "score": s_supplier
-        })
-        s_po_number = 100 if find_po_number_in_json(po_po_number, inv) else 0
-        field_details.append({
-            "field": "PO Number (anywhere in JSON)",
-            "invoice": "found" if s_po_number else "not found",
-            "po": po_po_number,
-            "score": s_po_number
-        })
         s_currency = weighted_fuzzy_score(inv_currency, po_currency)
-        field_details.append({
-            "field": "Currency",
-            "invoice": inv_currency,
-            "po": po_currency,
-            "score": s_currency
-        })
         s_total = 100 if inv_total_due is not None and po_total is not None and abs(inv_total_due - po_total) < 2 else 0
-        field_details.append({
-            "field": "Total Due",
-            "invoice": inv_total_due,
-            "po": po_total,
-            "score": s_total
-        })
-        # Line item logic as before
         line_item_score = 0
-        line_reason = ""
-        best_line_detail = None
         for line in inv_line_items:
-            desc_score = weighted_fuzzy_score(line.get("description", ""), po_desc)
-            qty_score = 100 if clean_num(line.get("quantity")) == clean_num(po_qty) else 0
-            unit_score = 100 if clean_num(line.get("price")) == clean_num(po_unit) else 0
-            amount_score = 100 if clean_num(line.get("amount")) == po_line_total else 0
             total = desc_score * 0.5 + qty_score * 0.2 + unit_score * 0.15 + amount_score * 0.15
-            detail = {
-                "field": "Line Item",
-                "invoice": {
-                    "description": line.get("description", ""),
-                    "quantity": line.get("quantity", ""),
-                    "price": line.get("price", ""),
-                    "amount": line.get("amount", ""),
-                },
-                "po": {
-                    "description": po_desc,
-                    "quantity": po_qty,
-                    "price": po_unit,
-                    "amount": po_line_total,
-                },
-                "desc_score": desc_score,
-                "qty_score": qty_score,
-                "unit_score": unit_score,
-                "amount_score": amount_score,
-                "line_item_score": total
-            }
             if total > line_item_score:
                 line_item_score = total
-                best_line_detail = detail
-                line_reason = (
-                    f"Best line item: desc_score={desc_score}, qty_score={qty_score}, "
-                    f"unit_score={unit_score}, amount_score={amount_score}"
-                )
-        wsum = weight_supplier + weight_po_number + weight_currency + weight_total_due + weight_line_item
         total_score = (
-            s_supplier * weight_supplier/100 +
-            s_po_number * weight_po_number/100 +
-            s_currency * weight_currency/100 +
-            s_total * weight_total_due/100 +
-            line_item_score * weight_line_item/100
-        ) if wsum == 100 else 0
         reason = (
-            f"Supplier match: {s_supplier}/100 (invoice: '{inv_supplier}' vs PO: '{po_supplier}'), "
-            f"PO Number: {s_po_number}/100 ({'found anywhere in JSON' if s_po_number else 'not found'}), "
-            f"Currency: {s_currency}/100 (invoice: '{inv_currency}' vs PO: '{po_currency}'), "
-            f"Total Due: {'match' if s_total else 'no match'} (invoice: {inv_total_due} vs PO: {po_total}), "
-            f"Line item best match: {int(line_item_score)}/100. {line_reason}"
         )
         debug = {
             "po_idx": idx,
-            "po_supplier": po_supplier,
-            "po_po_number": po_po_number,
-            "po_total": po_total,
-            "scores": field_details,
-            "line_item_score": line_item_score,
-            "best_line_detail": best_line_detail,
-            "total_score": total_score,
-            "line_reason": line_reason,
-            "inv_total_due": inv_total_due
         }
         scores.append((row, total_score, reason, debug))
     scores.sort(key=lambda tup: tup[1], reverse=True)
     if not scores:
         return None, 0, "No POs found.", {}
     best_row, best_score, reason, debug = scores[0]
     return best_row, best_score, reason, debug
-# --- Extraction, decision, and UI logic below is unchanged ---
-def extract_invoice_info(model_choice, text):
-    prompt = get_extraction_prompt(model_choice, text)
-    raw = query_llm(model_choice, prompt)
-    if not raw:
-        return None
-    data = clean_json_response(raw)
-    if not data:
-        return None
-    hdr = data.get("invoice_header", {})
-    if not hdr and any(k in data for k in ("invoice_number","supplier_name","customer_name")):
-        hdr = data
-    for k in ("invoice_number","invoice_date","po_number","invoice_value","supplier_name","customer_name"):
-        hdr.setdefault(k, None)
-    if not hdr.get("supplier_name"):
-        hdr["supplier_name"] = fallback_supplier(text)
-    hdr = ensure_total_due(hdr)
-    items = data.get("line_items", [])
-    if not isinstance(items, list):
-        items = []
-    for itm in items:
-        if not isinstance(itm, dict):
-            continue
-        for k in ("item_number","description","quantity","unit_price","total_price"):
-            itm.setdefault(k, None)
-    return {"invoice_header": hdr, "line_items": items}
-def get_content_type(filename):
-    mime, _ = mimetypes.guess_type(filename)
-    ext = filename.lower().split('.')[-1]
-    if ext == "pdf":
-        return "text/plain"
-    if mime is None:
-        return "application/octet-stream"
-    return mime
-UNSTRACT_BASE = "https://llmwhisperer-api.us-central.unstract.com/api/v2"
-UNSTRACT_API_KEY = os.getenv("UNSTRACT_API_KEY")
 def extract_text_from_unstract(uploaded_file):
     filename = getattr(uploaded_file, "name", "uploaded_file")
     file_bytes = uploaded_file.read()
-    content_type = get_content_type(filename)
     headers = {
-        "unstract-key": UNSTRACT_API_KEY,
         "Content-Type": content_type,
     }
-    url = f"{UNSTRACT_BASE}/whisper"
     with st.spinner("Uploading and processing document with EZOFIS AI OCR AGENT..."):
         r = requests.post(url, headers=headers, data=file_bytes)
         if r.status_code != 202:
@@ -446,11 +275,10 @@ def extract_text_from_unstract(uploaded_file):
         if not whisper_hash:
             st.error("Unstract: No whisper_hash received.")
             return None
-    status_url = f"{UNSTRACT_BASE}/whisper-status?whisper_hash={whisper_hash}"
     status_placeholder = st.empty()
     for i in range(30):
-        status_r = requests.get(status_url, headers={"unstract-key": UNSTRACT_API_KEY})
         if status_r.status_code != 200:
             st.error(f"Unstract: Error checking status: {status_r.status_code} - {status_r.text}")
             return None
@@ -463,9 +291,8 @@ def extract_text_from_unstract(uploaded_file):
     else:
         status_placeholder.error("Unstract: Timeout waiting for OCR to finish.")
         return None
-    retrieve_url = f"{UNSTRACT_BASE}/whisper-retrieve?whisper_hash={whisper_hash}&text_only=true"
-    r = requests.get(retrieve_url, headers={"unstract-key": UNSTRACT_API_KEY})
     if r.status_code != 200:
         st.error(f"Unstract: Error retrieving extracted text: {r.status_code} - {r.text}")
         return None
@@ -475,105 +302,52 @@ def extract_text_from_unstract(uploaded_file):
     except Exception:
         return r.text
-# --- Main page
-st.title("Invoice/Document Extractor")
-mdl = st.selectbox("Model for Extraction", list(MODELS.keys()), key="extract_model")
-inv_file = st.file_uploader(
-    "Step 2: Upload Invoice or Document File",
-    type=["pdf", "docx", "xlsx", "xls", "png", "jpg", "jpeg", "tiff"]
-)
 if st.button("Extract") and inv_file:
     with st.spinner("Extracting text from document using Unstract..."):
         text = extract_text_from_unstract(inv_file)
     if text:
-        extracted_info = extract_invoice_info(mdl, text)
         if extracted_info:
-            if "invoice_header" in extracted_info:
-                extracted_info["invoice_header"] = ensure_total_due(extracted_info["invoice_header"])
-            st.success("Extraction Complete")
-            st.subheader("Invoice Metadata")
-            st.table([{k.replace("_", " ").title(): v for k, v in extracted_info["invoice_header"].items()}])
-            st.subheader("Line Items")
-            st.table(extracted_info["line_items"])
             st.session_state['last_extracted_info'] = extracted_info
 extracted_info = st.session_state.get('last_extracted_info', None)
 po_df = st.session_state.get('last_po_df', None)
-def po_match_tool_func(input_text):
-    invoice = st.session_state.get("last_extracted_info")
-    po_df = st.session_state.get("last_po_df")
-    if invoice is None or po_df is None:
-        return json.dumps({
-            "decision": "REJECTED",
-            "reason": "Invoice or PO data not found.",
-            "debug": {},
-        })
-    best_row, best_score, reason, debug = find_best_po_match(invoice, po_df)
-    if best_score > approved_threshold:
-        status = "APPROVED"
-    elif best_score > partial_threshold:
-        status = "PARTIALLY APPROVED"
-    else:
-        status = "REJECTED"
-    return json.dumps({
-        "decision": status,
-        "reason": f"Best match score: {int(best_score)}/100. {reason}",
-        "debug": debug,
-        "po_row": best_row.to_dict() if best_row is not None else None
-    })
-if po_df is not None:
-    st.session_state["last_po_df"] = po_df
-if extracted_info is not None and po_df is not None:
-    st.markdown("---")
-    st.subheader("EZOFIS AP AGENT Decision (OpenAI Only)")
     if st.button("Make a decision (EZOFIS AP AGENT)"):
-        tools = [
-            Tool(
-                name="po_match_tool",
-                func=po_match_tool_func,
-                description="Smartly match invoice to PO using all possible fields.",
-            )
-        ]
-        decision_llm = ChatOpenAI(
-            openai_api_key=get_api_key("OpenAI GPT-4.1"),
-            model=MODELS["OpenAI GPT-4.1"]["model"],
-            temperature=0,
-            streaming=False,
-        )
-        agent = initialize_agent(
-            tools,
-            decision_llm,
-            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
-            verbose=True,
-        )
-        prompt = (
-            "You are an expert accounts payable agent. "
-            "Use po_match_tool to check for the best possible match using supplier, PO number (which may appear anywhere in the invoice JSON, even within other fields), currency, line items, and total value. "
-            "Weigh the importance of each field as an expert would, according to the user-configured weights. "
-            "Return a JSON with decision (APPROVED, PARTIALLY APPROVED, REJECTED), reason (include field scores and reasoning), debug, and the best matched PO row.\n"
-            f"Invoice JSON:\n{json.dumps(extracted_info, indent=2)}"
-        )
-        with st.spinner("AI is reasoning and making a decision..."):
-            result = agent.run(prompt)
-        try:
-            result_json = json.loads(result)
-            st.write(f"**Decision:** {result_json.get('decision', 'N/A')}")
-            st.write(f"**Reason:** {result_json.get('reason', 'N/A')}")
-            with st.expander("Debug & Matching Details"):
-                st.json(result_json.get('debug'))
-                st.subheader("Extracted Invoice JSON")
-                st.json(extracted_info)
-                st.subheader("Matched PO Row")
-                st.json(result_json.get('po_row'))
-        except Exception:
-            st.subheader("AI Decision & Reason")
-            st.write(result)
-if "last_api" in st.session_state:
-    with st.expander("Debug"):
-        st.code(st.session_state.last_api)
-        st.code(st.session_state.last_raw)

 import time
 import mimetypes
 import pandas as pd
 from langchain_community.chat_models import ChatOpenAI
 from langchain.agents import initialize_agent, Tool, AgentType
 from fuzzywuzzy import fuzz
+# ---- Custom CSS to hide status and streamline look ----
+st.markdown("""
+<style>
+header[data-testid="stHeader"] {visibility: hidden;}
+#MainMenu, .stDeployButton {visibility: hidden;}
+.st-bb, .st-c6, .stDataFrameContainer, .stDataFrame {background: transparent !important;}
+.stButton>button {
+    background: linear-gradient(90deg, #1e88e5 0%, #0057b8 100%);
+    color: #fff !important;
+    border-radius: 8px !important;
+    font-weight: 600;
+    border: none;
+    box-shadow: 0 2px 8px rgba(30,136,229,0.15);
 }
+.stButton>button:hover {background: #1565c0;}
+</style>
+""", unsafe_allow_html=True)
+# ---- Sidebar ----
# Sidebar: step 1 of the workflow -- upload the purchase-order CSV that
# invoices are later matched against. The parsed frame is kept both in the
# module-level name ``po_df`` and in ``st.session_state['last_po_df']``.
with st.sidebar:
    st.markdown("<div style='font-size:1.25em; font-weight:700; margin-bottom:0.2em; margin-top:0.7em;'>Step 1: Upload Active Purchase Orders (POs)</div>", unsafe_allow_html=True)
    st.markdown("<div style='color:#eee; margin-bottom:1.1em; font-size:1em;'>Upload a POs CSV (must include PO number, Supplier, Items, etc.)</div>", unsafe_allow_html=True)
    # A non-empty label is kept for accessibility; it stays hidden because
    # label_visibility is "collapsed".
    po_file = st.file_uploader(
        "Upload POs CSV", type=["csv"], key="po_csv", label_visibility="collapsed"
    )
    po_df = None
    if po_file:
        try:
            po_df = pd.read_csv(po_file)
        except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
            # A malformed upload should be reported, not crash the whole app.
            po_df = None
            st.error(f"Could not read the PO CSV: {exc}")
        else:
            st.session_state['last_po_df'] = po_df
            st.success(f"{len(po_df)} rows uploaded and active.", icon="✅")
    else:
        st.markdown("<span style='color:#bbc2cf; font-size:0.9em'>No PO file uploaded yet.</span>", unsafe_allow_html=True)
    # unsafe_allow_html is required here too; without it the raw <hr> tag is
    # not rendered as a rule.
    st.markdown("<hr style='border:0.5px solid #324259; margin:2em 0 1em 0;'/>", unsafe_allow_html=True)
    st.markdown("<span style='color:#b6b8bc; font-size:0.93em;'>Need help? <b>Contact your admin</b></span>", unsafe_allow_html=True)
+# ---- Scoring Weights Section ----
+st.markdown('<div style="font-size:2rem;font-weight:700;color:#1e2a3a;margin-bottom:0.2em;margin-top:0.5em;">Invoice/Document Extractor</div>', unsafe_allow_html=True)
+st.markdown('<div style="color:#6073a3; margin-bottom:1.3em;">Digitally process and approve invoices with AI-powered PO matching.</div>', unsafe_allow_html=True)
+with st.container():
+    st.markdown('<div style="background:#fff;border-radius:14px;box-shadow:0 4px 32px rgba(34,48,90,0.09),0 1.5px 3.5px rgba(30,136,229,0.07);padding:2rem 2.5rem 1.5rem 2.5rem;margin-bottom:1.5em;">', unsafe_allow_html=True)
+    st.markdown("<h3>Set Scoring Weights (Total = 100%)</h3>", unsafe_allow_html=True)
+    if "scoring_weights" not in st.session_state:
+        st.session_state.scoring_weights = {
+            "Supplier": 20,
+            "PO Number": 25,
+            "Currency": 10,
+            "Total Due": 25,
+            "Line Item": 20,
+        }
+    scoring_weights = st.session_state.scoring_weights
+    total_weight = 0
+    cols = st.columns(len(scoring_weights))
+    field_keys = list(scoring_weights.keys())
+    for i, field in enumerate(field_keys):
+        val = cols[i].number_input(
+            f"{field} (%)",
+            min_value=0, max_value=100,
+            value=int(scoring_weights[field]),
+            key=f"scoring_{field}",
+            step=1,
+            format="%d"
+        )
+        scoring_weights[field] = val
+        total_weight += val
+    st.markdown(
+        f"<span style='font-size:1em; color:{'#E53935' if total_weight != 100 else '#3BB273'}; font-weight:600;'>"
+        f"Total = {total_weight}/100</span>",
+        unsafe_allow_html=True
+    )
+    if total_weight != 100:
+        st.warning("Scoring weights must sum to 100!", icon="⚠️")
+    st.markdown("</div>", unsafe_allow_html=True)
+# ---- Upload Invoice/Document ----
+with st.container():
+    st.markdown('<div style="background:#fff;border-radius:14px;box-shadow:0 4px 32px rgba(34,48,90,0.09),0 1.5px 3.5px rgba(30,136,229,0.07);padding:2rem 2.5rem 1.5rem 2.5rem;margin-bottom:1.5em;">', unsafe_allow_html=True)
+    st.markdown("<h3>Step 2: Upload Invoice or Document</h3>", unsafe_allow_html=True)
+    # -- NO model dropdown! --
+    inv_file = st.file_uploader(
+        "", type=["pdf", "docx", "xlsx", "xls", "png", "jpg", "jpeg", "tiff"], label_visibility="collapsed"
+    )
+    st.markdown("</div>", unsafe_allow_html=True)
+# ========== BUSINESS LOGIC FUNCTIONS (INSERTS) ==========
+def get_api_key():
+    key = os.getenv("OPENAI_API_KEY")
     if not key:
+        st.error("❌ OPENAI_API_KEY not set")
         st.stop()
     return key
def query_llm(prompt):
    """Send ``prompt`` to the OpenAI chat-completions endpoint.

    Args:
        prompt: The full user message to send.

    Returns:
        The text content of the first choice, or ``None`` when the request
        could not be sent, the API answered with a non-200 status, or the
        response body did not have the expected shape. In every failure case
        an error is shown through ``st.error``.
    """
    api_url = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {get_api_key()}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": "gpt-4-1106-preview",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.1,
        "max_tokens": 2000,
    }
    try:
        with st.spinner("🔍 Fine Tuning The Extracted Data..."):
            r = requests.post(api_url, headers=headers, json=payload, timeout=90)
    except requests.RequestException as e:
        # Timeouts / DNS / connection resets must not take the app down.
        st.error(f"Connection error: {e}")
        return None
    if r.status_code != 200:
        st.error(f"🚨 API Error {r.status_code}: {r.text}")
        return None
    try:
        return r.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as e:
        # ValueError covers a body that is not valid JSON; the others cover a
        # JSON body without the choices[0].message.content path.
        st.error(f"🚨 Unexpected API response: {e}")
        return None
 def clean_json_response(text):
     if not text:
             st.code(frag)
             return None
+def get_extraction_prompt(txt):
     return (
+        "You are an expert invoice parser. Extract data according to the visible table structure and column headers in the invoice. "
+        "For every line item, only extract fields that correspond to the table columns for that row. "
         "Use this schema:\n"
+        '{ "invoice_header": {"supplier_name":"string", "po_number":"string", "currency":"string", "total_due":"string"}, "line_items": [{"description":"string", "quantity":"string", "price":"string", "amount":"string"}] }'
+        "\nIf a field is missing, use null. Return ONLY the JSON object, no explanation.\n"
         "\nInvoice Text:\n"
         f"{txt}"
     )
def extract_invoice_info(text):
    """Turn OCR text of an invoice into a structured dict via the LLM.

    Args:
        text: Plain text extracted from the uploaded document.

    Returns:
        ``{"invoice_header": dict, "line_items": list[dict]}`` on success, or
        ``None`` when the LLM call fails or its reply is not a JSON object.
    """
    prompt = get_extraction_prompt(text)
    raw = query_llm(prompt)
    if not raw:
        return None
    data = clean_json_response(raw)
    # The model may answer with a JSON list or scalar; only an object has the
    # keys read below.
    if not data or not isinstance(data, dict):
        return None
    hdr = data.get("invoice_header")
    if not isinstance(hdr, dict):
        # Covers both a missing key and an explicit null, so callers can
        # always call .get() on the header.
        hdr = {}
    items = data.get("line_items")
    if not isinstance(items, list):
        items = []
    # Drop malformed rows (strings, nulls) so downstream code can assume dicts.
    items = [itm for itm in items if isinstance(itm, dict)]
    return {"invoice_header": hdr, "line_items": items}
 def clean_num(val):
     if val is None:
         return 100
     return fuzz.token_set_ratio(str(s1).lower(), str(s2).lower())
def find_po_number_anywhere(inv_json, po_number):
    """Report whether ``po_number`` occurs inside any value of ``inv_json``.

    The invoice structure (nested dicts/lists) is walked and every leaf value
    is compared as text. Thousands separators (``,``) are ignored, and a
    trailing ``.0`` (left over from float-typed CSV columns) is dropped.
    Only values are searched, never dictionary keys.

    Args:
        inv_json: Extracted invoice data (dict / list / scalars).
        po_number: PO number from the PO sheet; str, int or float.

    Returns:
        ``True`` if the normalised PO number is a substring of any normalised
        leaf value, otherwise ``False``. Missing PO numbers (empty, ``"nan"``,
        ``"None"``) never match.
    """
    def _normalise(value):
        text = str(value).strip().replace(",", "")
        # Strip only a *trailing* ".0"; removing it anywhere would turn
        # "1.05" into "15" and create false matches.
        if text.endswith(".0"):
            text = text[:-2]
        return text

    def _leaf_values(node):
        if isinstance(node, dict):
            for child in node.values():
                yield from _leaf_values(child)
        elif isinstance(node, (list, tuple)):
            for child in node:
                yield from _leaf_values(child)
        elif node is not None:
            yield node

    if not po_number or not inv_json:
        return False
    needle = _normalise(po_number)
    # str() of a missing pandas cell is "nan"; treat it as "no PO number"
    # instead of letting it match words such as "finance".
    if not needle or needle.lower() in ("nan", "none"):
        return False
    return any(needle in _normalise(value) for value in _leaf_values(inv_json))
def find_best_po_match(inv, po_df, weights):
    """Score every PO row against the invoice and return the best one.

    Args:
        inv: Extracted invoice, expected to hold ``"invoice_header"`` (dict)
            and ``"line_items"`` (list of dicts).
        po_df: DataFrame of purchase orders. The columns read are
            "Supplier Name", "PO Number", "Currency", "PO Total Value",
            "Item Description", "Item Quantity", "Item Unit Price" and
            "Line Item Total".
        weights: Mapping with the keys "Supplier", "PO Number", "Currency",
            "Total Due" and "Line Item"; each value is a percentage.

    Returns:
        ``(best_row, best_score, reason, debug)``. When ``po_df`` has no rows
        the result is ``(None, 0, "No POs found.", {})``. On equal scores the
        earliest row wins.
    """
    def _numbers_equal(left, right):
        # NOTE(review): clean_num appears to return None for missing input
        # (its body is not fully visible here). Two missing values must not
        # count as a match, so both sides have to be real numbers.
        a, b = clean_num(left), clean_num(right)
        return 100 if a is not None and b is not None and a == b else 0

    # Tolerate a missing or null header instead of raising.
    inv_hdr = inv.get("invoice_header") or {}
    inv_supplier = inv_hdr.get("supplier_name") or ""
    inv_currency = inv_hdr.get("currency") or ""
    inv_total_due = clean_num(inv_hdr.get("total_due"))
    # Ignore malformed line items (anything that is not a dict).
    inv_line_items = [li for li in (inv.get("line_items") or []) if isinstance(li, dict)]

    scores = []
    for idx, row in po_df.iterrows():
        po_supplier = row.get("Supplier Name", "")
        po_po_number = str(row.get("PO Number", ""))
        po_currency = row.get("Currency", "")
        po_total = clean_num(row.get("PO Total Value", ""))

        # --- SCORING FIELDS ---
        s_supplier = weighted_fuzzy_score(inv_supplier, po_supplier)
        s_po_number = 100 if find_po_number_anywhere(inv, po_po_number) else 0
        s_currency = weighted_fuzzy_score(inv_currency, po_currency)
        # Totals within 2 currency units of each other count as a match.
        s_total = 100 if inv_total_due is not None and po_total is not None and abs(inv_total_due - po_total) < 2 else 0

        # --- LINE ITEM MATCH (basic): keep the best-scoring invoice line ---
        line_item_score = 0
        for line in inv_line_items:
            desc_score = weighted_fuzzy_score(line.get("description", ""), row.get("Item Description", ""))
            qty_score = _numbers_equal(line.get("quantity"), row.get("Item Quantity", ""))
            unit_score = _numbers_equal(line.get("price"), row.get("Item Unit Price", ""))
            amount_score = _numbers_equal(line.get("amount"), row.get("Line Item Total", ""))
            total = desc_score * 0.5 + qty_score * 0.2 + unit_score * 0.15 + amount_score * 0.15
            if total > line_item_score:
                line_item_score = total

        # -- WEIGHTED FINAL SCORE --
        total_score = (
            s_supplier * (weights["Supplier"] / 100) +
            s_po_number * (weights["PO Number"] / 100) +
            s_currency * (weights["Currency"] / 100) +
            s_total * (weights["Total Due"] / 100) +
            line_item_score * (weights["Line Item"] / 100)
        )
        reason = (
            f"Supplier match: {s_supplier}/100, "
            f"PO Number: {s_po_number}/100, "
            f"Currency: {s_currency}/100, "
            f"Total Due: {'match' if s_total else 'no match'}, "
            f"Line item best match: {int(line_item_score)}/100."
        )
        debug = {
            "po_idx": idx,
            "scores": [
                {"field": "Supplier", "score": s_supplier},
                {"field": "PO Number (anywhere in JSON)", "score": s_po_number},
                {"field": "Currency", "score": s_currency},
                {"field": "Total Due", "score": s_total},
                {"field": "Line Item", "score": line_item_score},
            ],
            "total_score": total_score,
        }
        scores.append((row, total_score, reason, debug))

    if not scores:
        return None, 0, "No POs found.", {}
    # max() returns the first of several equal maxima, which is the same row
    # a stable descending sort would put first -- without sorting everything.
    best_row, best_score, reason, debug = max(scores, key=lambda tup: tup[1])
    return best_row, best_score, reason, debug
 # Send an uploaded document to the LLMWhisperer (Unstract) v2 API, poll for
 # completion, and return the extracted text.
 #
 # uploaded_file: object with a .read() method and, optionally, a .name
 #   attribute (falls back to "uploaded_file" when .name is absent).
 # Returns: the extracted text, or None on any of the visible error paths
 #   (missing whisper_hash, non-200 status/retrieve response, polling timeout).
 #   Errors are reported to the page through st.error / the status placeholder.
 #
 # NOTE(review): this view of the function has gaps (the `if` below has no
 # visible body, `whisper_hash` is never assigned in view, and the trailing
 # `except` has no visible `try`). The notes below describe only what is shown.
 # NOTE(review): none of the requests calls pass `timeout=`, so a stalled
 # connection would block the script run indefinitely -- consider adding one.
 def extract_text_from_unstract(uploaded_file):
     # The name is only used to pick a Content-Type from the file extension.
     filename = getattr(uploaded_file, "name", "uploaded_file")
     file_bytes = uploaded_file.read()
     # NOTE(review): PDFs are sent as "text/plain" and everything else as
     # "application/octet-stream" -- confirm against the API docs that this
     # is intended.
+    content_type = "application/octet-stream"
+    if filename.lower().endswith(".pdf"):
+        content_type = "text/plain"
     headers = {
         # os.getenv returns None when UNSTRACT_API_KEY is not set; no check
         # for that case is visible here.
+        "unstract-key": os.getenv("UNSTRACT_API_KEY"),
         "Content-Type": content_type,
     }
+    url = "https://llmwhisperer-api.us-central.unstract.com/api/v2/whisper"
     with st.spinner("Uploading and processing document with EZOFIS AI OCR AGENT..."):
         # Raw file bytes are posted as the request body.
         r = requests.post(url, headers=headers, data=file_bytes)
         # Anything other than 202 is treated as a failed submission; the
         # handling itself is not visible in this view.
         if r.status_code != 202:
         # presumably whisper_hash is read from the 202 response -- the
         # assignment is not visible here; verify.
         if not whisper_hash:
             st.error("Unstract: No whisper_hash received.")
             return None
+    status_url = f"https://llmwhisperer-api.us-central.unstract.com/api/v2/whisper-status?whisper_hash={whisper_hash}"
     status_placeholder = st.empty()
     # Poll the status endpoint at most 30 times. The success check and any
     # delay between polls are not visible in this view.
     for i in range(30):
+        status_r = requests.get(status_url, headers={"unstract-key": os.getenv("UNSTRACT_API_KEY")})
         if status_r.status_code != 200:
             st.error(f"Unstract: Error checking status: {status_r.status_code} - {status_r.text}")
             return None
     # for/else: this branch runs only when all 30 iterations finish without
     # a `break`, i.e. the job never reported completion.
     else:
         status_placeholder.error("Unstract: Timeout waiting for OCR to finish.")
         return None
     # text_only=true is requested on the retrieve call.
+    retrieve_url = f"https://llmwhisperer-api.us-central.unstract.com/api/v2/whisper-retrieve?whisper_hash={whisper_hash}&text_only=true"
+    r = requests.get(retrieve_url, headers={"unstract-key": os.getenv("UNSTRACT_API_KEY")})
     if r.status_code != 200:
         st.error(f"Unstract: Error retrieving extracted text: {r.status_code} - {r.text}")
         return None
     # Fallback: if the (not visible) `try` body raises, return the raw
     # response body. NOTE(review): `except Exception` is broad -- narrow it
     # to the parsing error actually expected.
     except Exception:
         return r.text
+# ========== END BUSINESS LOGIC ==========
+# ---- Extraction/Decision Main UI ----
 # "Extract" handler: on click, with a file present in inv_file (defined
 # elsewhere in the file), OCR the document, parse it into structured invoice
 # data, display it, and cache it in session state for later reruns.
 # NOTE(review): clicking with no file, or getting a falsy result from either
 # helper, makes this block render nothing -- there is no else branch here.
 if st.button("Extract") and inv_file:
     with st.spinner("Extracting text from document using Unstract..."):
         text = extract_text_from_unstract(inv_file)
     if text:
         # extract_invoice_info is defined elsewhere; its result is indexed
         # below by "invoice_header" and "line_items".
+        extracted_info = extract_invoice_info(text)
         if extracted_info:
+            st.success("Extraction Complete!", icon="✅")
             # NOTE(review): the opening <div> here and the closing </div>
             # below are emitted by separate st.markdown calls -- confirm the
             # card styling really wraps the widgets rendered in between.
+            st.markdown('<div style="background:#fff;border-radius:14px;box-shadow:0 4px 32px rgba(34,48,90,0.09),0 1.5px 3.5px rgba(30,136,229,0.07);padding:2rem 2.5rem 1.5rem 2.5rem;margin-bottom:1.5em;">', unsafe_allow_html=True)
+            st.markdown("#### Invoice Metadata")
             # NOTE(review): direct key access raises KeyError if the parsed
             # result lacks either key, and that happens before the result
             # is saved to session state.
+            st.json(extracted_info["invoice_header"])
+            st.markdown("#### Line Items")
+            st.json(extracted_info["line_items"])
             # Persist the result so the decision section further down can
             # read it back from session state.
             st.session_state['last_extracted_info'] = extracted_info
+            st.markdown("</div>", unsafe_allow_html=True)
 # Decision section: read the cached invoice data, PO table, and scoring
 # weights from session state, then offer a button that scores the invoice
 # against the POs and maps the best score to an approval status.
 extracted_info = st.session_state.get('last_extracted_info', None)
 po_df = st.session_state.get('last_po_df', None)
 # Default weights (used when "scoring_weights" is absent) sum to 100.
+scoring_weights = st.session_state.get("scoring_weights", {
+    "Supplier": 20,
+    "PO Number": 25,
+    "Currency": 10,
+    "Total Due": 25,
+    "Line Item": 20,
+})
 # The section is rendered only when both inputs exist and the weights sum to
 # exactly 100. NOTE(review): otherwise nothing is shown by this block, with
 # no message explaining why; the exact == would also reject non-integer
 # weights that sum to 100 only approximately.
+if extracted_info is not None and po_df is not None and sum(scoring_weights.values()) == 100:
+    st.markdown('<div style="background:#fff;border-radius:14px;box-shadow:0 4px 32px rgba(34,48,90,0.09),0 1.5px 3.5px rgba(30,136,229,0.07);padding:2rem 2.5rem 1.5rem 2.5rem;margin-bottom:1.5em;">', unsafe_allow_html=True)
+    st.markdown("<h3>EZOFIS AP AGENT Decision</h3>", unsafe_allow_html=True)
     if st.button("Make a decision (EZOFIS AP AGENT)"):
+        # Smart PO matching
         # Unpacked as (best PO row, score, reason text, debug dict).
+        best_row, best_score, reason, debug = find_best_po_match(extracted_info, po_df, scoring_weights)
         # Thresholds are strict: a score of exactly 85 is PARTIALLY APPROVED
         # and exactly 70 is REJECTED.
+        if best_score > 85:
+            status = "APPROVED"
+        elif best_score > 70:
+            status = "PARTIALLY APPROVED"
+        else:
+            status = "REJECTED"
+        st.write(f"**Decision:** {status}")
         # int() truncates, so the displayed score can read lower than the
         # value that was compared above (e.g. 85.9 is shown as 85).
+        st.write(f"**Reason:** Best match score: {int(best_score)}/100. {reason}")
+        with st.expander("Debug & Matching Details"):
+            st.json(debug)
+            st.subheader("Extracted Invoice JSON")
+            st.json(extracted_info)
+            st.subheader("Matched PO Row")
             # best_row may be None, hence the guard before .to_dict().
+            st.json(best_row.to_dict() if best_row is not None else None)
+    st.markdown("</div>", unsafe_allow_html=True)