# AP_AGENT

# Sleeping

# File size: 26,323 Bytes

import streamlit as st
import requests
import json
import re
import os
import time
import mimetypes
import pandas as pd
from langchain_community.chat_models import ChatOpenAI
from langchain.agents import initialize_agent, Tool, AgentType
from fuzzywuzzy import fuzz

# --- Streamlit Page Settings ---
# Sets the browser tab title and switches the app to the wide page layout.
st.set_page_config(page_title="EZOFIS Accounts Payable Agent", layout="wide")

# --- Styles for SaaS Feel ---
# Injects a raw <style> element (hence unsafe_allow_html=True) that themes:
#   .block-card  - white rounded card container
#   .step-num    - purple pill used for the numbered step badges below
#   .stButton    - purple rounded buttons
#   .stSlider    - light rounded slider track
# NOTE(review): the two `.css-*` selectors look like auto-generated class
# names; confirm they still match the Streamlit version in use.
st.markdown("""
    <style>
    .block-card {
        background: #fff;
        border-radius: 20px;
        box-shadow: 0 2px 16px rgba(25,39,64,0.05);
        padding: 32px 26px 24px 26px;
        margin-bottom: 24px;
    }
    .step-num {
        background: #A020F0;
        color: #fff;
        border-radius: 999px;
        padding: 6px 13px;
        font-weight: 700;
        margin-right: 14px;
        font-size: 20px;
        display: inline-block;
        vertical-align: middle;
    }
    .stButton>button {
        background: #A020F0 !important;
        color: white !important;
        border-radius: 12px !important;
        padding: 10px 32px !important;
        font-weight: 700;
        border: none !important;
        font-size: 18px !important;
        margin-top: 12px !important;
    }
    .stSlider>div>div>div>div {
        background: #F3F6FB !important;
        border-radius: 999px;
    }
    .css-12w0qpk {padding-top: 0rem;}
    .css-1kyxreq {padding-top: 0rem;}
    </style>
""", unsafe_allow_html=True)

# Registry of LLM backends, keyed by the display label used throughout the app.
# Each entry provides:
#   api_url         - chat-completions endpoint that query_llm POSTs to
#   model           - model identifier sent in the request payload
#   key_env         - name of the environment variable holding the API key
#   response_format - optional payload field; omitted from the request when falsy
#   extra_headers   - optional headers merged into the request headers
# NOTE(review): the label says "GPT-4.1" while the model id is
# "gpt-4-1106-preview" - confirm which one is intended.
MODELS = {
    "OpenAI GPT-4.1": {
        "api_url": "https://api.openai.com/v1/chat/completions",
        "model": "gpt-4-1106-preview",
        "key_env": "OPENAI_API_KEY",
        "response_format": None,
        "extra_headers": {},
    },
}

def get_api_key(model_choice):
    """Return the API key for ``model_choice`` from the environment.

    The environment variable name comes from ``MODELS[model_choice]["key_env"]``.
    When the variable is unset or empty, an error is shown in the UI and
    ``st.stop()`` is called.
    """
    env_name = MODELS[model_choice]["key_env"]
    api_key = os.getenv(env_name)
    if api_key:
        return api_key
    st.error(f"❌ {env_name} not set")
    st.stop()
    return api_key

def query_llm(model_choice, prompt):
    """Send ``prompt`` as a single user message to the configured chat API.

    Args:
        model_choice: Key into ``MODELS`` selecting endpoint, model and key.
        prompt: Text sent as the only user message.

    Returns:
        The assistant message content as a string, or ``None`` when the
        request fails, the API answers with a non-200 status, or the response
        body does not have the expected chat-completions shape. Every failure
        is also reported in the UI via ``st.error``.

    Side effects:
        On success stores the message content in ``st.session_state.last_api``
        and the raw response body in ``st.session_state.last_raw``.
    """
    cfg = MODELS[model_choice]
    headers = {
        "Authorization": f"Bearer {get_api_key(model_choice)}",
        "Content-Type": "application/json",
    }
    if cfg.get("extra_headers"):
        headers.update(cfg["extra_headers"])
    payload = {
        "model": cfg["model"],
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.1,
        "max_tokens": 2000,
    }
    if cfg.get("response_format"):
        payload["response_format"] = cfg["response_format"]

    # Only the network call can produce a genuine connection error, so only
    # that call is reported as one.
    try:
        with st.spinner("🔍 Fine Tuning The Extracted Data..."):
            r = requests.post(cfg["api_url"], headers=headers, json=payload, timeout=90)
    except requests.RequestException as e:
        st.error(f"Connection error: {e}")
        return None

    if r.status_code != 200:
        st.error(f"🚨 API Error {r.status_code}: {r.text}")
        return None

    # A 200 response with an unexpected body is a format problem, not a
    # connectivity problem; report it as such and show the body for debugging.
    try:
        content = r.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as e:
        st.error(f"Unexpected API response format: {e!r}")
        st.code(r.text)
        return None

    st.session_state.last_api = content
    st.session_state.last_raw = r.text
    return content

def clean_json_response(text):
    """Pull a JSON object out of an LLM reply and parse it.

    Markdown code fences are removed, the span from the first ``{`` to the
    last ``}`` is taken, and trailing commas before ``}`` / ``]`` are dropped.
    If parsing fails, one regex-based repair is attempted before giving up.

    Returns the parsed object, or ``None`` for empty input, when no braces
    are found, or when both parse attempts fail (failures are shown in the UI).
    """
    if not text:
        return None

    unfenced = re.sub(r'```(?:json)?', '', text).strip()
    first_brace = unfenced.find('{')
    last_brace = unfenced.rfind('}')
    if first_brace == -1 or last_brace == -1:
        st.error("Couldn't locate JSON in response.")
        st.code(text)
        return None

    # Drop trailing commas such as `[1, 2,]` or `{"a": 1,}`.
    candidate = re.sub(r',\s*([}\]])', r'\1', unfenced[first_brace:last_brace + 1])

    try:
        return json.loads(candidate)
    except json.JSONDecodeError as first_error:
        # Keep the first error: it is the one reported if the repair fails too.
        parse_error = first_error

    patched = re.sub(r'"\s*"\s*(?="[^"]+"\s*:)', '","', candidate)
    try:
        return json.loads(patched)
    except json.JSONDecodeError:
        pass

    st.error(f"JSON parse error: {parse_error}")
    st.code(candidate)
    return None

def fallback_supplier(text):
    """Return the first non-blank line of ``text`` (stripped), or ``None``.

    Used as a stand-in supplier name when extraction did not produce one.
    """
    stripped_lines = (raw.strip() for raw in text.splitlines())
    return next((candidate for candidate in stripped_lines if candidate), None)

def get_extraction_prompt(model_choice, txt):
    """Build the invoice-extraction prompt sent to the LLM.

    The prompt instructs the model to return only a JSON object with two
    top-level keys: ``invoice_header`` (invoice/shipment-level fields) and
    ``line_items`` (one object per table row), using ``null`` for anything
    missing. The raw invoice text ``txt`` is appended at the end.

    Args:
        model_choice: Accepted for interface symmetry; not used by this function.
        txt: Invoice text to embed in the prompt.

    Returns:
        The complete prompt string.
    """
    return (
        "You are an expert invoice parser. "
        "Extract data according to the visible table structure and column headers in the invoice. "
        "For every line item, only extract fields that correspond to the table columns for that row (do not include header/shipment fields in line items). "
        "Merge all multi-line content within a single cell into that field (especially for the 'description' and 'notes'). "
        "Shipment/invoice-level fields such as CAR NUMBER, SHIPPING POINT, SHIPMENT NUMBER, CURRENCY, etc., must go ONLY into the 'invoice_header', not as line item fields.\n"
        "Use this schema:\n"
        '{\n'
        '  "invoice_header": {\n'
        '    "car_number": "string or null",\n'
        '    "shipment_number": "string or null",\n'
        '    "shipping_point": "string or null",\n'
        '    "currency": "string or null",\n'
        '    "invoice_number": "string or null",\n'
        '    "invoice_date": "string or null",\n'
        '    "order_number": "string or null",\n'
        '    "customer_order_number": "string or null",\n'
        '    "our_order_number": "string or null",\n'
        '    "sales_order_number": "string or null",\n'
        '    "purchase_order_number": "string or null",\n'
        '    "order_date": "string or null",\n'
        '    "supplier_name": "string or null",\n'
        '    "supplier_address": "string or null",\n'
        '    "supplier_phone": "string or null",\n'
        '    "supplier_email": "string or null",\n'
        '    "supplier_tax_id": "string or null",\n'
        '    "customer_name": "string or null",\n'
        '    "customer_address": "string or null",\n'
        '    "customer_phone": "string or null",\n'
        '    "customer_email": "string or null",\n'
        '    "customer_tax_id": "string or null",\n'
        '    "ship_to_name": "string or null",\n'
        '    "ship_to_address": "string or null",\n'
        '    "bill_to_name": "string or null",\n'
        '    "bill_to_address": "string or null",\n'
        '    "remit_to_name": "string or null",\n'
        '    "remit_to_address": "string or null",\n'
        '    "tax_id": "string or null",\n'
        '    "tax_registration_number": "string or null",\n'
        '    "vat_number": "string or null",\n'
        '    "payment_terms": "string or null",\n'
        '    "payment_method": "string or null",\n'
        '    "payment_reference": "string or null",\n'
        '    "bank_account_number": "string or null",\n'
        '    "iban": "string or null",\n'
        '    "swift_code": "string or null",\n'
        '    "total_before_tax": "string or null",\n'
        '    "tax_amount": "string or null",\n'
        '    "tax_rate": "string or null",\n'
        '    "shipping_charges": "string or null",\n'
        '    "discount": "string or null",\n'
        '    "total_due": "string or null",\n'
        '    "amount_paid": "string or null",\n'
        '    "balance_due": "string or null",\n'
        '    "due_date": "string or null",\n'
        '    "invoice_status": "string or null",\n'
        '    "reference_number": "string or null",\n'
        '    "project_code": "string or null",\n'
        '    "department": "string or null",\n'
        '    "contact_person": "string or null",\n'
        '    "notes": "string or null",\n'
        '    "additional_info": "string or null"\n'
        '  },\n'
        '  "line_items": [\n'
        '    {\n'
        '      "quantity": "string or null",\n'
        '      "units": "string or null",\n'
        '      "description": "string or null",\n'
        '      "footage": "string or null",\n'
        '      "price": "string or null",\n'
        '      "amount": "string or null",\n'
        '      "notes": "string or null"\n'
        '    }\n'
        '  ]\n'
        '}'
        "\nIf a field is missing for a line item or header, use null. "
        "Do not invent fields. Do not add any header or shipment data to any line item. Return ONLY the JSON object, no explanation.\n"
        "\nInvoice Text:\n"
        f"{txt}"
    )

def ensure_total_due(invoice_header):
    """Backfill ``total_due`` from related amount fields when it is missing.

    If ``total_due`` is ``None`` or ``""``, the first non-empty value among
    ``invoice_total``, ``invoice_value``, ``total_before_tax``,
    ``balance_due`` and ``amount_paid`` (in that order) is copied into it.
    The dict is modified in place and returned.
    """
    if invoice_header.get("total_due") not in (None, ""):
        return invoice_header

    fallback_fields = (
        "invoice_total",
        "invoice_value",
        "total_before_tax",
        "balance_due",
        "amount_paid",
    )
    substitute = next(
        (invoice_header[name] for name in fallback_fields if invoice_header.get(name)),
        None,
    )
    if substitute is not None:
        invoice_header["total_due"] = substitute
    return invoice_header

def clean_num(val):
    """Best-effort conversion of a loosely formatted amount to ``float``.

    Args:
        val: ``None``, a number, or anything whose ``str()`` may contain
            numbers (e.g. ``"$1,234.50"``, ``"USD 99"``, ``"-150.00"``).

    Returns:
        ``None`` when ``val`` is ``None`` or contains no number; ``float(val)``
        for ints/floats; otherwise the largest number found in the text, with
        thousands separators (commas) removed.

    Signed values are honoured: the pattern captures a leading ``+``/``-``,
    so a credit such as ``"-150.00"`` yields ``-150.0`` instead of being
    discarded.
    """
    if val is None:
        return None
    if isinstance(val, (int, float)):
        return float(val)
    tokens = re.findall(r"[-+]?\d[\d,]*\.?\d*", str(val))
    if not tokens:
        return None
    # Every token is sign + digits (+ optional fraction) once commas are
    # removed, so float() cannot fail here.
    return max(float(token.replace(",", "")) for token in tokens)

def weighted_fuzzy_score(s1, s2):
    """Return a 0-100 similarity score for two values.

    Two falsy values (both missing/empty) count as a perfect match (100).
    Otherwise both are stringified, lower-cased and compared with
    ``fuzz.token_set_ratio``.
    """
    if s1 or s2:
        left, right = str(s1).lower(), str(s2).lower()
        return fuzz.token_set_ratio(left, right)
    return 100

def find_po_number_in_json(po_number, invoice_json, min_partial_len=4):
    """Report whether a PO number appears anywhere in the extracted invoice.

    Every scalar value in ``invoice_json`` (nested dicts/lists included) is
    stringified and compared against the PO number.

    Args:
        po_number: PO identifier, typically a string from the PO CSV. A
            float-like value such as ``"12345.0"`` is also tried as ``"12345"``.
        invoice_json: Parsed invoice structure (dicts, lists, scalars).
        min_partial_len: Minimum length an invoice value must have before it
            may match as a *fragment* of the PO number (e.g. invoice
            ``"12345"`` vs PO ``"PO-12345"``). Without this floor, tiny values
            such as a quantity of ``"1"`` would match nearly every PO.

    Returns:
        ``True`` if the PO number is contained in some invoice value, or a
        sufficiently long invoice value is contained in the PO number;
        ``False`` otherwise, including for blank / ``nan`` / ``None`` PO numbers.
    """
    def _iter_values(obj):
        # Yield every non-null leaf of the structure as a string.
        if isinstance(obj, dict):
            for value in obj.values():
                yield from _iter_values(value)
        elif isinstance(obj, list):
            for item in obj:
                yield from _iter_values(item)
        elif obj is not None:
            yield str(obj)

    def _normalize(value):
        # Remove spaces and only a *trailing* ".0"/".00" (float artefact).
        # Stripping ".0" anywhere would turn "100.00" into "1000".
        compact = str(value).strip().replace(" ", "")
        return re.sub(r"\.0+$", "", compact)

    po_str = _normalize(po_number)
    # An empty CSV cell arrives here as "", "nan" or "None"; never match those.
    if po_str.lower() in ("", "nan", "none"):
        return False
    try:
        po_int = str(int(float(po_number)))
    except (TypeError, ValueError, OverflowError):
        po_int = po_str
    candidates = {po_str, po_int}

    for raw in _iter_values(invoice_json):
        value = _normalize(raw)
        if not value:
            continue
        if any(candidate in value for candidate in candidates):
            return True
        if len(value) >= min_partial_len and any(value in candidate for candidate in candidates):
            return True
    return False

def _score_line_items(inv_line_items, po_desc, po_qty, po_unit, po_line_total):
    """Score each invoice line against one PO row; return the best (score, detail, reason)."""
    # The PO-side numbers do not change per invoice line; parse them once.
    po_qty_num = clean_num(po_qty)
    po_unit_num = clean_num(po_unit)

    best_score = 0
    best_detail = None
    best_reason = ""
    for line in inv_line_items:
        # The extractor may leave non-dict entries in the list; skip them
        # instead of failing on `.get`.
        if not isinstance(line, dict):
            continue
        desc_score = weighted_fuzzy_score(line.get("description", ""), po_desc)
        qty_score = 100 if clean_num(line.get("quantity")) == po_qty_num else 0
        unit_score = 100 if clean_num(line.get("price")) == po_unit_num else 0
        amount_score = 100 if clean_num(line.get("amount")) == po_line_total else 0
        total = desc_score * 0.5 + qty_score * 0.2 + unit_score * 0.15 + amount_score * 0.15
        if total <= best_score:
            continue
        best_score = total
        best_detail = {
            "field": "Line Item",
            "invoice": {
                "description": line.get("description", ""),
                "quantity": line.get("quantity", ""),
                "price": line.get("price", ""),
                "amount": line.get("amount", ""),
            },
            "po": {
                "description": po_desc,
                "quantity": po_qty,
                "price": po_unit,
                "amount": po_line_total,
            },
            "desc_score": desc_score,
            "qty_score": qty_score,
            "unit_score": unit_score,
            "amount_score": amount_score,
            "line_item_score": total
        }
        best_reason = (
            f"Best line item: desc_score={desc_score}, qty_score={qty_score}, "
            f"unit_score={unit_score}, amount_score={amount_score}"
        )
    return best_score, best_detail, best_reason


def find_best_po_match(inv, po_df, weight_supplier, weight_po_number, weight_currency, weight_total_due, weight_line_item):
    """Score every PO row against the invoice and return the best match.

    Per-field scores (0-100) are computed for supplier name (fuzzy), PO
    number (found anywhere in the invoice JSON), currency (fuzzy), total due
    (within 2 units of the PO total) and the best-matching line item
    (description 50%, quantity 20%, unit price 15%, amount 15%). They are
    combined using the given percentage weights.

    Args:
        inv: Dict with ``invoice_header`` (dict) and optional ``line_items``.
        po_df: DataFrame of POs; reads the columns "Supplier Name",
            "PO Number", "Currency", "PO Total Value", "Item Description",
            "Item Quantity", "Item Unit Price" and "Line Item Total".
        weight_*: Percentage weights. The combined score is forced to 0 for
            every row unless they sum to exactly 100.

    Returns:
        ``(best_row, best_score, reason, debug)`` for the highest-scoring row
        (the first one on ties), or ``(None, 0, "No POs found.", {})`` when
        ``po_df`` has no rows.
    """
    inv_hdr = inv["invoice_header"]
    inv_supplier = inv_hdr.get("supplier_name") or ""
    inv_currency = inv_hdr.get("currency") or ""
    inv_total_due = clean_num(inv_hdr.get("total_due"))
    # Tolerate `"line_items": null` as well as a missing key.
    inv_line_items = inv.get("line_items") or []

    # The weights are the same for every row; validate them once.
    wsum = weight_supplier + weight_po_number + weight_currency + weight_total_due + weight_line_item
    weights_valid = wsum == 100

    best = None
    for idx, row in po_df.iterrows():
        po_supplier = row.get("Supplier Name", "")
        po_po_number = str(row.get("PO Number", ""))
        po_currency = row.get("Currency", "")
        po_total = clean_num(row.get("PO Total Value", ""))
        po_desc = row.get("Item Description", "")
        po_qty = str(row.get("Item Quantity", ""))
        po_unit = str(row.get("Item Unit Price", ""))
        po_line_total = clean_num(row.get("Line Item Total", ""))

        s_supplier = weighted_fuzzy_score(inv_supplier, po_supplier)
        s_po_number = 100 if find_po_number_in_json(po_po_number, inv) else 0
        s_currency = weighted_fuzzy_score(inv_currency, po_currency)
        totals_close = (
            inv_total_due is not None
            and po_total is not None
            and abs(inv_total_due - po_total) < 2
        )
        s_total = 100 if totals_close else 0

        field_details = [
            {
                "field": "Supplier Name",
                "invoice": inv_supplier,
                "po": po_supplier,
                "score": s_supplier
            },
            {
                "field": "PO Number (anywhere in JSON)",
                "invoice": "found" if s_po_number else "not found",
                "po": po_po_number,
                "score": s_po_number
            },
            {
                "field": "Currency",
                "invoice": inv_currency,
                "po": po_currency,
                "score": s_currency
            },
            {
                "field": "Total Due",
                "invoice": inv_total_due,
                "po": po_total,
                "score": s_total
            },
        ]

        line_item_score, best_line_detail, line_reason = _score_line_items(
            inv_line_items, po_desc, po_qty, po_unit, po_line_total
        )

        total_score = (
            s_supplier * weight_supplier/100 +
            s_po_number * weight_po_number/100 +
            s_currency * weight_currency/100 +
            s_total * weight_total_due/100 +
            line_item_score * weight_line_item/100
        ) if weights_valid else 0

        reason = (
            f"Supplier match: {s_supplier}/100 (invoice: '{inv_supplier}' vs PO: '{po_supplier}'), "
            f"PO Number: {s_po_number}/100 ({'found anywhere in JSON' if s_po_number else 'not found'}), "
            f"Currency: {s_currency}/100 (invoice: '{inv_currency}' vs PO: '{po_currency}'), "
            f"Total Due: {'match' if s_total else 'no match'} (invoice: {inv_total_due} vs PO: {po_total}), "
            f"Line item best match: {int(line_item_score)}/100. {line_reason}"
        )

        debug = {
            "po_idx": idx,
            "po_supplier": po_supplier,
            "po_po_number": po_po_number,
            "po_total": po_total,
            "scores": field_details,
            "line_item_score": line_item_score,
            "best_line_detail": best_line_detail,
            "total_score": total_score,
            "line_reason": line_reason,
            "inv_total_due": inv_total_due
        }

        # Strict ">" keeps the first row on ties, like a stable descending sort.
        if best is None or total_score > best[1]:
            best = (row, total_score, reason, debug)

    if best is None:
        return None, 0, "No POs found.", {}
    return best

def extract_invoice_info(model_choice, text):
    """Run LLM extraction on invoice text and normalise the result.

    Args:
        model_choice: Key into ``MODELS`` selecting the LLM.
        text: Raw invoice text (OCR output).

    Returns:
        ``{"invoice_header": dict, "line_items": list[dict]}``, or ``None``
        when the LLM call fails or its reply cannot be parsed as a JSON object.

    Normalisation applied:
        * If the reply has no usable ``invoice_header`` but looks like a flat
          header (has ``invoice_number``/``supplier_name``/``customer_name``),
          the whole reply is used as the header.
        * A fixed set of header and line-item keys is guaranteed to exist
          (defaulting to ``None``).
        * A missing supplier name falls back to the first non-blank line of
          ``text``; a missing ``total_due`` is backfilled via ``ensure_total_due``.
        * Line-item entries that are not dicts are dropped, so consumers can
          call ``.get`` on every item.
    """
    prompt = get_extraction_prompt(model_choice, text)
    raw = query_llm(model_choice, prompt)
    if not raw:
        return None
    data = clean_json_response(raw)
    if not data or not isinstance(data, dict):
        return None

    hdr = data.get("invoice_header")
    # The model may emit `"invoice_header": null` or a non-object value;
    # treat both as "no header" instead of failing on `.setdefault`.
    if not isinstance(hdr, dict):
        hdr = {}
    if not hdr and any(k in data for k in ("invoice_number", "supplier_name", "customer_name")):
        hdr = data
    for k in ("invoice_number", "invoice_date", "po_number", "invoice_value", "supplier_name", "customer_name"):
        hdr.setdefault(k, None)
    if not hdr.get("supplier_name"):
        hdr["supplier_name"] = fallback_supplier(text)
    hdr = ensure_total_due(hdr)

    items = data.get("line_items", [])
    if not isinstance(items, list):
        items = []
    items = [itm for itm in items if isinstance(itm, dict)]
    for itm in items:
        for k in ("item_number", "description", "quantity", "unit_price", "total_price"):
            itm.setdefault(k, None)
    return {"invoice_header": hdr, "line_items": items}

def get_content_type(filename):
    """Choose the ``Content-Type`` header value for an uploaded file.

    Files whose extension is ``pdf`` (case-insensitive) are sent as
    ``text/plain``; everything else uses the type guessed by ``mimetypes``,
    with ``application/octet-stream`` when no type can be guessed.
    """
    extension = filename.lower().rsplit('.', 1)[-1]
    if extension == "pdf":
        return "text/plain"
    guessed = mimetypes.guess_type(filename)[0]
    return guessed if guessed is not None else "application/octet-stream"

# Base URL of the OCR service; the /whisper, /whisper-status and
# /whisper-retrieve endpoints used below are appended to it.
UNSTRACT_BASE = "https://llmwhisperer-api.us-central.unstract.com/api/v2"
# API key sent in the "unstract-key" request header; None when the
# environment variable is not set.
UNSTRACT_API_KEY = os.getenv("UNSTRACT_API_KEY")

def extract_text_from_unstract(uploaded_file, request_timeout=120, poll_attempts=30, poll_interval=2):
    """Upload a document to the OCR service and return its extracted text.

    Workflow: POST the file bytes to ``/whisper`` (expects HTTP 202 and a
    ``whisper_hash``), poll ``/whisper-status`` until the status is
    ``"processed"``, then fetch the text from ``/whisper-retrieve``.

    Args:
        uploaded_file: File-like object with ``read()`` and, optionally,
            ``name`` and ``seek``.
        request_timeout: Timeout in seconds applied to each HTTP request.
        poll_attempts: Maximum number of status checks before giving up.
        poll_interval: Seconds to sleep between status checks.

    Returns:
        The extracted text, or ``None`` on any failure (missing API key,
        empty file, network error, unexpected status code, malformed
        response, or polling timeout). Failures are reported in the UI.
    """
    # A missing key would otherwise be sent as a `None` header value.
    if not UNSTRACT_API_KEY:
        st.error("❌ UNSTRACT_API_KEY not set")
        return None

    filename = getattr(uploaded_file, "name", "uploaded_file")
    # Rewind first: the same file object may already have been read.
    if hasattr(uploaded_file, "seek"):
        uploaded_file.seek(0)
    file_bytes = uploaded_file.read()
    if not file_bytes:
        st.error("Unstract: Uploaded file is empty.")
        return None

    auth_headers = {"unstract-key": UNSTRACT_API_KEY}
    upload_headers = {**auth_headers, "Content-Type": get_content_type(filename)}

    try:
        with st.spinner("Uploading and processing document with EZOFIS AI OCR AGENT..."):
            r = requests.post(
                f"{UNSTRACT_BASE}/whisper",
                headers=upload_headers,
                data=file_bytes,
                timeout=request_timeout,
            )
            if r.status_code != 202:
                st.error(f"Unstract: Error uploading file: {r.status_code} - {r.text}")
                return None
            whisper_hash = r.json().get("whisper_hash")
            if not whisper_hash:
                st.error("Unstract: No whisper_hash received.")
                return None

        status_url = f"{UNSTRACT_BASE}/whisper-status?whisper_hash={whisper_hash}"
        status_placeholder = st.empty()
        for attempt in range(poll_attempts):
            status_r = requests.get(status_url, headers=auth_headers, timeout=request_timeout)
            if status_r.status_code != 200:
                st.error(f"Unstract: Error checking status: {status_r.status_code} - {status_r.text}")
                return None
            status = status_r.json().get("status")
            if status == "processed":
                status_placeholder.info("EZOFIS AI OCR AGENT STATUS: processed! 🎉")
                break
            status_placeholder.info(f"EZOFIS AI OCR AGENT STATUS: {status or 'waiting'}... ({attempt+1})")
            time.sleep(poll_interval)
        else:
            # Loop finished without ever seeing "processed".
            status_placeholder.error("Unstract: Timeout waiting for OCR to finish.")
            return None

        retrieve_url = f"{UNSTRACT_BASE}/whisper-retrieve?whisper_hash={whisper_hash}&text_only=true"
        r = requests.get(retrieve_url, headers=auth_headers, timeout=request_timeout)
    except requests.RequestException as e:
        st.error(f"Unstract: Connection error: {e}")
        return None
    except (ValueError, AttributeError) as e:
        # A status/upload response that is not a JSON object.
        st.error(f"Unstract: Malformed response: {e}")
        return None

    if r.status_code != 200:
        st.error(f"Unstract: Error retrieving extracted text: {r.status_code} - {r.text}")
        return None
    # The result may be JSON with "result_text" or plain text; fall back to
    # the raw body when it is not a JSON object.
    try:
        return r.json().get("result_text") or r.text
    except (ValueError, AttributeError):
        return r.text

# ---------------- UI LAYOUT ----------------------
# Page title and tagline, rendered as raw HTML so the inline styles apply.
st.markdown(
    "<h1 style='font-weight:800; margin-bottom:8px;'>EZOFIS Accounts Payable Agent</h1>",
    unsafe_allow_html=True
)
st.markdown(
    "<div style='font-size:20px; margin-bottom:28px; color:#24345C;'>Modern workflow automation for finance teams</div>",
    unsafe_allow_html=True
)

# ---- Three columns layout for horizontal flow
# Relative widths 2:2:3. col1 hosts steps 1-3 (PO upload, weights, thresholds),
# col2 hosts steps 4-5 (invoice upload, extraction), col3 hosts step 6 (decision).
col1, col2, col3 = st.columns([2,2,3])

# ---- Step 1: Upload POs (col1) ----
# Reads the uploaded PO CSV into a DataFrame and keeps it in session state
# under 'last_po_df' for the decision step. An unreadable CSV is reported in
# the UI instead of aborting the script with a traceback.
with col1:
    st.markdown("<span class='step-num'>1</span> <b>Upload Active Purchase Orders (POs)</b>", unsafe_allow_html=True)
    po_file = st.file_uploader(
        "CSV with PO number, Supplier, Items, etc.",
        type=["csv"],
        key="po_csv",
        label_visibility="collapsed"
    )
    po_df = None
    if po_file:
        # Rewind in case the same buffer was already consumed.
        po_file.seek(0)
        try:
            po_df = pd.read_csv(po_file)
        except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as e:
            st.error(f"Could not read the PO CSV: {e}")
        else:
            st.success(f"Loaded {len(po_df)} records from uploaded CSV.")
            st.session_state['last_po_df'] = po_df

# ---- Step 2: Scoring Weights (col1) ----
# Five percentage weights feed find_best_po_match; that function scores every
# PO as 0 unless the weights sum to exactly 100, hence the warning below.
with col1:
    st.markdown("<span class='step-num'>2</span> <b>Configure Scoring Weights</b>", unsafe_allow_html=True)
    st.markdown("Set weights for matching. Total must equal 100%.", unsafe_allow_html=True)

    def int_slider(label, value, key):
        """Render a 0-100 integer slider (step 1) and return its value."""
        return st.slider(label, 0, 100, value, 1, key=key, format="%d")

    weight_supplier = int_slider("Supplier Name (%)", 25, "w_supplier")
    weight_po_number = int_slider("PO Number (%)", 25, "w_po")
    weight_currency = int_slider("Currency (%)", 10, "w_curr")
    weight_total_due = int_slider("Total Due (%)", 20, "w_due")
    weight_line_item = int_slider("Line Item (%)", 20, "w_line")
    weight_sum = weight_supplier + weight_po_number + weight_currency + weight_total_due + weight_line_item
    if weight_sum != 100:
        st.warning(f"Sum of weights is {weight_sum}%. Adjust so it equals 100%.")

    st.markdown("<span class='step-num'>3</span> <b>Set Decision Thresholds</b>", unsafe_allow_html=True)
    approved_threshold = st.slider("Threshold for 'APPROVED'", min_value=0, max_value=100, value=85, format="%d")

    # The partial threshold must stay below the approved one. Its default (70)
    # is clamped to the allowed maximum, and the slider is skipped when no
    # valid range is left; otherwise lowering the approved threshold to 70 or
    # below would make the slider raise.
    partial_max = approved_threshold - 1
    if partial_max >= 1:
        partial_threshold = st.slider(
            "Threshold for 'PARTIALLY APPROVED'",
            min_value=0,
            max_value=partial_max,
            value=min(70, partial_max),
            format="%d",
        )
    else:
        partial_threshold = 0
        st.caption("'PARTIALLY APPROVED' threshold fixed at 0 because the 'APPROVED' threshold leaves no room below it.")

# ---- Step 4: Upload Invoice (col2) ----
# File picker for the invoice to process. `inv_file` is falsy until a file
# has been chosen; the extraction step checks it before doing any work.
with col2:
    st.markdown("<span class='step-num'>4</span> <b>Upload Invoice/Document</b>", unsafe_allow_html=True)
    inv_file = st.file_uploader(
        "Upload PDF, DOCX, XLSX, PNG, JPG, TIFF",
        type=["pdf", "docx", "xlsx", "xls", "png", "jpg", "jpeg", "tiff"],
        key="invoice_file",
        label_visibility="collapsed"
    )

# ---- Step 5: Extract Data (col2) ----
# On "Extract": OCR the uploaded invoice, run LLM extraction on the text and
# store the result in session state under 'last_extracted_info'.
with col2:
    st.markdown("<span class='step-num'>5</span> <b>Extract Data</b>", unsafe_allow_html=True)
    if st.button("Extract"):
        if not inv_file:
            st.warning("Please upload an invoice/document first.")
        else:
            # Discard any earlier result first, so a failed extraction cannot
            # leave a previous invoice behind for the decision step.
            st.session_state.pop('last_extracted_info', None)
            with st.spinner("Extracting text from document..."):
                text = extract_text_from_unstract(inv_file)
            if text:
                mdl = "OpenAI GPT-4.1"
                # extract_invoice_info already backfills total_due, so no
                # second ensure_total_due pass is needed here.
                extracted_info = extract_invoice_info(mdl, text)
                if extracted_info:
                    st.success("Extraction Complete")
                    st.session_state['last_extracted_info'] = extracted_info

# ---- Step 6: AP Agent Decision (col3) ----
# On click: wrap the deterministic PO matcher as a tool, let the LLM agent
# call it, and display the decision. Missing inputs and invalid weights are
# reported up front instead of silently doing nothing or running the agent
# on scores that find_best_po_match forces to 0.
with col3:
    st.markdown("<span class='step-num'>6</span> <b>AP Agent Decision</b>", unsafe_allow_html=True)
    if st.button("Make a decision (EZOFIS AP AGENT)"):
        extracted_info = st.session_state.get('last_extracted_info', None)
        po_df = st.session_state.get('last_po_df', None)
        if extracted_info is None or po_df is None:
            st.warning("Please upload a PO CSV and extract an invoice before requesting a decision.")
        elif weight_sum != 100:
            st.error(f"Scoring weights sum to {weight_sum}%, not 100%. Adjust them before requesting a decision.")
        else:
            def po_match_tool_func(input_text):
                """Match the stored invoice against the stored POs; return a JSON decision string.

                ``input_text`` (the agent's tool input) is ignored; the data
                comes from session state.
                """
                invoice = st.session_state.get("last_extracted_info")
                po_df = st.session_state.get("last_po_df")
                if invoice is None or po_df is None:
                    return json.dumps({
                        "decision": "REJECTED",
                        "reason": "Invoice or PO data not found.",
                        "debug": {},
                    })
                best_row, best_score, reason, debug = find_best_po_match(
                    invoice, po_df, weight_supplier, weight_po_number, weight_currency, weight_total_due, weight_line_item
                )
                if best_score > approved_threshold:
                    status = "APPROVED"
                elif best_score > partial_threshold:
                    status = "PARTIALLY APPROVED"
                else:
                    status = "REJECTED"
                # default=str: the payload is diagnostic output and may carry
                # values from the PO DataFrame that are not native JSON types;
                # stringify those rather than fail the whole decision.
                return json.dumps({
                    "decision": status,
                    "reason": f"Best match score: {int(best_score)}/100. {reason}",
                    "debug": debug,
                    "po_row": best_row.to_dict() if best_row is not None else None
                }, default=str)

            tools = [
                Tool(
                    name="po_match_tool",
                    func=po_match_tool_func,
                    description="Smartly match invoice to PO using all possible fields.",
                )
            ]
            decision_llm = ChatOpenAI(
                openai_api_key=get_api_key("OpenAI GPT-4.1"),
                model=MODELS["OpenAI GPT-4.1"]["model"],
                temperature=0,
                streaming=False,
            )
            agent = initialize_agent(
                tools,
                decision_llm,
                agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
                verbose=True,
            )
            prompt = (
                "You are an expert accounts payable agent. "
                "Use po_match_tool to check for the best possible match using supplier, PO number (which may appear anywhere in the invoice JSON, even within other fields), currency, line items, and total value. "
                "Weigh the importance of each field as an expert would, according to the user-configured weights. "
                "Return a JSON with decision (APPROVED, PARTIALLY APPROVED, REJECTED), reason (include field scores and reasoning), debug, and the best matched PO row.\n"
                f"Invoice JSON:\n{json.dumps(extracted_info, indent=2)}"
            )
            with st.spinner("AI is reasoning and making a decision..."):
                result = agent.run(prompt)

            st.markdown("<h3 style='margin-top:18px;'>AI Decision & Reason</h3>", unsafe_allow_html=True)
            # The agent's final answer is free text that may or may not be
            # JSON; only a parsed JSON object gets the structured view.
            try:
                result_json = json.loads(result)
            except (TypeError, ValueError):
                result_json = None
            if isinstance(result_json, dict):
                st.write(f"**Decision:** {result_json.get('decision', 'N/A')}")
                st.write(f"**Reason:** {result_json.get('reason', 'N/A')}")
                st.markdown("##### Debug & Matching Details")
                st.json(result_json.get('debug'))
                st.markdown("##### Extracted Invoice JSON")
                st.json(extracted_info)
                st.markdown("##### Matched PO Row")
                st.json(result_json.get('po_row'))
            else:
                # Heading is already rendered above; show the raw answer.
                st.write(result)

# Always show extraction/decision debug in full for troubleshooting
# `last_api` (parsed message content) and `last_raw` (raw response body) are
# both written by query_llm after a successful call, so checking for
# `last_api` alone is enough before reading both.
if "last_api" in st.session_state:
    with st.expander("Debug"):
        st.code(st.session_state.last_api)
        st.code(st.session_state.last_raw)