import streamlit as st
import requests
import json
import re
import os
import time
import mimetypes
import pandas as pd
from fuzzywuzzy import fuzz
from sentence_transformers import SentenceTransformer
import numpy as np
from langchain_community.chat_models import ChatOpenAI
from langchain.agents import initialize_agent, Tool, AgentType
from io import StringIO
st.set_page_config(page_title="EZOFIS Accounts Payable Agent", layout="wide")

# Canonical PO column names used for semantic header normalization.
# (Previously this list was defined twice — once inside the session-state init
# and once at module level — with identical contents; deduplicated here.)
CANON_HEADERS = [
    "PO Number", "Supplier Name", "Ship To", "Bill To", "PO Date",
    "Line Item Number", "Item Description", "Item Quantity",
    "Item Unit Price", "Line Item Total", "PO Total Value",
    "Currency", "Payment Terms", "Expected Delivery"
]

# --- Initialize embedding model only once per session ---
# Loading the sentence-transformer and encoding the canonical headers is paid
# once; Streamlit reruns reuse the cached objects from session state.
if 'embedding_model' not in st.session_state:
    with st.spinner("Loading embedding model for PO header normalization (1x cost)..."):
        st.session_state['embedding_model'] = SentenceTransformer('all-MiniLM-L6-v2')
    st.session_state['canon_header_vectors'] = {
        h: st.session_state['embedding_model'].encode(h)
        for h in CANON_HEADERS
    }

# NOTE(review): this markdown block is empty in the file as received — it
# presumably carried custom CSS that was stripped; restore styling if needed.
st.markdown("""
""", unsafe_allow_html=True)
def semantic_header_normalization(df, canon_headers, canon_vectors, model, threshold=0.80):
    """Rename DataFrame columns to canonical PO headers via embedding similarity.

    Each original column header is encoded with ``model`` and compared by
    cosine similarity against the pre-computed canonical header vectors. A
    column is renamed to its best-matching canonical header when the similarity
    exceeds ``threshold``; otherwise the original name is kept.

    Args:
        df: DataFrame whose column names should be normalized.
        canon_headers: list of canonical header names.
        canon_vectors: mapping of canonical header -> embedding vector.
        model: object exposing ``encode(text) -> vector``.
        threshold: minimum cosine similarity to accept a mapping
            (default 0.80, matching the previous hard-coded value).

    Returns:
        A new DataFrame with renamed columns (``rename`` returns a copy).
    """
    def _cosine(a, b):
        # Guard against zero-length vectors (e.g. an empty header string),
        # which would otherwise divide by zero.
        denom = np.linalg.norm(a) * np.linalg.norm(b)
        return float(np.dot(a, b) / denom) if denom else 0.0

    mapping = {}
    for orig in df.columns.tolist():
        vec = model.encode(orig)
        sims = {ch: _cosine(vec, canon_vectors[ch]) for ch in canon_headers}
        best = max(sims, key=sims.get)
        mapping[orig] = best if sims[best] > threshold else orig
    return df.rename(columns=mapping)
# Supported LLM back-ends, keyed by the label shown in the model selectbox.
# Each entry holds:
#   model:           model identifier passed to ChatOpenAI
#   api_env:         environment variable that must hold the API key
#   openai_api_base: override base URL (None = default OpenAI endpoint)
# NOTE(review): the "OpenAI GPT-4.1" label maps to "gpt-4-1106-preview" —
# confirm the label/model pairing is intentional.
MODELS = {
    "OpenAI GPT-4.1": {
        "model": "gpt-4-1106-preview",
        "api_env": "OPENAI_API_KEY",
        "openai_api_base": None,
    },
    "Mistral (OpenRouter)": {
        "model": "mistralai/ministral-8b",
        "api_env": "OPENROUTER_API_KEY",
        "openai_api_base": "https://openrouter.ai/api/v1",
    }
}
def get_llm(model_choice):
    """Instantiate a ChatOpenAI client for the chosen MODELS preset.

    Reads the API key from the environment variable named in the preset;
    if the key is missing, shows a Streamlit error banner and halts the
    script run via st.stop().
    """
    cfg = MODELS[model_choice]
    key = os.getenv(cfg["api_env"])
    if not key:
        st.error(f"API key not set: {cfg['api_env']}")
        st.stop()
    return ChatOpenAI(
        model=cfg["model"],
        openai_api_key=key,
        openai_api_base=cfg["openai_api_base"],
        temperature=0.1,
        max_tokens=2000,
    )
def get_extraction_prompt(txt):
    """Build the extraction prompt sent to the LLM.

    Embeds the raw OCR text of the invoice into a fixed instruction block that
    asks the model to return ONLY a JSON object with an "invoice_header" dict
    and a "line_items" list, following the exact schema shown below.

    Args:
        txt: plain text of the invoice (output of the OCR step).

    Returns:
        The complete prompt string.
    """
    # NOTE: the schema text below is part of the model contract — keep it in
    # sync with the consumers of clean_json_response()/ensure_total_due().
    return (
        "You are an expert invoice parser. "
        "Extract data according to the visible table structure and column headers in the invoice. "
        "For every line item, only extract fields that correspond to the table columns for that row (do not include header/shipment fields in line items). "
        "Merge all multi-line content within a single cell into that field (especially for the 'description' and 'notes'). "
        "If you are not able to deduct the currency then use your knowldedge and predict the 'currency' field using the 'supplier_address' data. "
        "Shipment/invoice-level fields such as CAR NUMBER, SHIPPING POINT, SHIPMENT NUMBER, CURRENCY, etc., must go ONLY into the 'invoice_header', not as line item fields.\n"
        "Use this schema:\n"
        '{\n'
        '  "invoice_header": {\n'
        '    "car_number": "string or null",\n'
        '    "shipment_number": "string or null",\n'
        '    "shipping_point": "string or null",\n'
        '    "currency": "string or null",\n'
        '    "invoice_number": "string or null",\n'
        '    "invoice_date": "string or null",\n'
        '    "order_number": "string or null",\n'
        '    "customer_order_number": "string or null",\n'
        '    "our_order_number": "string or null",\n'
        '    "sales_order_number": "string or null",\n'
        '    "purchase_order_number": "string or null",\n'
        '    "order_date": "string or null",\n'
        '    "supplier_name": "string or null",\n'
        '    "supplier_address": "string or null",\n'
        '    "supplier_phone": "string or null",\n'
        '    "supplier_email": "string or null",\n'
        '    "supplier_tax_id": "string or null",\n'
        '    "customer_name": "string or null",\n'
        '    "customer_address": "string or null",\n'
        '    "customer_phone": "string or null",\n'
        '    "customer_email": "string or null",\n'
        '    "customer_tax_id": "string or null",\n'
        '    "ship_to_name": "string or null",\n'
        '    "ship_to_address": "string or null",\n'
        '    "bill_to_name": "string or null",\n'
        '    "bill_to_address": "string or null",\n'
        '    "remit_to_name": "string or null",\n'
        '    "remit_to_address": "string or null",\n'
        '    "tax_id": "string or null",\n'
        '    "tax_registration_number": "string or null",\n'
        '    "vat_number": "string or null",\n'
        '    "payment_terms": "string or null",\n'
        '    "payment_method": "string or null",\n'
        '    "payment_reference": "string or null",\n'
        '    "bank_account_number": "string or null",\n'
        '    "iban": "string or null",\n'
        '    "swift_code": "string or null",\n'
        '    "total_before_tax": "string or null",\n'
        '    "tax_amount": "string or null",\n'
        '    "tax_rate": "string or null",\n'
        '    "shipping_charges": "string or null",\n'
        '    "discount": "string or null",\n'
        '    "total_due": "string or null",\n'
        '    "amount_paid": "string or null",\n'
        '    "balance_due": "string or null",\n'
        '    "due_date": "string or null",\n'
        '    "invoice_status": "string or null",\n'
        '    "reference_number": "string or null",\n'
        '    "project_code": "string or null",\n'
        '    "department": "string or null",\n'
        '    "contact_person": "string or null",\n'
        '    "notes": "string or null",\n'
        '    "additional_info": "string or null"\n'
        '  },\n'
        '  "line_items": [\n'
        '    {\n'
        '      "quantity": "string or null",\n'
        '      "units": "string or null",\n'
        '      "description": "string or null",\n'
        '      "footage": "string or null",\n'
        '      "price": "string or null",\n'
        '      "amount": "string or null",\n'
        '      "notes": "string or null"\n'
        '    }\n'
        '  ]\n'
        '}'
        "\nIf a field is missing for a line item or header, use null. "
        "Do not invent fields. Do not add any header or shipment data to any line item. Return ONLY the JSON object, no explanation.\n"
        "\nInvoice Text:\n"
        f"{txt}"
    )
def clean_json_response(text):
    """Extract and parse the JSON object embedded in an LLM reply.

    Strips markdown code fences, isolates the outermost {...} span, removes
    trailing commas before } or ], and attempts json.loads. On a first parse
    failure, tries one repair pass that inserts missing commas between string
    values and the next key. Returns the parsed dict, or None (with a
    Streamlit error shown) when no JSON could be recovered.
    """
    if not text:
        return None
    raw = text
    stripped = re.sub(r'```(?:json)?', '', text).strip()
    start = stripped.find('{')
    end = stripped.rfind('}') + 1
    if start < 0 or end < 1:
        st.error("Couldn't locate JSON in response.")
        st.code(raw)
        return None
    # Drop trailing commas, which LLMs emit frequently but JSON forbids.
    frag = re.sub(r',\s*([}\]])', r'\1', stripped[start:end])
    try:
        return json.loads(frag)
    except json.JSONDecodeError as e:
        # Repair `"value" "key":` sequences missing the separating comma.
        repaired = re.sub(r'"\s*"\s*(?="[^"]+"\s*:)', '","', frag)
        try:
            return json.loads(repaired)
        except json.JSONDecodeError:
            st.error(f"JSON parse error: {e}")
            st.code(frag)
            return None
def ensure_total_due(invoice_header):
    """Backfill a missing/empty 'total_due' from alternative total fields.

    Mutates invoice_header in place: if 'total_due' is None or "", the first
    truthy value among the fallback fields (in priority order) is copied into
    it. Returns the same dict for call-chaining.
    """
    fallbacks = ("invoice_total", "invoice_value", "total_before_tax",
                 "balance_due", "amount_paid")
    if invoice_header.get("total_due") in (None, ""):
        for name in fallbacks:
            value = invoice_header.get(name)
            if value:
                invoice_header["total_due"] = value
                break
    return invoice_header
def clean_num(val):
    """Parse the largest numeric value found in a free-form amount string.

    Numbers already passed as int/float are returned as float. For strings,
    every token matching an optionally-signed number (with thousand-separator
    commas) is parsed, and the maximum is returned — so "pay 10 of 250" yields
    250.0. Returns None for None input or when nothing numeric is found.

    Bug fixed: the previous `.isdigit()` filter silently rejected any value
    the regex captured with a leading '-' or '+' sign, so negative amounts
    (e.g. credit notes, "-5") always came back as None.
    """
    if val is None:
        return None
    if isinstance(val, (int, float)):
        return float(val)
    matches = re.findall(r"[-+]?\d[\d,]*\.?\d*", str(val))
    values = []
    for m in matches:
        try:
            values.append(float(m.replace(',', '')))
        except ValueError:
            # Shouldn't happen given the regex, but stay defensive.
            pass
    return max(values) if values else None
def weighted_fuzzy_score(s1, s2):
    """Similarity score in [0, 100] between two field values.

    Both values are stringified, lowercased, and whitespace-collapsed.
    Identical normalized strings (including both empty) short-circuit to 100;
    otherwise fuzzywuzzy's token_set_ratio supplies the score.
    """
    left = " ".join(str(s1).lower().split())
    right = " ".join(str(s2).lower().split())
    return 100 if left == right else fuzz.token_set_ratio(left, right)
def find_po_number_in_json(po_number, invoice_json):
    """Return True if po_number appears anywhere in the extracted invoice JSON.

    All leaf values of the (arbitrarily nested) JSON are stringified and
    normalized (spaces removed, trailing ".0" float artifact stripped), then
    checked for a substring match in either direction against both the
    normalized PO number and its integer form (e.g. "12345" for 12345.0).

    Bugs fixed:
    - the old `.replace(".0", "")` removed ".0" *anywhere*, corrupting values
      like "1.05" into "15"; now only a trailing ".0" is stripped;
    - the bare `except:` around the int conversion now catches only
      TypeError/ValueError.
    """
    def _norm(value):
        s = str(value).strip().replace(" ", "")
        return s[:-2] if s.endswith(".0") else s

    def _flatten(obj):
        # Yield every scalar leaf of the nested dict/list structure as a string.
        if isinstance(obj, dict):
            for v in obj.values():
                yield from _flatten(v)
        elif isinstance(obj, list):
            for item in obj:
                yield from _flatten(item)
        elif obj is not None:
            yield str(obj)

    po_str = _norm(po_number)
    try:
        # Integer form catches "12345" vs "12345.0" style mismatches.
        po_int = str(int(float(po_number)))
    except (TypeError, ValueError):
        po_int = po_str
    for raw in _flatten(invoice_json):
        s = _norm(raw)
        if not s:
            continue
        if po_str and (po_str in s or s in po_str):
            return True
        if po_int and (po_int in s or s in po_int):
            return True
    return False
def drop_embedding_keys(d):
    """Return a plain dict copy of d with embedding columns removed.

    Accepts either a dict or a pandas Series/row (anything exposing to_dict());
    keys containing the substring "embedding" (e.g. "_desc_embedding") are
    dropped so the result stays JSON-serializable.
    """
    mapping = d.to_dict() if hasattr(d, "to_dict") else d
    return {key: value for key, value in mapping.items() if "embedding" not in key}
# --- UI Layout ---
# NOTE(review): the HTML markup originally inside these markdown strings was
# lost/mangled in this file (the string literals were broken mid-token);
# reconstructed as minimal headings — restore the original styling as needed.
st.markdown("<h1>EZOFIS Accounts Payable Agent</h1>", unsafe_allow_html=True)
st.markdown("<p>Modern workflow automation for finance teams</p>", unsafe_allow_html=True)
col1, col2, col3 = st.columns([2, 2, 3])
# --- col1: PO CSV, Model & Weights, Threshold Sliders ---
with col1:
    st.markdown("1 Upload Active Purchase Orders (POs)", unsafe_allow_html=True)
    po_file = st.file_uploader("CSV with PO number, Supplier, Items, etc.", type=["csv"], key="po_csv", label_visibility="collapsed")
    # Cache the raw CSV bytes plus the parsed/normalized DataFrame in session
    # state so Streamlit reruns don't re-parse and re-embed an unchanged upload.
    if 'po_file_bytes' not in st.session_state:
        st.session_state['po_file_bytes'] = None
    if 'last_po_df' not in st.session_state:
        st.session_state['last_po_df'] = None
    if po_file is not None:
        file_bytes = po_file.getvalue()
        if st.session_state['po_file_bytes'] != file_bytes:
            # New/changed upload: parse and normalize from scratch.
            st.session_state['po_file_bytes'] = file_bytes
            df = pd.read_csv(StringIO(po_file.getvalue().decode("utf-8")))
            # Semantic normalization (embeddings)
            df = semantic_header_normalization(
                df, CANON_HEADERS,
                st.session_state['canon_header_vectors'],
                st.session_state['embedding_model']
            )
            # Precompute and store embeddings for PO line item descriptions
            # (first matching description column wins; stored per-row so the
            # matcher can reuse them without re-encoding).
            for c in ["Line Item Description", "Item Description", "Description"]:
                if c in df.columns:
                    po_line_descs = df[c].astype(str).fillna("").tolist()
                    po_line_desc_embeds = st.session_state['embedding_model'].encode(po_line_descs)
                    df["_desc_embedding"] = list(po_line_desc_embeds)
                    break
            st.session_state['last_po_df'] = df
            st.success(f"Loaded {len(df)} records from uploaded CSV.")
        else:
            # Same bytes as the previous run: reuse the cached DataFrame.
            df = st.session_state['last_po_df']
            if df is not None:
                st.success(f"Loaded {len(df)} records from uploaded CSV.")
    else:
        df = None
    st.markdown("2 Select Model & Scoring Weights", unsafe_allow_html=True)
    mdl = st.selectbox("LLM Model", list(MODELS.keys()), key="extract_model", index=0)
    def int_slider(label, value, key):
        # Integer percentage slider: range 0-100, step 1.
        return st.slider(label, 0, 100, value, 1, key=key, format="%d")
    weight_supplier = int_slider("Supplier Name (%)", 35, "w_supplier")
    weight_po_number = int_slider("PO Number (%)", 35, "w_po")
    weight_currency = int_slider("Currency (%)", 10, "w_curr")
    weight_total_due = int_slider("Total Due (%)", 10, "w_due")
    weight_line_item = int_slider("Line Item (%)", 10, "w_line")
    weight_sum = weight_supplier + weight_po_number + weight_currency + weight_total_due + weight_line_item
    # The matcher only scores when weights sum to exactly 100, so warn here.
    if weight_sum != 100:
        st.warning(f"Sum of weights is {weight_sum}%. Adjust so it equals 100%.")
    st.markdown("3 Set Decision Thresholds", unsafe_allow_html=True)
    approved_threshold = st.slider(
        "Threshold for 'APPROVED'",
        min_value=0,
        max_value=100,
        value=90,
        format="%d"
    )
    # Capped just below the approved threshold so the bands can't overlap.
    partial_threshold = st.slider(
        "Threshold for 'PARTIALLY APPROVED'",
        min_value=0,
        max_value=approved_threshold - 1,
        value=60,
        format="%d"
    )
# --- col2: Invoice Upload & Extract Button ---
with col2:
    st.markdown("4 Upload Invoice/Document", unsafe_allow_html=True)
    inv_file = st.file_uploader("Upload PDF, DOCX, XLSX, PNG, JPG, TIFF", type=["pdf", "docx", "xlsx", "xls", "png", "jpg", "jpeg", "tiff"], key="invoice_file", label_visibility="collapsed")
    st.markdown("5 Extract Data", unsafe_allow_html=True)
    if st.button("Extract"):
        if inv_file:
            with st.spinner("Extracting text from document..."):
                # Step 1: upload the raw document to the Unstract LLMWhisperer OCR API.
                filename = getattr(inv_file, "name", "uploaded_file")
                file_bytes = inv_file.read()
                content_type = mimetypes.guess_type(filename)[0] or "application/octet-stream"
                headers = {
                    "unstract-key": os.getenv("UNSTRACT_API_KEY"),
                    "Content-Type": content_type,
                }
                url = "https://llmwhisperer-api.us-central.unstract.com/api/v2/whisper"
                r = requests.post(url, headers=headers, data=file_bytes)
                # The whisper endpoint answers 202 Accepted with an async job hash.
                if r.status_code != 202:
                    st.error(f"Unstract: Error uploading file: {r.status_code} - {r.text}")
                else:
                    whisper_hash = r.json().get("whisper_hash")
                    if whisper_hash:
                        # Step 2: poll job status up to 30 times, 2 s apart (~60 s budget).
                        status_url = f"https://llmwhisperer-api.us-central.unstract.com/api/v2/whisper-status?whisper_hash={whisper_hash}"
                        status_placeholder = st.empty()
                        for i in range(30):
                            status_r = requests.get(status_url, headers={"unstract-key": os.getenv("UNSTRACT_API_KEY")})
                            if status_r.status_code != 200:
                                st.error(f"Unstract: Error checking status: {status_r.status_code} - {status_r.text}")
                                break
                            status = status_r.json().get("status")
                            if status == "processed":
                                status_placeholder.info("EZOFIS AI OCR AGENT STATUS: processed! 🎉")
                                break
                            status_placeholder.info(f"EZOFIS AI OCR AGENT STATUS: {status or 'waiting'}... ({i+1})")
                            time.sleep(2)
                        else:
                            # for-else: only reached when the loop exhausted without a break.
                            status_placeholder.error("Unstract: Timeout waiting for OCR to finish.")
                        # NOTE(review): retrieval below runs even after a status error
                        # or timeout (no early exit) — confirm this is intentional.
                        # Step 3: retrieve the extracted plain text.
                        retrieve_url = f"https://llmwhisperer-api.us-central.unstract.com/api/v2/whisper-retrieve?whisper_hash={whisper_hash}&text_only=true"
                        r = requests.get(retrieve_url, headers={"unstract-key": os.getenv("UNSTRACT_API_KEY")})
                        if r.status_code != 200:
                            st.error(f"Unstract: Error retrieving extracted text: {r.status_code} - {r.text}")
                        else:
                            try:
                                data = r.json()
                                extracted_text = data.get("result_text") or r.text
                            except Exception:
                                # text_only responses may not be JSON; fall back to raw body.
                                extracted_text = r.text
                            # Step 4: have the selected LLM structure the OCR text as JSON.
                            with st.spinner("Fine Tuning The Extracted Output..."):
                                llm = get_llm(mdl)
                                response = llm.invoke([{"role": "user", "content": get_extraction_prompt(extracted_text)}])
                                result = response.content if hasattr(response, "content") else response
                                extracted_info = clean_json_response(result)
                                if extracted_info:
                                    st.success("Extraction Complete")
                                    if "invoice_header" in extracted_info:
                                        extracted_info["invoice_header"] = ensure_total_due(extracted_info["invoice_header"])
                                    # Persist for the decision step in col3.
                                    st.session_state['last_extracted_info'] = extracted_info
        else:
            st.warning("Please upload an invoice/document first.")
# --- col3: Decision, Side-by-Side, and Line Item Hybrid Matching with Navigation ---
with col3:
    st.markdown("6 AP Agent Decision (LLM Powered)", unsafe_allow_html=True)
    decision_made = st.button("Make a decision (EZOFIS AP AGENT)")
    if decision_made:
        extracted_info = st.session_state.get('last_extracted_info', None)
        po_df = st.session_state.get('last_po_df', None)
        if extracted_info is not None and po_df is not None:
            def po_match_tool_func(text):
                # Score every PO row against the extracted invoice and return
                # the best match as a JSON string. `text` is required by the
                # LangChain Tool signature but is ignored: the function closes
                # over extracted_info / po_df / the weight sliders instead.
                inv = extracted_info
                scores = []
                inv_hdr = inv["invoice_header"]
                inv_supplier = inv_hdr.get("supplier_name") or ""
                # NOTE(review): inv_po_number is computed but never used below
                # (the PO-number score relies on find_po_number_in_json instead).
                inv_po_number = inv_hdr.get("purchase_order_number") or inv_hdr.get("po_number") or inv_hdr.get("order_number") or ""
                inv_currency = inv_hdr.get("currency") or ""
                inv_total_due = clean_num(inv_hdr.get("total_due"))
                inv_line_items = inv.get("line_items", [])
                embedding_model = st.session_state['embedding_model']
                matched_po_indices = set()  # <--- NEW: to track matched PO indices
                # --- Hybrid Line Item Matching: Best-Match for Each Invoice Line ---
                for idx, row in po_df.iterrows():
                    po_supplier = row.get("Supplier Name", "")
                    po_po_number = str(row.get("PO Number", ""))
                    po_currency = row.get("Currency", "")
                    po_total = clean_num(row.get("PO Total Value", "")) or clean_num(row.get("PO Total", ""))
                    po_desc = row.get("Line Item Description", "") or row.get("Item Description", "") or row.get("Description", "")
                    # NOTE(review): str(...) of a missing pandas value yields "nan",
                    # which is truthy and short-circuits the `or` fallbacks below —
                    # confirm against real PO CSVs.
                    po_qty = str(row.get("Qty", "")) or str(row.get("Item Quantity", "")) or str(row.get("Quantity", ""))
                    po_unit = str(row.get("Rate", "")) or str(row.get("Item Unit Price", "")) or str(row.get("Unit Price", "")) or str(row.get("Price", ""))
                    po_line_total = clean_num(row.get("Amount", "")) or clean_num(row.get("Line Item Total", "")) or clean_num(row.get("Line Amount", ""))
                    po_desc_emb = embedding_model.encode(po_desc) if po_desc else None
                    field_details = []
                    s_supplier = weighted_fuzzy_score(inv_supplier, po_supplier)
                    field_details.append({
                        "field": "Supplier Name", "invoice": inv_supplier, "po": po_supplier, "score": s_supplier
                    })
                    # PO number is searched across the WHOLE invoice JSON, not
                    # just the header field.
                    s_po_number = 100 if find_po_number_in_json(po_po_number, inv) else 0
                    # NOTE(review): "invoice" below displays the PO-side number,
                    # not the invoice-side value — confirm intended.
                    field_details.append({
                        "field": "PO Number", "invoice": po_po_number, "po": po_po_number, "score": s_po_number
                    })
                    s_currency = weighted_fuzzy_score(inv_currency, po_currency)
                    field_details.append({
                        "field": "Currency", "invoice": inv_currency, "po": po_currency, "score": s_currency
                    })
                    # Totals "match" when both parse and differ by < 2 currency units.
                    s_total = 100 if inv_total_due is not None and po_total is not None and abs(inv_total_due - po_total) < 2 else 0
                    field_details.append({
                        "field": "Total Due", "invoice": inv_total_due, "po": po_total, "score": s_total
                    })
                    all_line_matches = []
                    # All PO rows that share this row's PO number (one row per line item).
                    po_lines = po_df[po_df["PO Number"] == po_po_number]
                    # Hybrid: For each invoice line item, best-match to a PO line
                    for inv_line in inv_line_items:
                        best_score = 0
                        best_po_line = None
                        best_po_idx = None
                        desc = inv_line.get("description", "")
                        qty = inv_line.get("quantity", "")
                        price = inv_line.get("price", "")
                        amount = inv_line.get("amount", "")
                        inv_desc_emb = embedding_model.encode(desc) if desc else None
                        # Try all matching PO lines
                        for po_idx, po_line in po_lines.iterrows():
                            po_ldesc = po_line.get("Line Item Description", "") or po_line.get("Item Description", "") or po_line.get("Description", "")
                            po_lqty = str(po_line.get("Qty", "")) or str(po_line.get("Item Quantity", "")) or str(po_line.get("Quantity", ""))
                            po_lunit = str(po_line.get("Rate", "")) or str(po_line.get("Item Unit Price", "")) or str(po_line.get("Unit Price", "")) or str(po_line.get("Price", ""))
                            po_lamt = clean_num(po_line.get("Amount", "")) or clean_num(po_line.get("Line Item Total", "")) or clean_num(po_line.get("Line Amount", ""))
                            po_ldesc_emb = po_line.get("_desc_embedding", None)
                            desc_score_sem = 0
                            # Semantic description similarity (cosine), scaled to 0-100.
                            if po_ldesc_emb is not None and inv_desc_emb is not None:
                                desc_score_sem = float(np.dot(po_ldesc_emb, inv_desc_emb) / (np.linalg.norm(po_ldesc_emb)*np.linalg.norm(inv_desc_emb)))
                                desc_score_sem = int(100*desc_score_sem)
                            desc_score_fuz = weighted_fuzzy_score(desc, po_ldesc)
                            # Hybrid description score: mean of semantic and fuzzy.
                            desc_score = int((desc_score_sem + desc_score_fuz)/2)
                            qty_score = 100 if clean_num(qty) == clean_num(po_lqty) else 0
                            unit_score = 100 if clean_num(price) == clean_num(po_lunit) else 0
                            # NOTE(review): None == None scores 100 when both
                            # amounts are unparseable — confirm intended.
                            amount_score = 100 if clean_num(amount) == po_lamt else 0
                            # Fixed intra-line weights: 50% desc, 20% qty, 15% unit, 15% amount.
                            total = desc_score * 0.5 + qty_score * 0.2 + unit_score * 0.15 + amount_score * 0.15
                            if total > best_score:
                                best_score = total
                                best_po_line = {
                                    "description": po_ldesc, "quantity": po_lqty,
                                    "price": po_lunit, "amount": po_lamt
                                }
                                best_po_idx = po_idx
                        # NOTE(review): latent bug — the per-field scores, `total`
                        # and `po_idx` used below come from the LAST PO line
                        # iterated, not from the best-scoring one (only "po"
                        # reflects best_po_line), and `po_idx` raises NameError
                        # when po_lines is empty. Flagged for a separate fix.
                        best_detail = {
                            "field": "Line Item",
                            "invoice": {"description": desc, "quantity": qty, "price": price, "amount": amount},
                            "po": best_po_line,
                            "desc_score": desc_score, "qty_score": qty_score,
                            "unit_score": unit_score, "amount_score": amount_score,
                            "line_item_score": total,
                            "po_df_index": po_idx  # <--- track matched index
                        }
                        if best_po_line:
                            all_line_matches.append(best_detail)
                            matched_po_indices.add(best_po_idx)  # <--- add matched index
                    # Choose best line (highest total) as the match for this PO
                    line_item_score = max([m["line_item_score"] for m in all_line_matches], default=0)
                    best_line_detail = max(all_line_matches, key=lambda m: m["line_item_score"], default=None)
                    wsum = weight_supplier + weight_po_number + weight_currency + weight_total_due + weight_line_item
                    # NOTE(review): when the sliders don't sum to exactly 100 the
                    # score is forced to 0, which silently REJECTS every PO.
                    total_score = (
                        s_supplier * weight_supplier/100 +
                        s_po_number * weight_po_number/100 +
                        s_currency * weight_currency/100 +
                        s_total * weight_total_due/100 +
                        line_item_score * weight_line_item/100
                    ) if wsum == 100 else 0
                    reason = (
                        f"Supplier match: {s_supplier}/100 (invoice: '{inv_supplier}' vs PO: '{po_supplier}'), "
                        f"PO Number: {s_po_number}/100 ({'found anywhere in JSON' if s_po_number else 'not found'}), "
                        f"Currency: {s_currency}/100 (invoice: '{inv_currency}' vs PO: '{po_currency}'), "
                        f"Total Due: {'match' if s_total else 'no match'} (invoice: {inv_total_due} vs PO: {po_total}), "
                        f"Line item best match: {int(line_item_score)}/100."
                    )
                    debug = {
                        "po_idx": idx, "po_supplier": po_supplier, "po_po_number": po_po_number, "po_total": po_total,
                        "scores": field_details, "line_item_score": line_item_score, "best_line_detail": best_line_detail,
                        "all_line_matches": all_line_matches, "total_score": total_score, "inv_total_due": inv_total_due
                    }
                    scores.append((row, total_score, reason, debug))
                # Highest-scoring PO wins.
                scores.sort(key=lambda tup: tup[1], reverse=True)
                if not scores:
                    return json.dumps({"decision": "REJECTED", "reason": "No POs found.", "debug": {}})
                best_row, best_score, reason, debug = scores[0]
                return json.dumps({
                    "score": best_score,
                    "approved_threshold": approved_threshold,
                    "partial_threshold": partial_threshold,
                    "reason": f"Best match score: {int(best_score)}/100. {reason}",
                    "debug": debug,
                    "po_row": drop_embedding_keys(best_row) if best_row is not None else None,
                    "matched_po_indices": list(matched_po_indices)  # <--- return this
                })
            tool = Tool(
                name="po_match_tool",
                func=po_match_tool_func,
                description="Returns JSON with score, thresholds, details. LLM must use this result with strict user logic.",
            )
            agent_llm = get_llm(mdl)
            agent = initialize_agent(
                [tool],
                agent_llm,
                agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
                verbose=True,
                max_iterations=2,
            )
            # The agent is constrained to a pure threshold rule over the tool's score.
            prompt = (
                "You are an expert accounts payable agent.\n"
                "You MUST use the output of po_match_tool to make the approval decision.\n"
                f"User-configured thresholds are: APPROVED = {approved_threshold}, PARTIALLY APPROVED = {partial_threshold}.\n"
                "You must STRICTLY follow this logic with NO exceptions and NO overrides:\n"
                "- If the score from po_match_tool is greater than or equal to the APPROVED threshold, return decision = APPROVED.\n"
                "- Else, if the score is greater than or equal to the PARTIALLY APPROVED threshold, return decision = PARTIALLY APPROVED.\n"
                "- Else, return decision = REJECTED.\n"
                "DO NOT reject just because of a field mismatch. Do not add your own rules.\n"
                "Use only this logic for all decisions, based on the score and thresholds from po_match_tool.\n"
                "Always return JSON using this schema, and nothing else:\n"
                '{\n'
                '  "decision": "APPROVED" | "PARTIALLY APPROVED" | "REJECTED",\n'
                '  "reason": "...",\n'
                '  "debug": {...},\n'
                '  "po_row": {...},\n'
                '  "matched_po_indices": [index, ...]\n'
                '}\n'
                "First, call po_match_tool and get its JSON. Then apply the threshold rules above to its output.\n"
                "Return only the decision JSON.\n"
                f"Invoice JSON:\n{json.dumps(extracted_info, indent=2)}"
            )
            with st.spinner("AP Agent reasoning and making a decision..."):
                result = agent.run(prompt)
            try:
                result_json = clean_json_response(result)
                st.session_state['result_json'] = result_json
                st.session_state['extracted_info_for_decision'] = extracted_info
                st.session_state['line_item_idx'] = 0
            except Exception:
                # Agent returned free text instead of JSON — show it raw.
                st.session_state['result_json'] = None
                st.session_state['extracted_info_for_decision'] = None
                st.session_state['line_item_idx'] = 0
                st.subheader("Decision output not in standard format.")
                st.code(result)
# --- Always show results if available ---
result_json = st.session_state.get('result_json', None)
extracted_info = st.session_state.get('extracted_info_for_decision', None)
po_df = st.session_state.get('last_po_df', None) # <-- make sure this is available
if result_json and extracted_info:
status = result_json.get('decision', 'N/A')
score = result_json.get('debug', {}).get('total_score', None)
color = "#4CAF50" if status == "APPROVED" else "#FFC107" if status == "PARTIALLY APPROVED" else "#F44336"
st.markdown(f"""
{status}
Total Score: {score if score is not None else '--'}
""", unsafe_allow_html=True)
st.markdown("#### Side-by-Side Field Matching")
field_scores = result_json.get("debug", {}).get("scores", [])
if field_scores:
table_rows = ""
for fs in field_scores:
table_rows += f"| {fs['field']} | {fs['invoice']} | {fs['po']} | {fs['score']} |
"
st.markdown(
f"""
| Field | Invoice Value | PO Value | Score |
{table_rows}
""",
unsafe_allow_html=True
)
st.markdown("#### Line Item Side-by-Side Matching")
invoice_line_items = result_json.get("debug", {}).get("all_line_matches", [])
num_items = len(invoice_line_items)
key = "line_item_idx"
if key not in st.session_state or st.session_state[key] >= num_items:
st.session_state[key] = 0
if num_items > 0:
cols = st.columns([1, 3, 1])
with cols[0]:
if st.button("⬅️", key="prev_btn", disabled=st.session_state[key] == 0):
st.session_state[key] = max(0, st.session_state[key] - 1)
with cols[1]:
st.markdown(
f""
f"Line Item {st.session_state[key]+1} of {num_items}"
f"
", unsafe_allow_html=True
)
with cols[2]:
if st.button("➡️", key="next_btn", disabled=st.session_state[key] == num_items - 1):
st.session_state[key] = min(num_items - 1, st.session_state[key] + 1)
idx = st.session_state[key]
inv_line = invoice_line_items[idx]
inv = inv_line.get("invoice", {})
best_po = inv_line.get("po", {})
desc_score = inv_line.get("desc_score", 0)
qty_score = inv_line.get("qty_score", 0)
unit_score = inv_line.get("unit_score", 0)
amt_score = inv_line.get("amount_score", 0)
line_score = int(inv_line.get("line_item_score", 0))
fields = ["Description", "Quantity", "Price", "Amount"]
table_html = ""
table_html += " | "
for f in fields:
table_html += f"{f} | "
table_html += "
"
table_html += "| Invoice Value | "
table_html += f"{inv.get('description', '')} | "
table_html += f"{inv.get('quantity', '')} | "
table_html += f"{inv.get('price', '')} | "
table_html += f"{inv.get('amount', '')} |
"
table_html += "| PO Value | "
table_html += f"{best_po.get('description', '')} | "
table_html += f"{best_po.get('quantity', '')} | "
table_html += f"{best_po.get('price', '')} | "
table_html += f"{best_po.get('amount', '')} |
"
table_html += "| Score | "
table_html += f"{desc_score} | {qty_score} | {unit_score} | {amt_score} |
"
table_html += "| Line Score | | | | {} |
".format(line_score)
table_html += "
"
st.markdown(table_html, unsafe_allow_html=True)
else:
st.info("No line items found.")
# --------- PO LINE ITEMS MISSING IN INVOICE ---------
matched_po_row = result_json.get("po_row", None)
matched_indices = set(result_json.get("matched_po_indices", []))
if matched_po_row and po_df is not None:
po_no = matched_po_row.get("PO Number", None)
po_lines_df = po_df[po_df["PO Number"] == po_no]
missing_po_lines = po_lines_df.loc[~po_lines_df.index.isin(matched_indices)]
if not missing_po_lines.empty:
st.markdown("#### PO Line Items Missing in Invoice")
table_html = ""
table_html += ""
for f in ["Description", "Quantity", "Price", "Amount"]:
table_html += f"| {f} | "
table_html += "
"
for _, row in missing_po_lines.iterrows():
table_html += ""
table_html += f"| {row.get('Description', '') or row.get('Item Description', '') or row.get('Line Item Description', '')} | "
table_html += f"{row.get('Quantity', '') or row.get('Item Quantity', '') or row.get('Qty', '')} | "
table_html += f"{row.get('Price', '') or row.get('Item Unit Price', '') or row.get('Rate', '')} | "
table_html += f"{row.get('Amount', '') or row.get('Line Item Total', '') or row.get('Line Amount', '')} | "
table_html += "
"
table_html += "
"
st.markdown(table_html, unsafe_allow_html=True)
else:
st.info("No PO line items missing in invoice.")
st.markdown("#### Reason")
st.write(result_json.get('reason', 'N/A'))
st.markdown("#### Debug & Matching Details (Full JSON)")
st.json(result_json.get('debug'))
st.markdown("#### Extracted Invoice JSON")
st.json(extracted_info)
st.markdown("#### Matched PO Row")
st.json(result_json.get('po_row'))
else:
st.info("Run a decision to see results.")