Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,58 +6,122 @@ import os
|
|
| 6 |
import time
|
| 7 |
import mimetypes
|
| 8 |
import pandas as pd
|
|
|
|
| 9 |
from langchain_community.chat_models import ChatOpenAI
|
| 10 |
from langchain.agents import initialize_agent, Tool, AgentType
|
| 11 |
from fuzzywuzzy import fuzz
|
| 12 |
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
| 23 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
-
|
| 26 |
-
|
|
|
|
| 27 |
if not key:
|
| 28 |
-
st.error(
|
| 29 |
st.stop()
|
| 30 |
return key
|
| 31 |
|
| 32 |
-
def query_llm(
|
| 33 |
-
|
| 34 |
headers = {
|
| 35 |
-
"Authorization": f"Bearer {get_api_key(
|
| 36 |
"Content-Type": "application/json",
|
| 37 |
}
|
| 38 |
-
if cfg.get("extra_headers"):
|
| 39 |
-
headers.update(cfg["extra_headers"])
|
| 40 |
payload = {
|
| 41 |
-
"model":
|
| 42 |
"messages": [{"role": "user", "content": prompt}],
|
| 43 |
"temperature": 0.1,
|
| 44 |
"max_tokens": 2000,
|
| 45 |
}
|
| 46 |
-
|
| 47 |
-
payload
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
r = requests.post(cfg["api_url"], headers=headers, json=payload, timeout=90)
|
| 51 |
-
if r.status_code != 200:
|
| 52 |
-
st.error(f"🚨 API Error {r.status_code}: {r.text}")
|
| 53 |
-
return None
|
| 54 |
-
content = r.json()["choices"][0]["message"]["content"]
|
| 55 |
-
st.session_state.last_api = content
|
| 56 |
-
st.session_state.last_raw = r.text
|
| 57 |
-
return content
|
| 58 |
-
except Exception as e:
|
| 59 |
-
st.error(f"Connection error: {e}")
|
| 60 |
return None
|
|
|
|
| 61 |
|
| 62 |
def clean_json_response(text):
|
| 63 |
if not text:
|
|
@@ -82,102 +146,28 @@ def clean_json_response(text):
|
|
| 82 |
st.code(frag)
|
| 83 |
return None
|
| 84 |
|
| 85 |
-
def
|
| 86 |
-
for line in text.splitlines():
|
| 87 |
-
line = line.strip()
|
| 88 |
-
if line:
|
| 89 |
-
return line
|
| 90 |
-
return None
|
| 91 |
-
|
| 92 |
-
def get_extraction_prompt(model_choice, txt):
|
| 93 |
return (
|
| 94 |
-
"You are an expert invoice parser. "
|
| 95 |
-
"
|
| 96 |
-
"For every line item, only extract fields that correspond to the table columns for that row (do not include header/shipment fields in line items). "
|
| 97 |
-
"Merge all multi-line content within a single cell into that field (especially for the 'description' and 'notes'). "
|
| 98 |
-
"Shipment/invoice-level fields such as CAR NUMBER, SHIPPING POINT, SHIPMENT NUMBER, CURRENCY, etc., must go ONLY into the 'invoice_header', not as line item fields.\n"
|
| 99 |
"Use this schema:\n"
|
| 100 |
-
'{
|
| 101 |
-
|
| 102 |
-
' "car_number": "string or null",\n'
|
| 103 |
-
' "shipment_number": "string or null",\n'
|
| 104 |
-
' "shipping_point": "string or null",\n'
|
| 105 |
-
' "currency": "string or null",\n'
|
| 106 |
-
' "invoice_number": "string or null",\n'
|
| 107 |
-
' "invoice_date": "string or null",\n'
|
| 108 |
-
' "order_number": "string or null",\n'
|
| 109 |
-
' "customer_order_number": "string or null",\n'
|
| 110 |
-
' "our_order_number": "string or null",\n'
|
| 111 |
-
' "sales_order_number": "string or null",\n'
|
| 112 |
-
' "purchase_order_number": "string or null",\n'
|
| 113 |
-
' "order_date": "string or null",\n'
|
| 114 |
-
' "supplier_name": "string or null",\n'
|
| 115 |
-
' "supplier_address": "string or null",\n'
|
| 116 |
-
' "supplier_phone": "string or null",\n'
|
| 117 |
-
' "supplier_email": "string or null",\n'
|
| 118 |
-
' "supplier_tax_id": "string or null",\n'
|
| 119 |
-
' "customer_name": "string or null",\n'
|
| 120 |
-
' "customer_address": "string or null",\n'
|
| 121 |
-
' "customer_phone": "string or null",\n'
|
| 122 |
-
' "customer_email": "string or null",\n'
|
| 123 |
-
' "customer_tax_id": "string or null",\n'
|
| 124 |
-
' "ship_to_name": "string or null",\n'
|
| 125 |
-
' "ship_to_address": "string or null",\n'
|
| 126 |
-
' "bill_to_name": "string or null",\n'
|
| 127 |
-
' "bill_to_address": "string or null",\n'
|
| 128 |
-
' "remit_to_name": "string or null",\n'
|
| 129 |
-
' "remit_to_address": "string or null",\n'
|
| 130 |
-
' "tax_id": "string or null",\n'
|
| 131 |
-
' "tax_registration_number": "string or null",\n'
|
| 132 |
-
' "vat_number": "string or null",\n'
|
| 133 |
-
' "payment_terms": "string or null",\n'
|
| 134 |
-
' "payment_method": "string or null",\n'
|
| 135 |
-
' "payment_reference": "string or null",\n'
|
| 136 |
-
' "bank_account_number": "string or null",\n'
|
| 137 |
-
' "iban": "string or null",\n'
|
| 138 |
-
' "swift_code": "string or null",\n'
|
| 139 |
-
' "total_before_tax": "string or null",\n'
|
| 140 |
-
' "tax_amount": "string or null",\n'
|
| 141 |
-
' "tax_rate": "string or null",\n'
|
| 142 |
-
' "shipping_charges": "string or null",\n'
|
| 143 |
-
' "discount": "string or null",\n'
|
| 144 |
-
' "total_due": "string or null",\n'
|
| 145 |
-
' "amount_paid": "string or null",\n'
|
| 146 |
-
' "balance_due": "string or null",\n'
|
| 147 |
-
' "due_date": "string or null",\n'
|
| 148 |
-
' "invoice_status": "string or null",\n'
|
| 149 |
-
' "reference_number": "string or null",\n'
|
| 150 |
-
' "project_code": "string or null",\n'
|
| 151 |
-
' "department": "string or null",\n'
|
| 152 |
-
' "contact_person": "string or null",\n'
|
| 153 |
-
' "notes": "string or null",\n'
|
| 154 |
-
' "additional_info": "string or null"\n'
|
| 155 |
-
' },\n'
|
| 156 |
-
' "line_items": [\n'
|
| 157 |
-
' {\n'
|
| 158 |
-
' "quantity": "string or null",\n'
|
| 159 |
-
' "units": "string or null",\n'
|
| 160 |
-
' "description": "string or null",\n'
|
| 161 |
-
' "footage": "string or null",\n'
|
| 162 |
-
' "price": "string or null",\n'
|
| 163 |
-
' "amount": "string or null",\n'
|
| 164 |
-
' "notes": "string or null"\n'
|
| 165 |
-
' }\n'
|
| 166 |
-
' ]\n'
|
| 167 |
-
'}'
|
| 168 |
-
"\nIf a field is missing for a line item or header, use null. "
|
| 169 |
-
"Do not invent fields. Do not add any header or shipment data to any line item. Return ONLY the JSON object, no explanation.\n"
|
| 170 |
"\nInvoice Text:\n"
|
| 171 |
f"{txt}"
|
| 172 |
)
|
| 173 |
|
| 174 |
-
def
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
|
| 182 |
def clean_num(val):
|
| 183 |
if val is None:
|
|
@@ -197,246 +187,85 @@ def weighted_fuzzy_score(s1, s2):
|
|
| 197 |
return 100
|
| 198 |
return fuzz.token_set_ratio(str(s1).lower(), str(s2).lower())
|
| 199 |
|
| 200 |
-
def
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
elif isinstance(obj, list):
|
| 207 |
-
for item in obj:
|
| 208 |
-
fields.extend(_flatten(item))
|
| 209 |
-
elif obj is not None:
|
| 210 |
-
fields.append(str(obj))
|
| 211 |
-
return fields
|
| 212 |
-
|
| 213 |
-
po_str = str(po_number).strip().replace(" ", "").replace(".0", "")
|
| 214 |
-
try:
|
| 215 |
-
po_int = str(int(float(po_number)))
|
| 216 |
-
except:
|
| 217 |
-
po_int = po_str
|
| 218 |
-
|
| 219 |
-
all_strs = [str(s).strip().replace(" ", "").replace(".0", "") for s in _flatten(invoice_json)]
|
| 220 |
-
for s in all_strs:
|
| 221 |
-
if not s:
|
| 222 |
-
continue
|
| 223 |
-
if po_str and (po_str in s or s in po_str):
|
| 224 |
-
return True
|
| 225 |
-
if po_int and (po_int in s or s in po_int):
|
| 226 |
-
return True
|
| 227 |
-
return False
|
| 228 |
-
|
| 229 |
-
# --- Step 1: Upload POs CSV (very top) ---
|
| 230 |
-
st.sidebar.header("Step 1: Upload Active Purchase Orders (POs)")
|
| 231 |
-
po_file = st.sidebar.file_uploader(
|
| 232 |
-
"Upload POs CSV (must include PO number, Supplier, Items, etc.)",
|
| 233 |
-
type=["csv"],
|
| 234 |
-
key="po_csv"
|
| 235 |
-
)
|
| 236 |
-
po_df = None
|
| 237 |
-
if po_file:
|
| 238 |
-
po_df = pd.read_csv(po_file)
|
| 239 |
-
st.sidebar.success(f"Loaded {len(po_df)} rows from uploaded CSV.")
|
| 240 |
-
st.sidebar.dataframe(po_df.head())
|
| 241 |
-
st.session_state['last_po_df'] = po_df # Save PO to session
|
| 242 |
-
|
| 243 |
-
# --- Set Scoring Weights (Total = 100%) ---
|
| 244 |
-
st.sidebar.header("Set Scoring Weights (Total = 100%)")
|
| 245 |
-
def int_slider(label, value, key):
|
| 246 |
-
# A slider with number input
|
| 247 |
-
return st.sidebar.slider(label, 0, 100, value, 1, key=key, format="%d")
|
| 248 |
-
|
| 249 |
-
weight_supplier = int_slider("Supplier Name Weight (%)", 25, "w_supplier")
|
| 250 |
-
weight_po_number = int_slider("PO Number Weight (%)", 25, "w_po")
|
| 251 |
-
weight_currency = int_slider("Currency Weight (%)", 10, "w_curr")
|
| 252 |
-
weight_total_due = int_slider("Total Due Weight (%)", 20, "w_due")
|
| 253 |
-
weight_line_item = int_slider("Line Item Weight (%)", 20, "w_line")
|
| 254 |
-
weight_sum = weight_supplier + weight_po_number + weight_currency + weight_total_due + weight_line_item
|
| 255 |
-
if weight_sum != 100:
|
| 256 |
-
st.sidebar.warning(f"Sum of weights is {weight_sum}%. Adjust so it equals 100%.")
|
| 257 |
|
| 258 |
-
|
| 259 |
-
st.sidebar.header("Set Decision Thresholds")
|
| 260 |
-
approved_threshold = st.sidebar.slider("Threshold for 'APPROVED'", min_value=0, max_value=100, value=85, format="%d")
|
| 261 |
-
partial_threshold = st.sidebar.slider("Threshold for 'PARTIALLY APPROVED'", min_value=0, max_value=approved_threshold-1, value=70, format="%d")
|
| 262 |
-
|
| 263 |
-
def find_best_po_match(inv, po_df):
|
| 264 |
inv_hdr = inv["invoice_header"]
|
| 265 |
inv_supplier = inv_hdr.get("supplier_name") or ""
|
| 266 |
-
inv_po_number = inv_hdr.get("purchase_order_number") or inv_hdr.get("po_number") or inv_hdr.get("order_number") or ""
|
| 267 |
inv_currency = inv_hdr.get("currency") or ""
|
| 268 |
inv_total_due = clean_num(inv_hdr.get("total_due"))
|
| 269 |
inv_line_items = inv.get("line_items", [])
|
| 270 |
-
|
| 271 |
scores = []
|
| 272 |
for idx, row in po_df.iterrows():
|
| 273 |
po_supplier = row.get("Supplier Name", "")
|
| 274 |
po_po_number = str(row.get("PO Number", ""))
|
| 275 |
po_currency = row.get("Currency", "")
|
| 276 |
po_total = clean_num(row.get("PO Total Value", ""))
|
| 277 |
-
|
| 278 |
-
po_qty = str(row.get("Item Quantity", ""))
|
| 279 |
-
po_unit = str(row.get("Item Unit Price", ""))
|
| 280 |
-
po_line_total = clean_num(row.get("Line Item Total", ""))
|
| 281 |
-
|
| 282 |
-
field_details = []
|
| 283 |
-
|
| 284 |
s_supplier = weighted_fuzzy_score(inv_supplier, po_supplier)
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
"invoice": inv_supplier,
|
| 288 |
-
"po": po_supplier,
|
| 289 |
-
"score": s_supplier
|
| 290 |
-
})
|
| 291 |
-
|
| 292 |
-
s_po_number = 100 if find_po_number_in_json(po_po_number, inv) else 0
|
| 293 |
-
field_details.append({
|
| 294 |
-
"field": "PO Number (anywhere in JSON)",
|
| 295 |
-
"invoice": "found" if s_po_number else "not found",
|
| 296 |
-
"po": po_po_number,
|
| 297 |
-
"score": s_po_number
|
| 298 |
-
})
|
| 299 |
-
|
| 300 |
s_currency = weighted_fuzzy_score(inv_currency, po_currency)
|
| 301 |
-
field_details.append({
|
| 302 |
-
"field": "Currency",
|
| 303 |
-
"invoice": inv_currency,
|
| 304 |
-
"po": po_currency,
|
| 305 |
-
"score": s_currency
|
| 306 |
-
})
|
| 307 |
-
|
| 308 |
s_total = 100 if inv_total_due is not None and po_total is not None and abs(inv_total_due - po_total) < 2 else 0
|
| 309 |
-
|
| 310 |
-
"field": "Total Due",
|
| 311 |
-
"invoice": inv_total_due,
|
| 312 |
-
"po": po_total,
|
| 313 |
-
"score": s_total
|
| 314 |
-
})
|
| 315 |
-
|
| 316 |
-
# Line item logic as before
|
| 317 |
line_item_score = 0
|
| 318 |
-
line_reason = ""
|
| 319 |
-
best_line_detail = None
|
| 320 |
for line in inv_line_items:
|
| 321 |
-
desc_score = weighted_fuzzy_score(line.get("description", ""),
|
| 322 |
-
qty_score = 100 if clean_num(line.get("quantity")) == clean_num(
|
| 323 |
-
unit_score = 100 if clean_num(line.get("price")) == clean_num(
|
| 324 |
-
amount_score = 100 if clean_num(line.get("amount")) ==
|
| 325 |
total = desc_score * 0.5 + qty_score * 0.2 + unit_score * 0.15 + amount_score * 0.15
|
| 326 |
-
detail = {
|
| 327 |
-
"field": "Line Item",
|
| 328 |
-
"invoice": {
|
| 329 |
-
"description": line.get("description", ""),
|
| 330 |
-
"quantity": line.get("quantity", ""),
|
| 331 |
-
"price": line.get("price", ""),
|
| 332 |
-
"amount": line.get("amount", ""),
|
| 333 |
-
},
|
| 334 |
-
"po": {
|
| 335 |
-
"description": po_desc,
|
| 336 |
-
"quantity": po_qty,
|
| 337 |
-
"price": po_unit,
|
| 338 |
-
"amount": po_line_total,
|
| 339 |
-
},
|
| 340 |
-
"desc_score": desc_score,
|
| 341 |
-
"qty_score": qty_score,
|
| 342 |
-
"unit_score": unit_score,
|
| 343 |
-
"amount_score": amount_score,
|
| 344 |
-
"line_item_score": total
|
| 345 |
-
}
|
| 346 |
if total > line_item_score:
|
| 347 |
line_item_score = total
|
| 348 |
-
|
| 349 |
-
line_reason = (
|
| 350 |
-
f"Best line item: desc_score={desc_score}, qty_score={qty_score}, "
|
| 351 |
-
f"unit_score={unit_score}, amount_score={amount_score}"
|
| 352 |
-
)
|
| 353 |
-
|
| 354 |
-
wsum = weight_supplier + weight_po_number + weight_currency + weight_total_due + weight_line_item
|
| 355 |
total_score = (
|
| 356 |
-
s_supplier *
|
| 357 |
-
s_po_number *
|
| 358 |
-
s_currency *
|
| 359 |
-
s_total *
|
| 360 |
-
line_item_score *
|
| 361 |
-
)
|
| 362 |
-
|
| 363 |
reason = (
|
| 364 |
-
f"Supplier match: {s_supplier}/100
|
| 365 |
-
f"PO Number: {s_po_number}/100
|
| 366 |
-
f"Currency: {s_currency}/100
|
| 367 |
-
f"Total Due: {'match' if s_total else 'no match'}
|
| 368 |
-
f"Line item best match: {int(line_item_score)}/100.
|
| 369 |
)
|
| 370 |
-
|
| 371 |
debug = {
|
| 372 |
"po_idx": idx,
|
| 373 |
-
"
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
"
|
| 381 |
-
"inv_total_due": inv_total_due
|
| 382 |
}
|
| 383 |
scores.append((row, total_score, reason, debug))
|
| 384 |
-
|
| 385 |
scores.sort(key=lambda tup: tup[1], reverse=True)
|
| 386 |
if not scores:
|
| 387 |
return None, 0, "No POs found.", {}
|
| 388 |
best_row, best_score, reason, debug = scores[0]
|
| 389 |
return best_row, best_score, reason, debug
|
| 390 |
|
| 391 |
-
# --- Extraction, decision, and UI logic below is unchanged ---
|
| 392 |
-
|
| 393 |
-
def extract_invoice_info(model_choice, text):
|
| 394 |
-
prompt = get_extraction_prompt(model_choice, text)
|
| 395 |
-
raw = query_llm(model_choice, prompt)
|
| 396 |
-
if not raw:
|
| 397 |
-
return None
|
| 398 |
-
data = clean_json_response(raw)
|
| 399 |
-
if not data:
|
| 400 |
-
return None
|
| 401 |
-
hdr = data.get("invoice_header", {})
|
| 402 |
-
if not hdr and any(k in data for k in ("invoice_number","supplier_name","customer_name")):
|
| 403 |
-
hdr = data
|
| 404 |
-
for k in ("invoice_number","invoice_date","po_number","invoice_value","supplier_name","customer_name"):
|
| 405 |
-
hdr.setdefault(k, None)
|
| 406 |
-
if not hdr.get("supplier_name"):
|
| 407 |
-
hdr["supplier_name"] = fallback_supplier(text)
|
| 408 |
-
hdr = ensure_total_due(hdr)
|
| 409 |
-
items = data.get("line_items", [])
|
| 410 |
-
if not isinstance(items, list):
|
| 411 |
-
items = []
|
| 412 |
-
for itm in items:
|
| 413 |
-
if not isinstance(itm, dict):
|
| 414 |
-
continue
|
| 415 |
-
for k in ("item_number","description","quantity","unit_price","total_price"):
|
| 416 |
-
itm.setdefault(k, None)
|
| 417 |
-
return {"invoice_header": hdr, "line_items": items}
|
| 418 |
-
|
| 419 |
-
def get_content_type(filename):
|
| 420 |
-
mime, _ = mimetypes.guess_type(filename)
|
| 421 |
-
ext = filename.lower().split('.')[-1]
|
| 422 |
-
if ext == "pdf":
|
| 423 |
-
return "text/plain"
|
| 424 |
-
if mime is None:
|
| 425 |
-
return "application/octet-stream"
|
| 426 |
-
return mime
|
| 427 |
-
|
| 428 |
-
UNSTRACT_BASE = "https://llmwhisperer-api.us-central.unstract.com/api/v2"
|
| 429 |
-
UNSTRACT_API_KEY = os.getenv("UNSTRACT_API_KEY")
|
| 430 |
-
|
| 431 |
def extract_text_from_unstract(uploaded_file):
|
| 432 |
filename = getattr(uploaded_file, "name", "uploaded_file")
|
| 433 |
file_bytes = uploaded_file.read()
|
| 434 |
-
content_type =
|
|
|
|
|
|
|
| 435 |
headers = {
|
| 436 |
-
"unstract-key": UNSTRACT_API_KEY,
|
| 437 |
"Content-Type": content_type,
|
| 438 |
}
|
| 439 |
-
url =
|
| 440 |
with st.spinner("Uploading and processing document with EZOFIS AI OCR AGENT..."):
|
| 441 |
r = requests.post(url, headers=headers, data=file_bytes)
|
| 442 |
if r.status_code != 202:
|
|
@@ -446,11 +275,10 @@ def extract_text_from_unstract(uploaded_file):
|
|
| 446 |
if not whisper_hash:
|
| 447 |
st.error("Unstract: No whisper_hash received.")
|
| 448 |
return None
|
| 449 |
-
|
| 450 |
-
status_url = f"{UNSTRACT_BASE}/whisper-status?whisper_hash={whisper_hash}"
|
| 451 |
status_placeholder = st.empty()
|
| 452 |
for i in range(30):
|
| 453 |
-
status_r = requests.get(status_url, headers={"unstract-key": UNSTRACT_API_KEY})
|
| 454 |
if status_r.status_code != 200:
|
| 455 |
st.error(f"Unstract: Error checking status: {status_r.status_code} - {status_r.text}")
|
| 456 |
return None
|
|
@@ -463,9 +291,8 @@ def extract_text_from_unstract(uploaded_file):
|
|
| 463 |
else:
|
| 464 |
status_placeholder.error("Unstract: Timeout waiting for OCR to finish.")
|
| 465 |
return None
|
| 466 |
-
|
| 467 |
-
retrieve_url =
|
| 468 |
-
r = requests.get(retrieve_url, headers={"unstract-key": UNSTRACT_API_KEY})
|
| 469 |
if r.status_code != 200:
|
| 470 |
st.error(f"Unstract: Error retrieving extracted text: {r.status_code} - {r.text}")
|
| 471 |
return None
|
|
@@ -475,105 +302,52 @@ def extract_text_from_unstract(uploaded_file):
|
|
| 475 |
except Exception:
|
| 476 |
return r.text
|
| 477 |
|
| 478 |
-
#
|
| 479 |
-
st.title("Invoice/Document Extractor")
|
| 480 |
-
mdl = st.selectbox("Model for Extraction", list(MODELS.keys()), key="extract_model")
|
| 481 |
-
inv_file = st.file_uploader(
|
| 482 |
-
"Step 2: Upload Invoice or Document File",
|
| 483 |
-
type=["pdf", "docx", "xlsx", "xls", "png", "jpg", "jpeg", "tiff"]
|
| 484 |
-
)
|
| 485 |
|
|
|
|
| 486 |
if st.button("Extract") and inv_file:
|
| 487 |
with st.spinner("Extracting text from document using Unstract..."):
|
| 488 |
text = extract_text_from_unstract(inv_file)
|
| 489 |
if text:
|
| 490 |
-
extracted_info = extract_invoice_info(
|
| 491 |
if extracted_info:
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
st.
|
| 495 |
-
st.
|
| 496 |
-
st.
|
| 497 |
-
st.
|
| 498 |
-
st.table(extracted_info["line_items"])
|
| 499 |
st.session_state['last_extracted_info'] = extracted_info
|
|
|
|
| 500 |
|
| 501 |
extracted_info = st.session_state.get('last_extracted_info', None)
|
| 502 |
po_df = st.session_state.get('last_po_df', None)
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
if best_score > approved_threshold:
|
| 515 |
-
status = "APPROVED"
|
| 516 |
-
elif best_score > partial_threshold:
|
| 517 |
-
status = "PARTIALLY APPROVED"
|
| 518 |
-
else:
|
| 519 |
-
status = "REJECTED"
|
| 520 |
-
return json.dumps({
|
| 521 |
-
"decision": status,
|
| 522 |
-
"reason": f"Best match score: {int(best_score)}/100. {reason}",
|
| 523 |
-
"debug": debug,
|
| 524 |
-
"po_row": best_row.to_dict() if best_row is not None else None
|
| 525 |
-
})
|
| 526 |
-
|
| 527 |
-
if po_df is not None:
|
| 528 |
-
st.session_state["last_po_df"] = po_df
|
| 529 |
-
|
| 530 |
-
if extracted_info is not None and po_df is not None:
|
| 531 |
-
st.markdown("---")
|
| 532 |
-
st.subheader("EZOFIS AP AGENT Decision (OpenAI Only)")
|
| 533 |
if st.button("Make a decision (EZOFIS AP AGENT)"):
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
verbose=True,
|
| 552 |
-
)
|
| 553 |
-
prompt = (
|
| 554 |
-
"You are an expert accounts payable agent. "
|
| 555 |
-
"Use po_match_tool to check for the best possible match using supplier, PO number (which may appear anywhere in the invoice JSON, even within other fields), currency, line items, and total value. "
|
| 556 |
-
"Weigh the importance of each field as an expert would, according to the user-configured weights. "
|
| 557 |
-
"Return a JSON with decision (APPROVED, PARTIALLY APPROVED, REJECTED), reason (include field scores and reasoning), debug, and the best matched PO row.\n"
|
| 558 |
-
f"Invoice JSON:\n{json.dumps(extracted_info, indent=2)}"
|
| 559 |
-
)
|
| 560 |
-
with st.spinner("AI is reasoning and making a decision..."):
|
| 561 |
-
result = agent.run(prompt)
|
| 562 |
-
try:
|
| 563 |
-
result_json = json.loads(result)
|
| 564 |
-
st.write(f"**Decision:** {result_json.get('decision', 'N/A')}")
|
| 565 |
-
st.write(f"**Reason:** {result_json.get('reason', 'N/A')}")
|
| 566 |
-
with st.expander("Debug & Matching Details"):
|
| 567 |
-
st.json(result_json.get('debug'))
|
| 568 |
-
st.subheader("Extracted Invoice JSON")
|
| 569 |
-
st.json(extracted_info)
|
| 570 |
-
st.subheader("Matched PO Row")
|
| 571 |
-
st.json(result_json.get('po_row'))
|
| 572 |
-
except Exception:
|
| 573 |
-
st.subheader("AI Decision & Reason")
|
| 574 |
-
st.write(result)
|
| 575 |
-
|
| 576 |
-
if "last_api" in st.session_state:
|
| 577 |
-
with st.expander("Debug"):
|
| 578 |
-
st.code(st.session_state.last_api)
|
| 579 |
-
st.code(st.session_state.last_raw)
|
|
|
|
| 6 |
import time
|
| 7 |
import mimetypes
|
| 8 |
import pandas as pd
|
| 9 |
+
|
| 10 |
from langchain_community.chat_models import ChatOpenAI
|
| 11 |
from langchain.agents import initialize_agent, Tool, AgentType
|
| 12 |
from fuzzywuzzy import fuzz
|
| 13 |
|
| 14 |
+
# ---- Custom CSS to hide status and streamline look ----
|
| 15 |
+
st.markdown("""
|
| 16 |
+
<style>
|
| 17 |
+
header[data-testid="stHeader"] {visibility: hidden;}
|
| 18 |
+
#MainMenu, .stDeployButton {visibility: hidden;}
|
| 19 |
+
.st-bb, .st-c6, .stDataFrameContainer, .stDataFrame {background: transparent !important;}
|
| 20 |
+
.stButton>button {
|
| 21 |
+
background: linear-gradient(90deg, #1e88e5 0%, #0057b8 100%);
|
| 22 |
+
color: #fff !important;
|
| 23 |
+
border-radius: 8px !important;
|
| 24 |
+
font-weight: 600;
|
| 25 |
+
border: none;
|
| 26 |
+
box-shadow: 0 2px 8px rgba(30,136,229,0.15);
|
| 27 |
}
|
| 28 |
+
.stButton>button:hover {background: #1565c0;}
|
| 29 |
+
</style>
|
| 30 |
+
""", unsafe_allow_html=True)
|
| 31 |
+
|
| 32 |
+
# ---- Sidebar ----
|
| 33 |
+
with st.sidebar:
|
| 34 |
+
st.markdown("<div style='font-size:1.25em; font-weight:700; margin-bottom:0.2em; margin-top:0.7em;'>Step 1: Upload Active Purchase Orders (POs)</div>", unsafe_allow_html=True)
|
| 35 |
+
st.markdown("<div style='color:#eee; margin-bottom:1.1em; font-size:1em;'>Upload a POs CSV (must include PO number, Supplier, Items, etc.)</div>", unsafe_allow_html=True)
|
| 36 |
+
po_file = st.file_uploader(
|
| 37 |
+
"", type=["csv"], key="po_csv", label_visibility="collapsed"
|
| 38 |
+
)
|
| 39 |
+
po_df = None
|
| 40 |
+
if po_file:
|
| 41 |
+
po_df = pd.read_csv(po_file)
|
| 42 |
+
st.session_state['last_po_df'] = po_df
|
| 43 |
+
st.success(f"{len(po_df)} rows uploaded and active.", icon="✅")
|
| 44 |
+
else:
|
| 45 |
+
st.markdown("<span style='color:#bbc2cf; font-size:0.9em'>No PO file uploaded yet.</span>", unsafe_allow_html=True)
|
| 46 |
+
|
| 47 |
+
st.markdown("<hr style='border:0.5px solid #324259; margin:2em 0 1em 0;'/>")
|
| 48 |
+
st.markdown("<span style='color:#b6b8bc; font-size:0.93em;'>Need help? <b>Contact your admin</b></span>", unsafe_allow_html=True)
|
| 49 |
+
|
| 50 |
+
# ---- Scoring Weights Section ----
|
| 51 |
+
st.markdown('<div style="font-size:2rem;font-weight:700;color:#1e2a3a;margin-bottom:0.2em;margin-top:0.5em;">Invoice/Document Extractor</div>', unsafe_allow_html=True)
|
| 52 |
+
st.markdown('<div style="color:#6073a3; margin-bottom:1.3em;">Digitally process and approve invoices with AI-powered PO matching.</div>', unsafe_allow_html=True)
|
| 53 |
+
|
| 54 |
+
with st.container():
|
| 55 |
+
st.markdown('<div style="background:#fff;border-radius:14px;box-shadow:0 4px 32px rgba(34,48,90,0.09),0 1.5px 3.5px rgba(30,136,229,0.07);padding:2rem 2.5rem 1.5rem 2.5rem;margin-bottom:1.5em;">', unsafe_allow_html=True)
|
| 56 |
+
st.markdown("<h3>Set Scoring Weights (Total = 100%)</h3>", unsafe_allow_html=True)
|
| 57 |
+
if "scoring_weights" not in st.session_state:
|
| 58 |
+
st.session_state.scoring_weights = {
|
| 59 |
+
"Supplier": 20,
|
| 60 |
+
"PO Number": 25,
|
| 61 |
+
"Currency": 10,
|
| 62 |
+
"Total Due": 25,
|
| 63 |
+
"Line Item": 20,
|
| 64 |
+
}
|
| 65 |
+
scoring_weights = st.session_state.scoring_weights
|
| 66 |
+
total_weight = 0
|
| 67 |
+
cols = st.columns(len(scoring_weights))
|
| 68 |
+
field_keys = list(scoring_weights.keys())
|
| 69 |
+
for i, field in enumerate(field_keys):
|
| 70 |
+
val = cols[i].number_input(
|
| 71 |
+
f"{field} (%)",
|
| 72 |
+
min_value=0, max_value=100,
|
| 73 |
+
value=int(scoring_weights[field]),
|
| 74 |
+
key=f"scoring_{field}",
|
| 75 |
+
step=1,
|
| 76 |
+
format="%d"
|
| 77 |
+
)
|
| 78 |
+
scoring_weights[field] = val
|
| 79 |
+
total_weight += val
|
| 80 |
+
st.markdown(
|
| 81 |
+
f"<span style='font-size:1em; color:{'#E53935' if total_weight != 100 else '#3BB273'}; font-weight:600;'>"
|
| 82 |
+
f"Total = {total_weight}/100</span>",
|
| 83 |
+
unsafe_allow_html=True
|
| 84 |
+
)
|
| 85 |
+
if total_weight != 100:
|
| 86 |
+
st.warning("Scoring weights must sum to 100!", icon="⚠️")
|
| 87 |
+
st.markdown("</div>", unsafe_allow_html=True)
|
| 88 |
+
|
| 89 |
+
# ---- Upload Invoice/Document ----
|
| 90 |
+
with st.container():
|
| 91 |
+
st.markdown('<div style="background:#fff;border-radius:14px;box-shadow:0 4px 32px rgba(34,48,90,0.09),0 1.5px 3.5px rgba(30,136,229,0.07);padding:2rem 2.5rem 1.5rem 2.5rem;margin-bottom:1.5em;">', unsafe_allow_html=True)
|
| 92 |
+
st.markdown("<h3>Step 2: Upload Invoice or Document</h3>", unsafe_allow_html=True)
|
| 93 |
+
# -- NO model dropdown! --
|
| 94 |
+
inv_file = st.file_uploader(
|
| 95 |
+
"", type=["pdf", "docx", "xlsx", "xls", "png", "jpg", "jpeg", "tiff"], label_visibility="collapsed"
|
| 96 |
+
)
|
| 97 |
+
st.markdown("</div>", unsafe_allow_html=True)
|
| 98 |
|
| 99 |
+
# ========== BUSINESS LOGIC FUNCTIONS (INSERTS) ==========
|
| 100 |
+
def get_api_key():
|
| 101 |
+
key = os.getenv("OPENAI_API_KEY")
|
| 102 |
if not key:
|
| 103 |
+
st.error("❌ OPENAI_API_KEY not set")
|
| 104 |
st.stop()
|
| 105 |
return key
|
| 106 |
|
| 107 |
+
def query_llm(prompt):
|
| 108 |
+
api_url = "https://api.openai.com/v1/chat/completions"
|
| 109 |
headers = {
|
| 110 |
+
"Authorization": f"Bearer {get_api_key()}",
|
| 111 |
"Content-Type": "application/json",
|
| 112 |
}
|
|
|
|
|
|
|
| 113 |
payload = {
|
| 114 |
+
"model": "gpt-4-1106-preview",
|
| 115 |
"messages": [{"role": "user", "content": prompt}],
|
| 116 |
"temperature": 0.1,
|
| 117 |
"max_tokens": 2000,
|
| 118 |
}
|
| 119 |
+
with st.spinner(f"🔍 Fine Tuning The Extracted Data..."):
|
| 120 |
+
r = requests.post(api_url, headers=headers, json=payload, timeout=90)
|
| 121 |
+
if r.status_code != 200:
|
| 122 |
+
st.error(f"🚨 API Error {r.status_code}: {r.text}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
return None
|
| 124 |
+
return r.json()["choices"][0]["message"]["content"]
|
| 125 |
|
| 126 |
def clean_json_response(text):
|
| 127 |
if not text:
|
|
|
|
| 146 |
st.code(frag)
|
| 147 |
return None
|
| 148 |
|
| 149 |
+
def get_extraction_prompt(txt):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
return (
|
| 151 |
+
"You are an expert invoice parser. Extract data according to the visible table structure and column headers in the invoice. "
|
| 152 |
+
"For every line item, only extract fields that correspond to the table columns for that row. "
|
|
|
|
|
|
|
|
|
|
| 153 |
"Use this schema:\n"
|
| 154 |
+
'{ "invoice_header": {"supplier_name":"string", "po_number":"string", "currency":"string", "total_due":"string"}, "line_items": [{"description":"string", "quantity":"string", "price":"string", "amount":"string"}] }'
|
| 155 |
+
"\nIf a field is missing, use null. Return ONLY the JSON object, no explanation.\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
"\nInvoice Text:\n"
|
| 157 |
f"{txt}"
|
| 158 |
)
|
| 159 |
|
| 160 |
+
def extract_invoice_info(text):
|
| 161 |
+
prompt = get_extraction_prompt(text)
|
| 162 |
+
raw = query_llm(prompt)
|
| 163 |
+
if not raw:
|
| 164 |
+
return None
|
| 165 |
+
data = clean_json_response(raw)
|
| 166 |
+
if not data:
|
| 167 |
+
return None
|
| 168 |
+
hdr = data.get("invoice_header", {})
|
| 169 |
+
items = data.get("line_items", [])
|
| 170 |
+
return {"invoice_header": hdr, "line_items": items}
|
| 171 |
|
| 172 |
def clean_num(val):
|
| 173 |
if val is None:
|
|
|
|
| 187 |
return 100
|
| 188 |
return fuzz.token_set_ratio(str(s1).lower(), str(s2).lower())
|
| 189 |
|
| 190 |
+
def find_po_number_anywhere(inv_json, po_number):
|
| 191 |
+
if not po_number or not inv_json:
|
| 192 |
+
return False
|
| 193 |
+
po_str = str(po_number).replace(",", "").replace(".0", "")
|
| 194 |
+
flat = json.dumps(inv_json)
|
| 195 |
+
return po_str in flat.replace(",", "").replace(".0", "")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
|
| 197 |
+
def _numbers_match(a, b):
    """Return True only when both values are present (not None) and equal."""
    return a is not None and b is not None and a == b


def _best_line_item_score(line_items, row):
    """Return the best 0-100 similarity between any invoice line and the PO row.

    Each line is scored as 50% description similarity, 20% quantity,
    15% unit price and 15% line amount; the highest line wins.
    """
    # The PO side is the same for every invoice line, so parse it once.
    po_desc = row.get("Item Description", "")
    po_qty = clean_num(row.get("Item Quantity", ""))
    po_price = clean_num(row.get("Item Unit Price", ""))
    po_amount = clean_num(row.get("Line Item Total", ""))

    best = 0
    for line in line_items:
        if not isinstance(line, dict):
            # Malformed entry in the model output; nothing to compare.
            continue
        desc_score = weighted_fuzzy_score(line.get("description", ""), po_desc)
        # clean_num can yield None (the total-due check guards for it too);
        # two missing values must not be counted as an exact match.
        qty_score = 100 if _numbers_match(clean_num(line.get("quantity")), po_qty) else 0
        unit_score = 100 if _numbers_match(clean_num(line.get("price")), po_price) else 0
        amount_score = 100 if _numbers_match(clean_num(line.get("amount")), po_amount) else 0
        total = desc_score * 0.5 + qty_score * 0.2 + unit_score * 0.15 + amount_score * 0.15
        if total > best:
            best = total
    return best


def find_best_po_match(inv, po_df, weights):
    """Score every purchase-order row against the invoice and return the best.

    Args:
        inv: Extracted invoice dict with ``"invoice_header"`` (dict) and
            ``"line_items"`` (list of dicts). Missing or None entries are
            treated as empty.
        po_df: pandas DataFrame of PO rows. Columns read: "Supplier Name",
            "PO Number", "Currency", "PO Total Value", "Item Description",
            "Item Quantity", "Item Unit Price", "Line Item Total".
        weights: Mapping with the keys "Supplier", "PO Number", "Currency",
            "Total Due" and "Line Item"; each value is a percentage and is
            divided by 100 before being applied to a 0-100 field score.

    Returns:
        A 4-tuple ``(best_row, best_score, reason, debug)``. ``reason`` is a
        human-readable summary of the field scores and ``debug`` holds the
        row index, per-field scores and total. When ``po_df`` has no rows the
        result is ``(None, 0, "No POs found.", {})``. On equal scores the
        earliest row wins.

    Raises:
        KeyError: if ``weights`` lacks one of the five expected keys.
    """
    # The model may emit null for the header or the line items.
    inv_hdr = inv.get("invoice_header") or {}
    inv_supplier = inv_hdr.get("supplier_name") or ""
    inv_currency = inv_hdr.get("currency") or ""
    inv_total_due = clean_num(inv_hdr.get("total_due"))
    inv_line_items = inv.get("line_items") or []

    # Loop-invariant weight fractions.
    w_supplier = weights["Supplier"] / 100
    w_po_number = weights["PO Number"] / 100
    w_currency = weights["Currency"] / 100
    w_total = weights["Total Due"] / 100
    w_line_item = weights["Line Item"] / 100

    best = None
    for idx, row in po_df.iterrows():
        po_supplier = row.get("Supplier Name", "")
        po_po_number = str(row.get("PO Number", ""))
        po_currency = row.get("Currency", "")
        po_total = clean_num(row.get("PO Total Value", ""))

        # --- SCORING FIELDS ---
        s_supplier = weighted_fuzzy_score(inv_supplier, po_supplier)
        # The PO number counts wherever it appears in the invoice data.
        po_number_in_json = find_po_number_anywhere(inv, po_po_number)
        s_po_number = 100 if po_number_in_json else 0
        s_currency = weighted_fuzzy_score(inv_currency, po_currency)
        # Totals match when both are known and differ by less than 2.
        totals_close = (
            inv_total_due is not None
            and po_total is not None
            and abs(inv_total_due - po_total) < 2
        )
        s_total = 100 if totals_close else 0

        # --- LINE ITEM MATCH (basic) ---
        line_item_score = _best_line_item_score(inv_line_items, row)

        # -- WEIGHTED FINAL SCORE --
        total_score = (
            s_supplier * w_supplier
            + s_po_number * w_po_number
            + s_currency * w_currency
            + s_total * w_total
            + line_item_score * w_line_item
        )
        reason = (
            f"Supplier match: {s_supplier}/100, "
            f"PO Number: {s_po_number}/100, "
            f"Currency: {s_currency}/100, "
            f"Total Due: {'match' if s_total else 'no match'}, "
            f"Line item best match: {int(line_item_score)}/100."
        )
        debug = {
            "po_idx": idx,
            "scores": [
                {"field": "Supplier", "score": s_supplier},
                {"field": "PO Number (anywhere in JSON)", "score": s_po_number},
                {"field": "Currency", "score": s_currency},
                {"field": "Total Due", "score": s_total},
                {"field": "Line Item", "score": line_item_score},
            ],
            "total_score": total_score,
        }

        # Strictly-greater keeps the earliest row on ties, which is what a
        # stable descending sort followed by [0] would select.
        if best is None or total_score > best[1]:
            best = (row, total_score, reason, debug)

    if best is None:
        return None, 0, "No POs found.", {}
    return best
|
| 257 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
def extract_text_from_unstract(uploaded_file):
|
| 259 |
filename = getattr(uploaded_file, "name", "uploaded_file")
|
| 260 |
file_bytes = uploaded_file.read()
|
| 261 |
+
content_type = "application/octet-stream"
|
| 262 |
+
if filename.lower().endswith(".pdf"):
|
| 263 |
+
content_type = "text/plain"
|
| 264 |
headers = {
|
| 265 |
+
"unstract-key": os.getenv("UNSTRACT_API_KEY"),
|
| 266 |
"Content-Type": content_type,
|
| 267 |
}
|
| 268 |
+
url = "https://llmwhisperer-api.us-central.unstract.com/api/v2/whisper"
|
| 269 |
with st.spinner("Uploading and processing document with EZOFIS AI OCR AGENT..."):
|
| 270 |
r = requests.post(url, headers=headers, data=file_bytes)
|
| 271 |
if r.status_code != 202:
|
|
|
|
| 275 |
if not whisper_hash:
|
| 276 |
st.error("Unstract: No whisper_hash received.")
|
| 277 |
return None
|
| 278 |
+
status_url = f"https://llmwhisperer-api.us-central.unstract.com/api/v2/whisper-status?whisper_hash={whisper_hash}"
|
|
|
|
| 279 |
status_placeholder = st.empty()
|
| 280 |
for i in range(30):
|
| 281 |
+
status_r = requests.get(status_url, headers={"unstract-key": os.getenv("UNSTRACT_API_KEY")})
|
| 282 |
if status_r.status_code != 200:
|
| 283 |
st.error(f"Unstract: Error checking status: {status_r.status_code} - {status_r.text}")
|
| 284 |
return None
|
|
|
|
| 291 |
else:
|
| 292 |
status_placeholder.error("Unstract: Timeout waiting for OCR to finish.")
|
| 293 |
return None
|
| 294 |
+
retrieve_url = f"https://llmwhisperer-api.us-central.unstract.com/api/v2/whisper-retrieve?whisper_hash={whisper_hash}&text_only=true"
|
| 295 |
+
r = requests.get(retrieve_url, headers={"unstract-key": os.getenv("UNSTRACT_API_KEY")})
|
|
|
|
| 296 |
if r.status_code != 200:
|
| 297 |
st.error(f"Unstract: Error retrieving extracted text: {r.status_code} - {r.text}")
|
| 298 |
return None
|
|
|
|
| 302 |
except Exception:
|
| 303 |
return r.text
|
| 304 |
|
| 305 |
+
# ========== END BUSINESS LOGIC ==========
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 306 |
|
| 307 |
+
# ---- Extraction/Decision Main UI ----
|
| 308 |
# Opening tag of the white "card" wrapper used by both result panels.
_card_open_html = '<div style="background:#fff;border-radius:14px;box-shadow:0 4px 32px rgba(34,48,90,0.09),0 1.5px 3.5px rgba(30,136,229,0.07);padding:2rem 2.5rem 1.5rem 2.5rem;margin-bottom:1.5em;">'

# Step 1: on "Extract", OCR the uploaded invoice, run LLM extraction, show
# the result and remember it in session state for later reruns.
if st.button("Extract") and inv_file:
    # NOTE(review): the spinner is assumed to cover only the OCR call —
    # confirm the intended scope against the deployed app.
    with st.spinner("Extracting text from document using Unstract..."):
        text = extract_text_from_unstract(inv_file)
    # Rebound from session state just below, so None here is harmless.
    extracted_info = extract_invoice_info(text) if text else None
    if extracted_info:
        st.success("Extraction Complete!", icon="✅")
        st.markdown(_card_open_html, unsafe_allow_html=True)
        st.markdown("#### Invoice Metadata")
        st.json(extracted_info["invoice_header"])
        st.markdown("#### Line Items")
        st.json(extracted_info["line_items"])
        st.session_state["last_extracted_info"] = extracted_info
        st.markdown("</div>", unsafe_allow_html=True)

# Step 2: reload everything the decision panel needs from session state,
# since Streamlit reruns the script on every interaction.
extracted_info = st.session_state.get("last_extracted_info")
po_df = st.session_state.get("last_po_df")
scoring_weights = st.session_state.get(
    "scoring_weights",
    {
        "Supplier": 20,
        "PO Number": 25,
        "Currency": 10,
        "Total Due": 25,
        "Line Item": 20,
    },
)

# Step 3: the decision panel is shown only when an invoice and a PO table are
# available and the weights add up to exactly 100.
have_inputs = extracted_info is not None and po_df is not None
if have_inputs and sum(scoring_weights.values()) == 100:
    st.markdown(_card_open_html, unsafe_allow_html=True)
    st.markdown("<h3>EZOFIS AP AGENT Decision</h3>", unsafe_allow_html=True)
    if st.button("Make a decision (EZOFIS AP AGENT)"):
        # Smart PO matching
        best_row, best_score, reason, debug = find_best_po_match(
            extracted_info, po_df, scoring_weights
        )
        # Above 85 approves, above 70 partially approves, otherwise reject.
        status = (
            "APPROVED"
            if best_score > 85
            else "PARTIALLY APPROVED"
            if best_score > 70
            else "REJECTED"
        )
        st.write(f"**Decision:** {status}")
        st.write(f"**Reason:** Best match score: {int(best_score)}/100. {reason}")
        with st.expander("Debug & Matching Details"):
            st.json(debug)
            st.subheader("Extracted Invoice JSON")
            st.json(extracted_info)
            st.subheader("Matched PO Row")
            st.json(None if best_row is None else best_row.to_dict())
    st.markdown("</div>", unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|