Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,14 +6,13 @@ import time
|
|
| 6 |
import mimetypes
|
| 7 |
from datetime import datetime
|
| 8 |
from fuzzywuzzy import fuzz
|
| 9 |
-
import pandas as pd
|
| 10 |
|
| 11 |
-
#
|
| 12 |
UNSTRACT_BASE = "https://llmwhisperer-api.us-central.unstract.com/api/v2"
|
| 13 |
UNSTRACT_API_KEY = os.getenv("UNSTRACT_API_KEY")
|
| 14 |
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
|
| 15 |
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
|
| 16 |
-
|
| 17 |
|
| 18 |
st.set_page_config(page_title="EZOFIS Document Validation Agent", layout="wide")
|
| 19 |
st.markdown("""
|
|
@@ -35,7 +34,7 @@ st.markdown(
|
|
| 35 |
unsafe_allow_html=True
|
| 36 |
)
|
| 37 |
|
| 38 |
-
# ======
|
| 39 |
col_left, col_right = st.columns([1.35, 1.05])
|
| 40 |
|
| 41 |
with col_left:
|
|
@@ -75,7 +74,7 @@ with col_left:
|
|
| 75 |
accept_multiple_files=True
|
| 76 |
)
|
| 77 |
|
| 78 |
-
# Step 3: Thresholds
|
| 79 |
st.markdown("<span class='step-num'>3</span> <b>Configure Acceptance Thresholds</b>", unsafe_allow_html=True)
|
| 80 |
min_match_score = st.slider("Minimum Type Match Score (0-100)", 50, 100, 70, 1)
|
| 81 |
min_confidence = st.slider("Minimum LLM Confidence (0-100)", 50, 100, 70, 1)
|
|
@@ -110,7 +109,7 @@ Checklist for precision:
|
|
| 110 |
# Step 6: Run button
|
| 111 |
run_btn = st.button("Run Document Validation", type="primary")
|
| 112 |
|
| 113 |
-
#
|
| 114 |
|
| 115 |
def get_content_type(filename):
|
| 116 |
mime, _ = mimetypes.guess_type(filename)
|
|
@@ -173,20 +172,20 @@ def extract_text_from_unstract(uploaded_file, status_box=None):
|
|
| 173 |
except Exception:
|
| 174 |
return r.text
|
| 175 |
|
| 176 |
-
def
|
| 177 |
return f"""
|
| 178 |
{agent_instruction}
|
| 179 |
|
| 180 |
-
IMPORTANT:
|
| 181 |
|
| 182 |
-
Analyze the following extracted document text and
|
| 183 |
{json.dumps(checklist)}
|
| 184 |
|
| 185 |
-
Respond with this JSON:
|
| 186 |
{{
|
| 187 |
"document_type": "...", // e.g. Ontario Health Card, BC Services Card
|
| 188 |
"expiry_date": "...", // ISO format if possible
|
| 189 |
-
"is_expired": true/false,
|
| 190 |
"looks_genuine": true/false,
|
| 191 |
"confidence": <score 0-100>,
|
| 192 |
"checklist_matched": true/false,
|
|
@@ -197,8 +196,8 @@ Document Text:
|
|
| 197 |
{doc_text[:4000]}
|
| 198 |
""".strip()
|
| 199 |
|
| 200 |
-
def query_gemma_llm(doc_text, checklist, agent_instruction, current_date, status_box=None):
|
| 201 |
-
prompt =
|
| 202 |
headers = {
|
| 203 |
"Authorization": f"Bearer {OPENROUTER_API_KEY}",
|
| 204 |
"HTTP-Referer": "https://chat.openai.com",
|
|
@@ -206,7 +205,7 @@ def query_gemma_llm(doc_text, checklist, agent_instruction, current_date, status
|
|
| 206 |
"Content-Type": "application/json",
|
| 207 |
}
|
| 208 |
data = {
|
| 209 |
-
"model":
|
| 210 |
"messages": [{"role": "user", "content": prompt}],
|
| 211 |
"temperature": 0.1,
|
| 212 |
"max_tokens": 1024
|
|
@@ -252,7 +251,7 @@ Respond ONLY as: {{ "accepted": true/false, "reason": "..." }}
|
|
| 252 |
"Content-Type": "application/json",
|
| 253 |
}
|
| 254 |
data = {
|
| 255 |
-
"model":
|
| 256 |
"messages": [{"role": "user", "content": verdict_prompt}],
|
| 257 |
"temperature": 0.1,
|
| 258 |
"max_tokens": 256
|
|
@@ -281,7 +280,7 @@ def fuzzy_match_type(detected_type, checklist_types):
|
|
| 281 |
best_score = score
|
| 282 |
return best_type, best_score
|
| 283 |
|
| 284 |
-
#
|
| 285 |
|
| 286 |
def show_validation_card(result):
|
| 287 |
accepted = result["Accepted"] == "Yes"
|
|
@@ -295,7 +294,9 @@ def show_validation_card(result):
|
|
| 295 |
|
| 296 |
st.markdown(f"""
|
| 297 |
<div style="border-radius:16px;border:2px solid #A020F0; margin-bottom:32px; background:#f9f7ff;padding:18px 22px 22px 22px;box-shadow:0 3px 16px #0001;">
|
| 298 |
-
<div style="font-size:14px;font-weight:
|
|
|
|
|
|
|
| 299 |
<table style="width:100%;border:none;margin-bottom:12px;">
|
| 300 |
<tr>
|
| 301 |
<td style="width:40%;font-size:17px;font-weight:700;">Decision:</td>
|
|
@@ -333,7 +334,7 @@ def show_validation_card(result):
|
|
| 333 |
</div>
|
| 334 |
""", unsafe_allow_html=True)
|
| 335 |
|
| 336 |
-
#
|
| 337 |
if 'run_btn' not in locals():
|
| 338 |
run_btn = False
|
| 339 |
|
|
@@ -343,7 +344,12 @@ if run_btn and uploaded_files:
|
|
| 343 |
|
| 344 |
with col_right:
|
| 345 |
for uploaded_file in uploaded_files:
|
| 346 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 347 |
status_box = st.empty()
|
| 348 |
debug = {}
|
| 349 |
|
|
@@ -358,7 +364,7 @@ if run_btn and uploaded_files:
|
|
| 358 |
continue
|
| 359 |
|
| 360 |
# Step 2: LLM Validation
|
| 361 |
-
llm_json, llm_raw, llm_prompt =
|
| 362 |
debug['LLM_prompt'] = llm_prompt
|
| 363 |
debug['LLM_raw_response'] = llm_raw
|
| 364 |
debug['LLM_parsed_json'] = llm_json
|
|
@@ -377,10 +383,14 @@ if run_btn and uploaded_files:
|
|
| 377 |
checklist_matched = False
|
| 378 |
|
| 379 |
llm_conf = llm_json.get("confidence", 0)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 380 |
accepted = (
|
| 381 |
checklist_matched and
|
| 382 |
llm_json.get("looks_genuine", False) and
|
| 383 |
-
not
|
| 384 |
(llm_conf >= min_confidence)
|
| 385 |
)
|
| 386 |
|
|
@@ -393,13 +403,12 @@ if run_btn and uploaded_files:
|
|
| 393 |
)
|
| 394 |
if not llm_json.get("looks_genuine", False):
|
| 395 |
reason.append("Document does not look genuine.")
|
| 396 |
-
if
|
| 397 |
reason.append("Document is expired.")
|
| 398 |
|
| 399 |
reason.append(f"Genuineness confidence: {llm_conf}.")
|
| 400 |
reason.append(llm_json.get("verdict", ""))
|
| 401 |
|
| 402 |
-
# Advanced agent: If confidence is in a "gray zone", ask the LLM for a final self-verdict
|
| 403 |
verdict_json, verdict_raw, verdict_prompt = advanced_llm_verdict(llm_json, min_confidence, status_box)
|
| 404 |
debug['LLM_self_verdict_prompt'] = verdict_prompt
|
| 405 |
debug['LLM_self_verdict_raw'] = verdict_raw
|
|
@@ -416,7 +425,7 @@ if run_btn and uploaded_files:
|
|
| 416 |
"Checklist Match": matched_type if checklist_matched else "-",
|
| 417 |
"Type Score": match_score,
|
| 418 |
"Expiry Date": llm_json.get("expiry_date", "-"),
|
| 419 |
-
"Expired": "Yes" if
|
| 420 |
"Genuine": "Yes" if llm_json.get("looks_genuine", False) else "No",
|
| 421 |
"Confidence": llm_conf,
|
| 422 |
"Accepted": "Yes" if accepted else "No",
|
|
|
|
| 6 |
import mimetypes
|
| 7 |
from datetime import datetime
|
| 8 |
from fuzzywuzzy import fuzz
|
|
|
|
| 9 |
|
| 10 |
+
# ====== CONFIG ======
|
| 11 |
UNSTRACT_BASE = "https://llmwhisperer-api.us-central.unstract.com/api/v2"
|
| 12 |
UNSTRACT_API_KEY = os.getenv("UNSTRACT_API_KEY")
|
| 13 |
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
|
| 14 |
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
|
| 15 |
+
MISTRAL_MODEL = "mistralai/mistral-8b-instruct"
|
| 16 |
|
| 17 |
st.set_page_config(page_title="EZOFIS Document Validation Agent", layout="wide")
|
| 18 |
st.markdown("""
|
|
|
|
| 34 |
unsafe_allow_html=True
|
| 35 |
)
|
| 36 |
|
| 37 |
+
# ====== UI LAYOUT ======
|
| 38 |
col_left, col_right = st.columns([1.35, 1.05])
|
| 39 |
|
| 40 |
with col_left:
|
|
|
|
| 74 |
accept_multiple_files=True
|
| 75 |
)
|
| 76 |
|
| 77 |
+
# Step 3: Thresholds
|
| 78 |
st.markdown("<span class='step-num'>3</span> <b>Configure Acceptance Thresholds</b>", unsafe_allow_html=True)
|
| 79 |
min_match_score = st.slider("Minimum Type Match Score (0-100)", 50, 100, 70, 1)
|
| 80 |
min_confidence = st.slider("Minimum LLM Confidence (0-100)", 50, 100, 70, 1)
|
|
|
|
| 109 |
# Step 6: Run button
|
| 110 |
run_btn = st.button("Run Document Validation", type="primary")
|
| 111 |
|
| 112 |
+
# ====== HELPER FUNCTIONS ======
|
| 113 |
|
| 114 |
def get_content_type(filename):
|
| 115 |
mime, _ = mimetypes.guess_type(filename)
|
|
|
|
| 172 |
except Exception:
|
| 173 |
return r.text
|
| 174 |
|
| 175 |
+
def build_mistral_prompt(doc_text, checklist, agent_instruction, current_date):
|
| 176 |
return f"""
|
| 177 |
{agent_instruction}
|
| 178 |
|
| 179 |
+
IMPORTANT: Today's date for validation is: {current_date}. You MUST use this exact date, NOT today's system date, when checking if a document is expired.
|
| 180 |
|
| 181 |
+
Analyze the following extracted document text and the checklist JSON:
|
| 182 |
{json.dumps(checklist)}
|
| 183 |
|
| 184 |
+
Respond with this JSON (your response will be evaluated automatically):
|
| 185 |
{{
|
| 186 |
"document_type": "...", // e.g. Ontario Health Card, BC Services Card
|
| 187 |
"expiry_date": "...", // ISO format if possible
|
| 188 |
+
"is_expired": true/false, // must be true if expiry_date is before {current_date}
|
| 189 |
"looks_genuine": true/false,
|
| 190 |
"confidence": <score 0-100>,
|
| 191 |
"checklist_matched": true/false,
|
|
|
|
| 196 |
{doc_text[:4000]}
|
| 197 |
""".strip()
|
| 198 |
|
| 199 |
+
def query_mistral_llm(doc_text, checklist, agent_instruction, current_date, status_box=None):
|
| 200 |
+
prompt = build_mistral_prompt(doc_text, checklist, agent_instruction, current_date)
|
| 201 |
headers = {
|
| 202 |
"Authorization": f"Bearer {OPENROUTER_API_KEY}",
|
| 203 |
"HTTP-Referer": "https://chat.openai.com",
|
|
|
|
| 205 |
"Content-Type": "application/json",
|
| 206 |
}
|
| 207 |
data = {
|
| 208 |
+
"model": MISTRAL_MODEL,
|
| 209 |
"messages": [{"role": "user", "content": prompt}],
|
| 210 |
"temperature": 0.1,
|
| 211 |
"max_tokens": 1024
|
|
|
|
| 251 |
"Content-Type": "application/json",
|
| 252 |
}
|
| 253 |
data = {
|
| 254 |
+
"model": MISTRAL_MODEL,
|
| 255 |
"messages": [{"role": "user", "content": verdict_prompt}],
|
| 256 |
"temperature": 0.1,
|
| 257 |
"max_tokens": 256
|
|
|
|
| 280 |
best_score = score
|
| 281 |
return best_type, best_score
|
| 282 |
|
| 283 |
+
# ====== CARD RENDERING FUNCTION ======
|
| 284 |
|
| 285 |
def show_validation_card(result):
|
| 286 |
accepted = result["Accepted"] == "Yes"
|
|
|
|
| 294 |
|
| 295 |
st.markdown(f"""
|
| 296 |
<div style="border-radius:16px;border:2px solid #A020F0; margin-bottom:32px; background:#f9f7ff;padding:18px 22px 22px 22px;box-shadow:0 3px 16px #0001;">
|
| 297 |
+
<div style="font-size:14px;font-weight:600;letter-spacing:0.3px;margin-bottom:10px;color:#333;">
|
| 298 |
+
{result['File']}
|
| 299 |
+
</div>
|
| 300 |
<table style="width:100%;border:none;margin-bottom:12px;">
|
| 301 |
<tr>
|
| 302 |
<td style="width:40%;font-size:17px;font-weight:700;">Decision:</td>
|
|
|
|
| 334 |
</div>
|
| 335 |
""", unsafe_allow_html=True)
|
| 336 |
|
| 337 |
+
# ====== MAIN PROCESSING LOOP ======
|
| 338 |
if 'run_btn' not in locals():
|
| 339 |
run_btn = False
|
| 340 |
|
|
|
|
| 344 |
|
| 345 |
with col_right:
|
| 346 |
for uploaded_file in uploaded_files:
|
| 347 |
+
st.markdown(
|
| 348 |
+
f"<div style='font-size:15.5px;font-weight:500;color:#424242;margin:14px 0 2px 0;'>"
|
| 349 |
+
f"Validating: <span style='color:#A020F0'>{uploaded_file.name}</span>"
|
| 350 |
+
f"</div>",
|
| 351 |
+
unsafe_allow_html=True
|
| 352 |
+
)
|
| 353 |
status_box = st.empty()
|
| 354 |
debug = {}
|
| 355 |
|
|
|
|
| 364 |
continue
|
| 365 |
|
| 366 |
# Step 2: LLM Validation
|
| 367 |
+
llm_json, llm_raw, llm_prompt = query_mistral_llm(doc_text, checklist, agent_instruction, date_str, status_box)
|
| 368 |
debug['LLM_prompt'] = llm_prompt
|
| 369 |
debug['LLM_raw_response'] = llm_raw
|
| 370 |
debug['LLM_parsed_json'] = llm_json
|
|
|
|
| 383 |
checklist_matched = False
|
| 384 |
|
| 385 |
llm_conf = llm_json.get("confidence", 0)
|
| 386 |
+
# Robustly handle is_expired
|
| 387 |
+
is_expired = llm_json.get("is_expired", False)
|
| 388 |
+
if isinstance(is_expired, str):
|
| 389 |
+
is_expired = is_expired.lower() == "true"
|
| 390 |
accepted = (
|
| 391 |
checklist_matched and
|
| 392 |
llm_json.get("looks_genuine", False) and
|
| 393 |
+
not is_expired and
|
| 394 |
(llm_conf >= min_confidence)
|
| 395 |
)
|
| 396 |
|
|
|
|
| 403 |
)
|
| 404 |
if not llm_json.get("looks_genuine", False):
|
| 405 |
reason.append("Document does not look genuine.")
|
| 406 |
+
if is_expired:
|
| 407 |
reason.append("Document is expired.")
|
| 408 |
|
| 409 |
reason.append(f"Genuineness confidence: {llm_conf}.")
|
| 410 |
reason.append(llm_json.get("verdict", ""))
|
| 411 |
|
|
|
|
| 412 |
verdict_json, verdict_raw, verdict_prompt = advanced_llm_verdict(llm_json, min_confidence, status_box)
|
| 413 |
debug['LLM_self_verdict_prompt'] = verdict_prompt
|
| 414 |
debug['LLM_self_verdict_raw'] = verdict_raw
|
|
|
|
| 425 |
"Checklist Match": matched_type if checklist_matched else "-",
|
| 426 |
"Type Score": match_score,
|
| 427 |
"Expiry Date": llm_json.get("expiry_date", "-"),
|
| 428 |
+
"Expired": "Yes" if is_expired else "No",
|
| 429 |
"Genuine": "Yes" if llm_json.get("looks_genuine", False) else "No",
|
| 430 |
"Confidence": llm_conf,
|
| 431 |
"Accepted": "Yes" if accepted else "No",
|