DOC_VALID_AGENT

Sleeping

App Files Files Community

Seth0330 committed on Jun 21, 2025

Commit

3a41351

verified ·

1 Parent(s): c572e2b

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -24

app.py CHANGED Viewed

@@ -6,14 +6,13 @@ import time
 import mimetypes
 from datetime import datetime
 from fuzzywuzzy import fuzz
-import pandas as pd
-# ========== CONFIG ==========
 UNSTRACT_BASE = "https://llmwhisperer-api.us-central.unstract.com/api/v2"
 UNSTRACT_API_KEY = os.getenv("UNSTRACT_API_KEY")
 OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
 OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
-GEMMA_MODEL = "mistralai/ministral-8b"
 st.set_page_config(page_title="EZOFIS Document Validation Agent", layout="wide")
 st.markdown("""
@@ -35,7 +34,7 @@ st.markdown(
     unsafe_allow_html=True
 )
-# ====== SIDE-BY-SIDE LAYOUT ======
 col_left, col_right = st.columns([1.35, 1.05])
 with col_left:
@@ -75,7 +74,7 @@ with col_left:
         accept_multiple_files=True
     )
-    # Step 3: Thresholds (SLIDERS HERE)
     st.markdown("<span class='step-num'>3</span> <b>Configure Acceptance Thresholds</b>", unsafe_allow_html=True)
     min_match_score = st.slider("Minimum Type Match Score (0-100)", 50, 100, 70, 1)
     min_confidence = st.slider("Minimum LLM Confidence (0-100)", 50, 100, 70, 1)
@@ -110,7 +109,7 @@ Checklist for precision:
     # Step 6: Run button
     run_btn = st.button("Run Document Validation", type="primary")
-# ========== FUNCTIONS ==========
 def get_content_type(filename):
     mime, _ = mimetypes.guess_type(filename)
@@ -173,20 +172,20 @@ def extract_text_from_unstract(uploaded_file, status_box=None):
     except Exception:
         return r.text
-def build_prompt(doc_text, checklist, agent_instruction, current_date):
     return f"""
 {agent_instruction}
-IMPORTANT: The current date is: {current_date}. Use this value, NOT today's date in your environment, when checking if a document has expired.
-Analyze the following extracted document text and this checklist JSON:
 {json.dumps(checklist)}
-Respond with this JSON:
 {{
   "document_type": "...",          // e.g. Ontario Health Card, BC Services Card
   "expiry_date": "...",            // ISO format if possible
-  "is_expired": true/false,
   "looks_genuine": true/false,
   "confidence": <score 0-100>,
   "checklist_matched": true/false,
@@ -197,8 +196,8 @@ Document Text:
 {doc_text[:4000]}
     """.strip()
-def query_gemma_llm(doc_text, checklist, agent_instruction, current_date, status_box=None):
-    prompt = build_prompt(doc_text, checklist, agent_instruction, current_date)
     headers = {
         "Authorization": f"Bearer {OPENROUTER_API_KEY}",
         "HTTP-Referer": "https://chat.openai.com",
@@ -206,7 +205,7 @@ def query_gemma_llm(doc_text, checklist, agent_instruction, current_date, status
         "Content-Type": "application/json",
     }
     data = {
-        "model": GEMMA_MODEL,
         "messages": [{"role": "user", "content": prompt}],
         "temperature": 0.1,
         "max_tokens": 1024
@@ -252,7 +251,7 @@ Respond ONLY as: {{ "accepted": true/false, "reason": "..." }}
         "Content-Type": "application/json",
     }
     data = {
-        "model": GEMMA_MODEL,
         "messages": [{"role": "user", "content": verdict_prompt}],
         "temperature": 0.1,
         "max_tokens": 256
@@ -281,7 +280,7 @@ def fuzzy_match_type(detected_type, checklist_types):
             best_score = score
     return best_type, best_score
-# ========== CARD RENDERING FUNCTION ==========
 def show_validation_card(result):
     accepted = result["Accepted"] == "Yes"
@@ -295,7 +294,9 @@ def show_validation_card(result):
     st.markdown(f"""
     <div style="border-radius:16px;border:2px solid #A020F0; margin-bottom:32px; background:#f9f7ff;padding:18px 22px 22px 22px;box-shadow:0 3px 16px #0001;">
-      <div style="font-size:14px;font-weight:bold;letter-spacing:1px;margin-bottom:6px;">{result['File']}</div>
       <table style="width:100%;border:none;margin-bottom:12px;">
         <tr>
           <td style="width:40%;font-size:17px;font-weight:700;">Decision:</td>
@@ -333,7 +334,7 @@ def show_validation_card(result):
     </div>
     """, unsafe_allow_html=True)
-# ========== PROCESSING ==========
 if 'run_btn' not in locals():
     run_btn = False
@@ -343,7 +344,12 @@ if run_btn and uploaded_files:
     with col_right:
         for uploaded_file in uploaded_files:
-            st.subheader(f"Validating: {uploaded_file.name}")
             status_box = st.empty()
             debug = {}
@@ -358,7 +364,7 @@ if run_btn and uploaded_files:
                 continue
             # Step 2: LLM Validation
-            llm_json, llm_raw, llm_prompt = query_gemma_llm(doc_text, checklist, agent_instruction, date_str, status_box)
             debug['LLM_prompt'] = llm_prompt
             debug['LLM_raw_response'] = llm_raw
             debug['LLM_parsed_json'] = llm_json
@@ -377,10 +383,14 @@ if run_btn and uploaded_files:
                 checklist_matched = False
             llm_conf = llm_json.get("confidence", 0)
             accepted = (
                 checklist_matched and
                 llm_json.get("looks_genuine", False) and
-                not llm_json.get("is_expired", False) and
                 (llm_conf >= min_confidence)
             )
@@ -393,13 +403,12 @@ if run_btn and uploaded_files:
                 )
                 if not llm_json.get("looks_genuine", False):
                     reason.append("Document does not look genuine.")
-                if llm_json.get("is_expired", False):
                     reason.append("Document is expired.")
             reason.append(f"Genuineness confidence: {llm_conf}.")
             reason.append(llm_json.get("verdict", ""))
-            # Advanced agent: If confidence is in a "gray zone", ask the LLM for a final self-verdict
             verdict_json, verdict_raw, verdict_prompt = advanced_llm_verdict(llm_json, min_confidence, status_box)
             debug['LLM_self_verdict_prompt'] = verdict_prompt
             debug['LLM_self_verdict_raw'] = verdict_raw
@@ -416,7 +425,7 @@ if run_btn and uploaded_files:
                 "Checklist Match": matched_type if checklist_matched else "-",
                 "Type Score": match_score,
                 "Expiry Date": llm_json.get("expiry_date", "-"),
-                "Expired": "Yes" if llm_json.get("is_expired", False) else "No",
                 "Genuine": "Yes" if llm_json.get("looks_genuine", False) else "No",
                 "Confidence": llm_conf,
                 "Accepted": "Yes" if accepted else "No",

 import mimetypes
 from datetime import datetime
 from fuzzywuzzy import fuzz
+# ====== CONFIG ======
 UNSTRACT_BASE = "https://llmwhisperer-api.us-central.unstract.com/api/v2"
 UNSTRACT_API_KEY = os.getenv("UNSTRACT_API_KEY")
 OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
 OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
+MISTRAL_MODEL = "mistralai/mistral-8b-instruct"
 st.set_page_config(page_title="EZOFIS Document Validation Agent", layout="wide")
 st.markdown("""
     unsafe_allow_html=True
 )
+# ====== UI LAYOUT ======
 col_left, col_right = st.columns([1.35, 1.05])
 with col_left:
         accept_multiple_files=True
     )
+    # Step 3: Thresholds
     st.markdown("<span class='step-num'>3</span> <b>Configure Acceptance Thresholds</b>", unsafe_allow_html=True)
     min_match_score = st.slider("Minimum Type Match Score (0-100)", 50, 100, 70, 1)
     min_confidence = st.slider("Minimum LLM Confidence (0-100)", 50, 100, 70, 1)
     # Step 6: Run button
     run_btn = st.button("Run Document Validation", type="primary")
+# ====== HELPER FUNCTIONS ======
 def get_content_type(filename):
     mime, _ = mimetypes.guess_type(filename)
     except Exception:
         return r.text
+def build_mistral_prompt(doc_text, checklist, agent_instruction, current_date):
     return f"""
 {agent_instruction}
+IMPORTANT: Today's date for validation is: {current_date}. You MUST use this exact date, NOT today's system date, when checking if a document is expired.
+Analyze the following extracted document text and the checklist JSON:
 {json.dumps(checklist)}
+Respond with this JSON (your response will be evaluated automatically):
 {{
   "document_type": "...",          // e.g. Ontario Health Card, BC Services Card
   "expiry_date": "...",            // ISO format if possible
+  "is_expired": true/false,        // must be true if expiry_date is before {current_date}
   "looks_genuine": true/false,
   "confidence": <score 0-100>,
   "checklist_matched": true/false,
 {doc_text[:4000]}
     """.strip()
+def query_mistral_llm(doc_text, checklist, agent_instruction, current_date, status_box=None):
+    prompt = build_mistral_prompt(doc_text, checklist, agent_instruction, current_date)
     headers = {
         "Authorization": f"Bearer {OPENROUTER_API_KEY}",
         "HTTP-Referer": "https://chat.openai.com",
         "Content-Type": "application/json",
     }
     data = {
+        "model": MISTRAL_MODEL,
         "messages": [{"role": "user", "content": prompt}],
         "temperature": 0.1,
         "max_tokens": 1024
         "Content-Type": "application/json",
     }
     data = {
+        "model": MISTRAL_MODEL,
         "messages": [{"role": "user", "content": verdict_prompt}],
         "temperature": 0.1,
         "max_tokens": 256
             best_score = score
     return best_type, best_score
+# ====== CARD RENDERING FUNCTION ======
 def show_validation_card(result):
     accepted = result["Accepted"] == "Yes"
     st.markdown(f"""
     <div style="border-radius:16px;border:2px solid #A020F0; margin-bottom:32px; background:#f9f7ff;padding:18px 22px 22px 22px;box-shadow:0 3px 16px #0001;">
+      <div style="font-size:14px;font-weight:600;letter-spacing:0.3px;margin-bottom:10px;color:#333;">
+        {result['File']}
+      </div>
       <table style="width:100%;border:none;margin-bottom:12px;">
         <tr>
           <td style="width:40%;font-size:17px;font-weight:700;">Decision:</td>
     </div>
     """, unsafe_allow_html=True)
+# ====== MAIN PROCESSING LOOP ======
 if 'run_btn' not in locals():
     run_btn = False
     with col_right:
         for uploaded_file in uploaded_files:
+            st.markdown(
+                f"<div style='font-size:15.5px;font-weight:500;color:#424242;margin:14px 0 2px 0;'>"
+                f"Validating: <span style='color:#A020F0'>{uploaded_file.name}</span>"
+                f"</div>",
+                unsafe_allow_html=True
+            )
             status_box = st.empty()
             debug = {}
                 continue
             # Step 2: LLM Validation
+            llm_json, llm_raw, llm_prompt = query_mistral_llm(doc_text, checklist, agent_instruction, date_str, status_box)
             debug['LLM_prompt'] = llm_prompt
             debug['LLM_raw_response'] = llm_raw
             debug['LLM_parsed_json'] = llm_json
                 checklist_matched = False
             llm_conf = llm_json.get("confidence", 0)
+            # Robustly handle is_expired
+            is_expired = llm_json.get("is_expired", False)
+            if isinstance(is_expired, str):
+                is_expired = is_expired.lower() == "true"
             accepted = (
                 checklist_matched and
                 llm_json.get("looks_genuine", False) and
+                not is_expired and
                 (llm_conf >= min_confidence)
             )
                 )
                 if not llm_json.get("looks_genuine", False):
                     reason.append("Document does not look genuine.")
+                if is_expired:
                     reason.append("Document is expired.")
             reason.append(f"Genuineness confidence: {llm_conf}.")
             reason.append(llm_json.get("verdict", ""))
             verdict_json, verdict_raw, verdict_prompt = advanced_llm_verdict(llm_json, min_confidence, status_box)
             debug['LLM_self_verdict_prompt'] = verdict_prompt
             debug['LLM_self_verdict_raw'] = verdict_raw
                 "Checklist Match": matched_type if checklist_matched else "-",
                 "Type Score": match_score,
                 "Expiry Date": llm_json.get("expiry_date", "-"),
+                "Expired": "Yes" if is_expired else "No",
                 "Genuine": "Yes" if llm_json.get("looks_genuine", False) else "No",
                 "Confidence": llm_conf,
                 "Accepted": "Yes" if accepted else "No",