DOC_VALID_AGENT

Sleeping

App Files Community

Seth0330 committed on Jun 21, 2025

Commit

21e2212

verified ·

1 Parent(s): ea4e11e

Update app.py

Browse files

Files changed (1) hide show

app.py +78 -58

app.py CHANGED Viewed

@@ -1,10 +1,10 @@
 import streamlit as st
 import requests
 import json
-import re
 import os
 import time
 import mimetypes
 from fuzzywuzzy import fuzz
 import pandas as pd
@@ -18,10 +18,6 @@ GEMMA_MODEL = "google/gemma-3-4b-it:free"
 st.set_page_config(page_title="EZOFIS Document Validation Agent", layout="wide")
 st.markdown("""
     <style>
-    .block-card {
-        background: #fff; border-radius: 20px; box-shadow: 0 2px 16px rgba(25,39,64,0.05);
-        padding: 32px 26px 24px 26px; margin-bottom: 24px;
-    }
     .step-num {background: #A020F0; color: #fff; border-radius: 999px;
         padding: 6px 13px; font-weight: 700; margin-right: 14px; font-size: 20px;
         display: inline-block; vertical-align: middle;}
@@ -38,10 +34,80 @@ st.markdown(
     unsafe_allow_html=True
 )
 st.markdown(
-    "<div style='font-size:20px; margin-bottom:28px; color:#24345C;'>AI-driven, agentic document acceptance for mortgage applications.</div>",
     unsafe_allow_html=True
 )
 # ========== FUNCTIONS ==========
 def get_content_type(filename):
@@ -105,16 +171,11 @@ def extract_text_from_unstract(uploaded_file, status_box=None):
     except Exception:
         return r.text
-def build_prompt(doc_text, checklist):
     return f"""
-You are a careful, expert document validation agent for mortgage and finance workflows.
-Before you answer, do this: Carefully scan the document for ANY evidence of regional/provincial or country-specific card types (such as "Ontario Health Card", "Medicare Card", "Insurance Card", "SIN", "Driver's License", "Passport", etc.)—be as specific as possible using visible card titles, authority names, or issuer logos.
-Checklist for precision:
-- Prefer the **most specific** document type (e.g. "Ontario Health Card" over just "Identification Card" or "Provincial ID").
-- If there is any ambiguity, include relevant keywords from the card (like "Health", "Medicare", "OHIP", "SIN", "Social Insurance", "Driver", etc.) in the output type.
-- If still not sure, show your best guess but include all possible hints from the document text.
 Analyze the following extracted document text and this checklist JSON:
 {json.dumps(checklist)}
@@ -134,8 +195,8 @@ Document Text:
 {doc_text[:4000]}
     """.strip()
-def query_gemma_llm(doc_text, checklist, status_box=None):
-    prompt = build_prompt(doc_text, checklist)
     headers = {
         "Authorization": f"Bearer {OPENROUTER_API_KEY}",
         "HTTP-Referer": "https://chat.openai.com",  # for OpenRouter
@@ -173,7 +234,6 @@ def query_gemma_llm(doc_text, checklist, status_box=None):
         return None, result, prompt
 def advanced_llm_verdict(llm_json, min_confidence, status_box=None):
-    # Only trigger if confidence is in gray zone: [min_confidence, min_confidence+15)
     conf = llm_json.get("confidence", 0)
     if conf < min_confidence or conf >= min_confidence + 15:
         return None, None, None
@@ -220,45 +280,6 @@ def fuzzy_match_type(detected_type, checklist_types):
             best_score = score
     return best_type, best_score
-# ========== UI ==========
-sample_checklist = '''{
-  "required_documents": [
-    {"type": "Driver's License", "description": "Government-issued photo ID"},
-    {"type": "Passport", "description": "Valid passport"},
-    {"type": "SIN Card", "description": "Social Insurance Number document"},
-    {"type": "Bank Statement", "description": "Last 3 months bank statement"},
-    {"type": "Employment Letter", "description": "Signed letter from employer"},
-    {"type": "Pay Stub", "description": "Most recent pay stub"},
-    {"type": "Proof of Address", "description": "Utility bill or lease"}
-  ]
-}'''
-st.markdown("<span class='step-num'>1</span> <b>Paste Mortgage Checklist (JSON)</b>", unsafe_allow_html=True)
-checklist_text = st.text_area(
-    "Paste or edit your mortgage checklist JSON below:",
-    value=sample_checklist,
-    height=200,
-    key="doc_checklist_json"
-)
-try:
-    checklist = json.loads(checklist_text)
-    required_types = [doc["type"] for doc in checklist["required_documents"]]
-except Exception as e:
-    st.error("Invalid checklist JSON.")
-    st.stop()
-st.markdown("<span class='step-num'>2</span> <b>Upload Document(s) to Validate</b>", unsafe_allow_html=True)
-uploaded_files = st.file_uploader(
-    "Upload PDF, DOCX, XLSX, PNG, JPG, TIFF, etc.",
-    type=["pdf", "docx", "xlsx", "xls", "png", "jpg", "jpeg", "tiff"],
-    key="mortgage_files",
-    accept_multiple_files=True
-)
-st.markdown("<span class='step-num'>3</span> <b>Configure Acceptance Thresholds</b>", unsafe_allow_html=True)
-min_match_score = st.slider("Minimum Type Match Score (0-100)", 50, 100, 70, 1)
-min_confidence = st.slider("Minimum LLM Confidence (0-100)", 50, 100, 70, 1)
 # ========== PROCESSING ==========
 if st.button("Run Document Validation", type="primary") and uploaded_files:
     results = []
@@ -280,7 +301,7 @@ if st.button("Run Document Validation", type="primary") and uploaded_files:
             continue
         # Step 2: LLM Validation
-        llm_json, llm_raw, llm_prompt = query_gemma_llm(doc_text, checklist, status_box)
         debug['LLM_prompt'] = llm_prompt
         debug['LLM_raw_response'] = llm_raw
         debug['LLM_parsed_json'] = llm_json
@@ -294,7 +315,6 @@ if st.button("Run Document Validation", type="primary") and uploaded_files:
         detected_type = llm_json.get("document_type", "")
         matched_type, match_score = fuzzy_match_type(detected_type, required_types)
-        # Accept only if LLM states checklist_matched, looks genuine, and not expired, and confidence high enough
         checklist_matched = llm_json.get("checklist_matched", False)
         if checklist_matched and match_score < min_match_score:
             checklist_matched = False

 import streamlit as st
 import requests
 import json
 import os
 import time
 import mimetypes
+from datetime import datetime
 from fuzzywuzzy import fuzz
 import pandas as pd
 st.set_page_config(page_title="EZOFIS Document Validation Agent", layout="wide")
 st.markdown("""
     <style>
     .step-num {background: #A020F0; color: #fff; border-radius: 999px;
         padding: 6px 13px; font-weight: 700; margin-right: 14px; font-size: 20px;
         display: inline-block; vertical-align: middle;}
     unsafe_allow_html=True
 )
 st.markdown(
+    "<div style='font-size:20px; margin-bottom:28px; color:#24345C;'>AI-driven, agentic document acceptance for mortgage and finance workflows.</div>",
     unsafe_allow_html=True
 )
+# ========== UI ==========
+# --- Step 0: Agent Instructions ---
+st.markdown("<span class='step-num'>0</span> <b>Instruct Agent</b>", unsafe_allow_html=True)
+sample_instruction = """You are a careful, expert document validation agent for mortgage and finance workflows.
+Before you answer, do this: Carefully scan the document for ANY evidence of regional/provincial or country-specific card types (such as "Ontario Health Card", "Medicare Card", "Insurance Card", "SIN", "Driver's License", "Passport", etc.)—be as specific as possible using visible card titles, authority names, or issuer logos.
+Checklist for precision:
+- Prefer the **most specific** document type (e.g. "Ontario Health Card" over just "Identification Card" or "Provincial ID").
+- If there is any ambiguity, include relevant keywords from the card (like "Health", "Medicare", "OHIP", "SIN", "Social Insurance", "Driver", etc.) in the output type.
+- If still not sure, show your best guess but include all possible hints from the document text."""
+agent_instruction = st.text_area(
+    "Instructions for the Document Validation Agent (edit as needed):",
+    value=sample_instruction,
+    height=240,
+    key="agent_instruction"
+)
+# --- Step 0b: Current Date for Expiry ---
+st.markdown("<span class='step-num'>0b</span> <b>Set Current Date for Expiry Validation</b>", unsafe_allow_html=True)
+current_date = st.date_input(
+    "Current date to be used by the agent for expiry checking",
+    value=datetime.now().date(),
+    key="current_date"
+)
+date_str = str(current_date)
+# --- Step 1: Checklist JSON input ---
+sample_checklist = '''{
+  "required_documents": [
+    {"type": "Driver's License", "description": "Government-issued photo ID"},
+    {"type": "Passport", "description": "Valid passport"},
+    {"type": "SIN Card", "description": "Social Insurance Number document"},
+    {"type": "Bank Statement", "description": "Last 3 months bank statement"},
+    {"type": "Employment Letter", "description": "Signed letter from employer"},
+    {"type": "Pay Stub", "description": "Most recent pay stub"},
+    {"type": "Proof of Address", "description": "Utility bill or lease"},
+    {"type": "Ontario Health Card", "description": "Provincial health insurance card"}
+  ]
+}'''
+st.markdown("<span class='step-num'>1</span> <b>Paste Mortgage Checklist (JSON)</b>", unsafe_allow_html=True)
+checklist_text = st.text_area(
+    "Paste or edit your mortgage checklist JSON below:",
+    value=sample_checklist,
+    height=200,
+    key="doc_checklist_json"
+)
+try:
+    checklist = json.loads(checklist_text)
+    required_types = [doc["type"] for doc in checklist["required_documents"]]
+except Exception as e:
+    st.error("Invalid checklist JSON.")
+    st.stop()
+# --- Step 2: Document upload ---
+st.markdown("<span class='step-num'>2</span> <b>Upload Document(s) to Validate</b>", unsafe_allow_html=True)
+uploaded_files = st.file_uploader(
+    "Upload PDF, DOCX, XLSX, PNG, JPG, TIFF, etc.",
+    type=["pdf", "docx", "xlsx", "xls", "png", "jpg", "jpeg", "tiff"],
+    key="mortgage_files",
+    accept_multiple_files=True
+)
+# --- Step 3: Thresholds ---
+st.markdown("<span class='step-num'>3</span> <b>Configure Acceptance Thresholds</b>", unsafe_allow_html=True)
+min_match_score = st.slider("Minimum Type Match Score (0-100)", 50, 100, 70, 1)
+min_confidence = st.slider("Minimum LLM Confidence (0-100)", 50, 100, 70, 1)
 # ========== FUNCTIONS ==========
 def get_content_type(filename):
     except Exception:
         return r.text
+def build_prompt(doc_text, checklist, agent_instruction, current_date):
     return f"""
+{agent_instruction}
+IMPORTANT: The current date is: {current_date}. Use this value, NOT today's date in your environment, when checking if a document has expired.
 Analyze the following extracted document text and this checklist JSON:
 {json.dumps(checklist)}
 {doc_text[:4000]}
     """.strip()
+def query_gemma_llm(doc_text, checklist, agent_instruction, current_date, status_box=None):
+    prompt = build_prompt(doc_text, checklist, agent_instruction, current_date)
     headers = {
         "Authorization": f"Bearer {OPENROUTER_API_KEY}",
         "HTTP-Referer": "https://chat.openai.com",  # for OpenRouter
         return None, result, prompt
 def advanced_llm_verdict(llm_json, min_confidence, status_box=None):
     conf = llm_json.get("confidence", 0)
     if conf < min_confidence or conf >= min_confidence + 15:
         return None, None, None
             best_score = score
     return best_type, best_score
 # ========== PROCESSING ==========
 if st.button("Run Document Validation", type="primary") and uploaded_files:
     results = []
             continue
         # Step 2: LLM Validation
+        llm_json, llm_raw, llm_prompt = query_gemma_llm(doc_text, checklist, agent_instruction, date_str, status_box)
         debug['LLM_prompt'] = llm_prompt
         debug['LLM_raw_response'] = llm_raw
         debug['LLM_parsed_json'] = llm_json
         detected_type = llm_json.get("document_type", "")
         matched_type, match_score = fuzzy_match_type(detected_type, required_types)
         checklist_matched = llm_json.get("checklist_matched", False)
         if checklist_matched and match_score < min_match_score:
             checklist_matched = False