Seth0330 committed on
Commit
21e2212
·
verified ·
1 Parent(s): ea4e11e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -58
app.py CHANGED
@@ -1,10 +1,10 @@
1
  import streamlit as st
2
  import requests
3
  import json
4
- import re
5
  import os
6
  import time
7
  import mimetypes
 
8
  from fuzzywuzzy import fuzz
9
  import pandas as pd
10
 
@@ -18,10 +18,6 @@ GEMMA_MODEL = "google/gemma-3-4b-it:free"
18
  st.set_page_config(page_title="EZOFIS Document Validation Agent", layout="wide")
19
  st.markdown("""
20
  <style>
21
- .block-card {
22
- background: #fff; border-radius: 20px; box-shadow: 0 2px 16px rgba(25,39,64,0.05);
23
- padding: 32px 26px 24px 26px; margin-bottom: 24px;
24
- }
25
  .step-num {background: #A020F0; color: #fff; border-radius: 999px;
26
  padding: 6px 13px; font-weight: 700; margin-right: 14px; font-size: 20px;
27
  display: inline-block; vertical-align: middle;}
@@ -38,10 +34,80 @@ st.markdown(
38
  unsafe_allow_html=True
39
  )
40
  st.markdown(
41
- "<div style='font-size:20px; margin-bottom:28px; color:#24345C;'>AI-driven, agentic document acceptance for mortgage applications.</div>",
42
  unsafe_allow_html=True
43
  )
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  # ========== FUNCTIONS ==========
46
 
47
  def get_content_type(filename):
@@ -105,16 +171,11 @@ def extract_text_from_unstract(uploaded_file, status_box=None):
105
  except Exception:
106
  return r.text
107
 
108
- def build_prompt(doc_text, checklist):
109
  return f"""
110
- You are a careful, expert document validation agent for mortgage and finance workflows.
111
 
112
- Before you answer, do this: Carefully scan the document for ANY evidence of regional/provincial or country-specific card types (such as "Ontario Health Card", "Medicare Card", "Insurance Card", "SIN", "Driver's License", "Passport", etc.)—be as specific as possible using visible card titles, authority names, or issuer logos.
113
-
114
- Checklist for precision:
115
- - Prefer the **most specific** document type (e.g. "Ontario Health Card" over just "Identification Card" or "Provincial ID").
116
- - If there is any ambiguity, include relevant keywords from the card (like "Health", "Medicare", "OHIP", "SIN", "Social Insurance", "Driver", etc.) in the output type.
117
- - If still not sure, show your best guess but include all possible hints from the document text.
118
 
119
  Analyze the following extracted document text and this checklist JSON:
120
  {json.dumps(checklist)}
@@ -134,8 +195,8 @@ Document Text:
134
  {doc_text[:4000]}
135
  """.strip()
136
 
137
- def query_gemma_llm(doc_text, checklist, status_box=None):
138
- prompt = build_prompt(doc_text, checklist)
139
  headers = {
140
  "Authorization": f"Bearer {OPENROUTER_API_KEY}",
141
  "HTTP-Referer": "https://chat.openai.com", # for OpenRouter
@@ -173,7 +234,6 @@ def query_gemma_llm(doc_text, checklist, status_box=None):
173
  return None, result, prompt
174
 
175
  def advanced_llm_verdict(llm_json, min_confidence, status_box=None):
176
- # Only trigger if confidence is in gray zone: [min_confidence, min_confidence+15)
177
  conf = llm_json.get("confidence", 0)
178
  if conf < min_confidence or conf >= min_confidence + 15:
179
  return None, None, None
@@ -220,45 +280,6 @@ def fuzzy_match_type(detected_type, checklist_types):
220
  best_score = score
221
  return best_type, best_score
222
 
223
- # ========== UI ==========
224
- sample_checklist = '''{
225
- "required_documents": [
226
- {"type": "Driver's License", "description": "Government-issued photo ID"},
227
- {"type": "Passport", "description": "Valid passport"},
228
- {"type": "SIN Card", "description": "Social Insurance Number document"},
229
- {"type": "Bank Statement", "description": "Last 3 months bank statement"},
230
- {"type": "Employment Letter", "description": "Signed letter from employer"},
231
- {"type": "Pay Stub", "description": "Most recent pay stub"},
232
- {"type": "Proof of Address", "description": "Utility bill or lease"}
233
- ]
234
- }'''
235
-
236
- st.markdown("<span class='step-num'>1</span> <b>Paste Mortgage Checklist (JSON)</b>", unsafe_allow_html=True)
237
- checklist_text = st.text_area(
238
- "Paste or edit your mortgage checklist JSON below:",
239
- value=sample_checklist,
240
- height=200,
241
- key="doc_checklist_json"
242
- )
243
- try:
244
- checklist = json.loads(checklist_text)
245
- required_types = [doc["type"] for doc in checklist["required_documents"]]
246
- except Exception as e:
247
- st.error("Invalid checklist JSON.")
248
- st.stop()
249
-
250
- st.markdown("<span class='step-num'>2</span> <b>Upload Document(s) to Validate</b>", unsafe_allow_html=True)
251
- uploaded_files = st.file_uploader(
252
- "Upload PDF, DOCX, XLSX, PNG, JPG, TIFF, etc.",
253
- type=["pdf", "docx", "xlsx", "xls", "png", "jpg", "jpeg", "tiff"],
254
- key="mortgage_files",
255
- accept_multiple_files=True
256
- )
257
-
258
- st.markdown("<span class='step-num'>3</span> <b>Configure Acceptance Thresholds</b>", unsafe_allow_html=True)
259
- min_match_score = st.slider("Minimum Type Match Score (0-100)", 50, 100, 70, 1)
260
- min_confidence = st.slider("Minimum LLM Confidence (0-100)", 50, 100, 70, 1)
261
-
262
  # ========== PROCESSING ==========
263
  if st.button("Run Document Validation", type="primary") and uploaded_files:
264
  results = []
@@ -280,7 +301,7 @@ if st.button("Run Document Validation", type="primary") and uploaded_files:
280
  continue
281
 
282
  # Step 2: LLM Validation
283
- llm_json, llm_raw, llm_prompt = query_gemma_llm(doc_text, checklist, status_box)
284
  debug['LLM_prompt'] = llm_prompt
285
  debug['LLM_raw_response'] = llm_raw
286
  debug['LLM_parsed_json'] = llm_json
@@ -294,7 +315,6 @@ if st.button("Run Document Validation", type="primary") and uploaded_files:
294
  detected_type = llm_json.get("document_type", "")
295
  matched_type, match_score = fuzzy_match_type(detected_type, required_types)
296
 
297
- # Accept only if LLM states checklist_matched, looks genuine, and not expired, and confidence high enough
298
  checklist_matched = llm_json.get("checklist_matched", False)
299
  if checklist_matched and match_score < min_match_score:
300
  checklist_matched = False
 
1
  import streamlit as st
2
  import requests
3
  import json
 
4
  import os
5
  import time
6
  import mimetypes
7
+ from datetime import datetime
8
  from fuzzywuzzy import fuzz
9
  import pandas as pd
10
 
 
18
  st.set_page_config(page_title="EZOFIS Document Validation Agent", layout="wide")
19
  st.markdown("""
20
  <style>
 
 
 
 
21
  .step-num {background: #A020F0; color: #fff; border-radius: 999px;
22
  padding: 6px 13px; font-weight: 700; margin-right: 14px; font-size: 20px;
23
  display: inline-block; vertical-align: middle;}
 
34
  unsafe_allow_html=True
35
  )
36
  st.markdown(
37
+ "<div style='font-size:20px; margin-bottom:28px; color:#24345C;'>AI-driven, agentic document acceptance for mortgage and finance workflows.</div>",
38
  unsafe_allow_html=True
39
  )
40
 
41
+ # ========== UI ==========
42
+
43
+ # --- Step 0: Agent Instructions ---
44
+ st.markdown("<span class='step-num'>0</span> <b>Instruct Agent</b>", unsafe_allow_html=True)
45
+ sample_instruction = """You are a careful, expert document validation agent for mortgage and finance workflows.
46
+
47
+ Before you answer, do this: Carefully scan the document for ANY evidence of regional/provincial or country-specific card types (such as "Ontario Health Card", "Medicare Card", "Insurance Card", "SIN", "Driver's License", "Passport", etc.)—be as specific as possible using visible card titles, authority names, or issuer logos.
48
+
49
+ Checklist for precision:
50
+ - Prefer the **most specific** document type (e.g. "Ontario Health Card" over just "Identification Card" or "Provincial ID").
51
+ - If there is any ambiguity, include relevant keywords from the card (like "Health", "Medicare", "OHIP", "SIN", "Social Insurance", "Driver", etc.) in the output type.
52
+ - If still not sure, show your best guess but include all possible hints from the document text."""
53
+ agent_instruction = st.text_area(
54
+ "Instructions for the Document Validation Agent (edit as needed):",
55
+ value=sample_instruction,
56
+ height=240,
57
+ key="agent_instruction"
58
+ )
59
+
60
+ # --- Step 0b: Current Date for Expiry ---
61
+ st.markdown("<span class='step-num'>0b</span> <b>Set Current Date for Expiry Validation</b>", unsafe_allow_html=True)
62
+ current_date = st.date_input(
63
+ "Current date to be used by the agent for expiry checking",
64
+ value=datetime.now().date(),
65
+ key="current_date"
66
+ )
67
+ date_str = str(current_date)
68
+
69
+ # --- Step 1: Checklist JSON input ---
70
+ sample_checklist = '''{
71
+ "required_documents": [
72
+ {"type": "Driver's License", "description": "Government-issued photo ID"},
73
+ {"type": "Passport", "description": "Valid passport"},
74
+ {"type": "SIN Card", "description": "Social Insurance Number document"},
75
+ {"type": "Bank Statement", "description": "Last 3 months bank statement"},
76
+ {"type": "Employment Letter", "description": "Signed letter from employer"},
77
+ {"type": "Pay Stub", "description": "Most recent pay stub"},
78
+ {"type": "Proof of Address", "description": "Utility bill or lease"},
79
+ {"type": "Ontario Health Card", "description": "Provincial health insurance card"}
80
+ ]
81
+ }'''
82
+
83
+ st.markdown("<span class='step-num'>1</span> <b>Paste Mortgage Checklist (JSON)</b>", unsafe_allow_html=True)
84
+ checklist_text = st.text_area(
85
+ "Paste or edit your mortgage checklist JSON below:",
86
+ value=sample_checklist,
87
+ height=200,
88
+ key="doc_checklist_json"
89
+ )
90
+ try:
91
+ checklist = json.loads(checklist_text)
92
+ required_types = [doc["type"] for doc in checklist["required_documents"]]
93
+ except Exception as e:
94
+ st.error("Invalid checklist JSON.")
95
+ st.stop()
96
+
97
+ # --- Step 2: Document upload ---
98
+ st.markdown("<span class='step-num'>2</span> <b>Upload Document(s) to Validate</b>", unsafe_allow_html=True)
99
+ uploaded_files = st.file_uploader(
100
+ "Upload PDF, DOCX, XLSX, PNG, JPG, TIFF, etc.",
101
+ type=["pdf", "docx", "xlsx", "xls", "png", "jpg", "jpeg", "tiff"],
102
+ key="mortgage_files",
103
+ accept_multiple_files=True
104
+ )
105
+
106
+ # --- Step 3: Thresholds ---
107
+ st.markdown("<span class='step-num'>3</span> <b>Configure Acceptance Thresholds</b>", unsafe_allow_html=True)
108
+ min_match_score = st.slider("Minimum Type Match Score (0-100)", 50, 100, 70, 1)
109
+ min_confidence = st.slider("Minimum LLM Confidence (0-100)", 50, 100, 70, 1)
110
+
111
  # ========== FUNCTIONS ==========
112
 
113
  def get_content_type(filename):
 
171
  except Exception:
172
  return r.text
173
 
174
+ def build_prompt(doc_text, checklist, agent_instruction, current_date):
175
  return f"""
176
+ {agent_instruction}
177
 
178
+ IMPORTANT: The current date is: {current_date}. Use this value, NOT today's date in your environment, when checking if a document has expired.
 
 
 
 
 
179
 
180
  Analyze the following extracted document text and this checklist JSON:
181
  {json.dumps(checklist)}
 
195
  {doc_text[:4000]}
196
  """.strip()
197
 
198
+ def query_gemma_llm(doc_text, checklist, agent_instruction, current_date, status_box=None):
199
+ prompt = build_prompt(doc_text, checklist, agent_instruction, current_date)
200
  headers = {
201
  "Authorization": f"Bearer {OPENROUTER_API_KEY}",
202
  "HTTP-Referer": "https://chat.openai.com", # for OpenRouter
 
234
  return None, result, prompt
235
 
236
  def advanced_llm_verdict(llm_json, min_confidence, status_box=None):
 
237
  conf = llm_json.get("confidence", 0)
238
  if conf < min_confidence or conf >= min_confidence + 15:
239
  return None, None, None
 
280
  best_score = score
281
  return best_type, best_score
282
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
  # ========== PROCESSING ==========
284
  if st.button("Run Document Validation", type="primary") and uploaded_files:
285
  results = []
 
301
  continue
302
 
303
  # Step 2: LLM Validation
304
+ llm_json, llm_raw, llm_prompt = query_gemma_llm(doc_text, checklist, agent_instruction, date_str, status_box)
305
  debug['LLM_prompt'] = llm_prompt
306
  debug['LLM_raw_response'] = llm_raw
307
  debug['LLM_parsed_json'] = llm_json
 
315
  detected_type = llm_json.get("document_type", "")
316
  matched_type, match_score = fuzzy_match_type(detected_type, required_types)
317
 
 
318
  checklist_matched = llm_json.get("checklist_matched", False)
319
  if checklist_matched and match_score < min_match_score:
320
  checklist_matched = False