Seth0330 committed on
Commit
3a41351
·
verified ·
1 Parent(s): c572e2b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -24
app.py CHANGED
@@ -6,14 +6,13 @@ import time
6
  import mimetypes
7
  from datetime import datetime
8
  from fuzzywuzzy import fuzz
9
- import pandas as pd
10
 
11
- # ========== CONFIG ==========
12
  UNSTRACT_BASE = "https://llmwhisperer-api.us-central.unstract.com/api/v2"
13
  UNSTRACT_API_KEY = os.getenv("UNSTRACT_API_KEY")
14
  OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
15
  OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
16
- GEMMA_MODEL = "mistralai/ministral-8b"
17
 
18
  st.set_page_config(page_title="EZOFIS Document Validation Agent", layout="wide")
19
  st.markdown("""
@@ -35,7 +34,7 @@ st.markdown(
35
  unsafe_allow_html=True
36
  )
37
 
38
- # ====== SIDE-BY-SIDE LAYOUT ======
39
  col_left, col_right = st.columns([1.35, 1.05])
40
 
41
  with col_left:
@@ -75,7 +74,7 @@ with col_left:
75
  accept_multiple_files=True
76
  )
77
 
78
- # Step 3: Thresholds (SLIDERS HERE)
79
  st.markdown("<span class='step-num'>3</span> <b>Configure Acceptance Thresholds</b>", unsafe_allow_html=True)
80
  min_match_score = st.slider("Minimum Type Match Score (0-100)", 50, 100, 70, 1)
81
  min_confidence = st.slider("Minimum LLM Confidence (0-100)", 50, 100, 70, 1)
@@ -110,7 +109,7 @@ Checklist for precision:
110
  # Step 6: Run button
111
  run_btn = st.button("Run Document Validation", type="primary")
112
 
113
- # ========== FUNCTIONS ==========
114
 
115
  def get_content_type(filename):
116
  mime, _ = mimetypes.guess_type(filename)
@@ -173,20 +172,20 @@ def extract_text_from_unstract(uploaded_file, status_box=None):
173
  except Exception:
174
  return r.text
175
 
176
- def build_prompt(doc_text, checklist, agent_instruction, current_date):
177
  return f"""
178
  {agent_instruction}
179
 
180
- IMPORTANT: The current date is: {current_date}. Use this value, NOT today's date in your environment, when checking if a document has expired.
181
 
182
- Analyze the following extracted document text and this checklist JSON:
183
  {json.dumps(checklist)}
184
 
185
- Respond with this JSON:
186
  {{
187
  "document_type": "...", // e.g. Ontario Health Card, BC Services Card
188
  "expiry_date": "...", // ISO format if possible
189
- "is_expired": true/false,
190
  "looks_genuine": true/false,
191
  "confidence": <score 0-100>,
192
  "checklist_matched": true/false,
@@ -197,8 +196,8 @@ Document Text:
197
  {doc_text[:4000]}
198
  """.strip()
199
 
200
- def query_gemma_llm(doc_text, checklist, agent_instruction, current_date, status_box=None):
201
- prompt = build_prompt(doc_text, checklist, agent_instruction, current_date)
202
  headers = {
203
  "Authorization": f"Bearer {OPENROUTER_API_KEY}",
204
  "HTTP-Referer": "https://chat.openai.com",
@@ -206,7 +205,7 @@ def query_gemma_llm(doc_text, checklist, agent_instruction, current_date, status
206
  "Content-Type": "application/json",
207
  }
208
  data = {
209
- "model": GEMMA_MODEL,
210
  "messages": [{"role": "user", "content": prompt}],
211
  "temperature": 0.1,
212
  "max_tokens": 1024
@@ -252,7 +251,7 @@ Respond ONLY as: {{ "accepted": true/false, "reason": "..." }}
252
  "Content-Type": "application/json",
253
  }
254
  data = {
255
- "model": GEMMA_MODEL,
256
  "messages": [{"role": "user", "content": verdict_prompt}],
257
  "temperature": 0.1,
258
  "max_tokens": 256
@@ -281,7 +280,7 @@ def fuzzy_match_type(detected_type, checklist_types):
281
  best_score = score
282
  return best_type, best_score
283
 
284
- # ========== CARD RENDERING FUNCTION ==========
285
 
286
  def show_validation_card(result):
287
  accepted = result["Accepted"] == "Yes"
@@ -295,7 +294,9 @@ def show_validation_card(result):
295
 
296
  st.markdown(f"""
297
  <div style="border-radius:16px;border:2px solid #A020F0; margin-bottom:32px; background:#f9f7ff;padding:18px 22px 22px 22px;box-shadow:0 3px 16px #0001;">
298
- <div style="font-size:14px;font-weight:bold;letter-spacing:1px;margin-bottom:6px;">{result['File']}</div>
 
 
299
  <table style="width:100%;border:none;margin-bottom:12px;">
300
  <tr>
301
  <td style="width:40%;font-size:17px;font-weight:700;">Decision:</td>
@@ -333,7 +334,7 @@ def show_validation_card(result):
333
  </div>
334
  """, unsafe_allow_html=True)
335
 
336
- # ========== PROCESSING ==========
337
  if 'run_btn' not in locals():
338
  run_btn = False
339
 
@@ -343,7 +344,12 @@ if run_btn and uploaded_files:
343
 
344
  with col_right:
345
  for uploaded_file in uploaded_files:
346
- st.subheader(f"Validating: {uploaded_file.name}")
 
 
 
 
 
347
  status_box = st.empty()
348
  debug = {}
349
 
@@ -358,7 +364,7 @@ if run_btn and uploaded_files:
358
  continue
359
 
360
  # Step 2: LLM Validation
361
- llm_json, llm_raw, llm_prompt = query_gemma_llm(doc_text, checklist, agent_instruction, date_str, status_box)
362
  debug['LLM_prompt'] = llm_prompt
363
  debug['LLM_raw_response'] = llm_raw
364
  debug['LLM_parsed_json'] = llm_json
@@ -377,10 +383,14 @@ if run_btn and uploaded_files:
377
  checklist_matched = False
378
 
379
  llm_conf = llm_json.get("confidence", 0)
 
 
 
 
380
  accepted = (
381
  checklist_matched and
382
  llm_json.get("looks_genuine", False) and
383
- not llm_json.get("is_expired", False) and
384
  (llm_conf >= min_confidence)
385
  )
386
 
@@ -393,13 +403,12 @@ if run_btn and uploaded_files:
393
  )
394
  if not llm_json.get("looks_genuine", False):
395
  reason.append("Document does not look genuine.")
396
- if llm_json.get("is_expired", False):
397
  reason.append("Document is expired.")
398
 
399
  reason.append(f"Genuineness confidence: {llm_conf}.")
400
  reason.append(llm_json.get("verdict", ""))
401
 
402
- # Advanced agent: If confidence is in a "gray zone", ask the LLM for a final self-verdict
403
  verdict_json, verdict_raw, verdict_prompt = advanced_llm_verdict(llm_json, min_confidence, status_box)
404
  debug['LLM_self_verdict_prompt'] = verdict_prompt
405
  debug['LLM_self_verdict_raw'] = verdict_raw
@@ -416,7 +425,7 @@ if run_btn and uploaded_files:
416
  "Checklist Match": matched_type if checklist_matched else "-",
417
  "Type Score": match_score,
418
  "Expiry Date": llm_json.get("expiry_date", "-"),
419
- "Expired": "Yes" if llm_json.get("is_expired", False) else "No",
420
  "Genuine": "Yes" if llm_json.get("looks_genuine", False) else "No",
421
  "Confidence": llm_conf,
422
  "Accepted": "Yes" if accepted else "No",
 
6
  import mimetypes
7
  from datetime import datetime
8
  from fuzzywuzzy import fuzz
 
9
 
10
+ # ====== CONFIG ======
11
  UNSTRACT_BASE = "https://llmwhisperer-api.us-central.unstract.com/api/v2"
12
  UNSTRACT_API_KEY = os.getenv("UNSTRACT_API_KEY")
13
  OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
14
  OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
15
+ MISTRAL_MODEL = "mistralai/mistral-8b-instruct"
16
 
17
  st.set_page_config(page_title="EZOFIS Document Validation Agent", layout="wide")
18
  st.markdown("""
 
34
  unsafe_allow_html=True
35
  )
36
 
37
+ # ====== UI LAYOUT ======
38
  col_left, col_right = st.columns([1.35, 1.05])
39
 
40
  with col_left:
 
74
  accept_multiple_files=True
75
  )
76
 
77
+ # Step 3: Thresholds
78
  st.markdown("<span class='step-num'>3</span> <b>Configure Acceptance Thresholds</b>", unsafe_allow_html=True)
79
  min_match_score = st.slider("Minimum Type Match Score (0-100)", 50, 100, 70, 1)
80
  min_confidence = st.slider("Minimum LLM Confidence (0-100)", 50, 100, 70, 1)
 
109
  # Step 6: Run button
110
  run_btn = st.button("Run Document Validation", type="primary")
111
 
112
+ # ====== HELPER FUNCTIONS ======
113
 
114
  def get_content_type(filename):
115
  mime, _ = mimetypes.guess_type(filename)
 
172
  except Exception:
173
  return r.text
174
 
175
+ def build_mistral_prompt(doc_text, checklist, agent_instruction, current_date):
176
  return f"""
177
  {agent_instruction}
178
 
179
+ IMPORTANT: Today's date for validation is: {current_date}. You MUST use this exact date, NOT today's system date, when checking if a document is expired.
180
 
181
+ Analyze the following extracted document text and the checklist JSON:
182
  {json.dumps(checklist)}
183
 
184
+ Respond with this JSON (your response will be evaluated automatically):
185
  {{
186
  "document_type": "...", // e.g. Ontario Health Card, BC Services Card
187
  "expiry_date": "...", // ISO format if possible
188
+ "is_expired": true/false, // must be true if expiry_date is before {current_date}
189
  "looks_genuine": true/false,
190
  "confidence": <score 0-100>,
191
  "checklist_matched": true/false,
 
196
  {doc_text[:4000]}
197
  """.strip()
198
 
199
+ def query_mistral_llm(doc_text, checklist, agent_instruction, current_date, status_box=None):
200
+ prompt = build_mistral_prompt(doc_text, checklist, agent_instruction, current_date)
201
  headers = {
202
  "Authorization": f"Bearer {OPENROUTER_API_KEY}",
203
  "HTTP-Referer": "https://chat.openai.com",
 
205
  "Content-Type": "application/json",
206
  }
207
  data = {
208
+ "model": MISTRAL_MODEL,
209
  "messages": [{"role": "user", "content": prompt}],
210
  "temperature": 0.1,
211
  "max_tokens": 1024
 
251
  "Content-Type": "application/json",
252
  }
253
  data = {
254
+ "model": MISTRAL_MODEL,
255
  "messages": [{"role": "user", "content": verdict_prompt}],
256
  "temperature": 0.1,
257
  "max_tokens": 256
 
280
  best_score = score
281
  return best_type, best_score
282
 
283
+ # ====== CARD RENDERING FUNCTION ======
284
 
285
  def show_validation_card(result):
286
  accepted = result["Accepted"] == "Yes"
 
294
 
295
  st.markdown(f"""
296
  <div style="border-radius:16px;border:2px solid #A020F0; margin-bottom:32px; background:#f9f7ff;padding:18px 22px 22px 22px;box-shadow:0 3px 16px #0001;">
297
+ <div style="font-size:14px;font-weight:600;letter-spacing:0.3px;margin-bottom:10px;color:#333;">
298
+ {result['File']}
299
+ </div>
300
  <table style="width:100%;border:none;margin-bottom:12px;">
301
  <tr>
302
  <td style="width:40%;font-size:17px;font-weight:700;">Decision:</td>
 
334
  </div>
335
  """, unsafe_allow_html=True)
336
 
337
+ # ====== MAIN PROCESSING LOOP ======
338
  if 'run_btn' not in locals():
339
  run_btn = False
340
 
 
344
 
345
  with col_right:
346
  for uploaded_file in uploaded_files:
347
+ st.markdown(
348
+ f"<div style='font-size:15.5px;font-weight:500;color:#424242;margin:14px 0 2px 0;'>"
349
+ f"Validating: <span style='color:#A020F0'>{uploaded_file.name}</span>"
350
+ f"</div>",
351
+ unsafe_allow_html=True
352
+ )
353
  status_box = st.empty()
354
  debug = {}
355
 
 
364
  continue
365
 
366
  # Step 2: LLM Validation
367
+ llm_json, llm_raw, llm_prompt = query_mistral_llm(doc_text, checklist, agent_instruction, date_str, status_box)
368
  debug['LLM_prompt'] = llm_prompt
369
  debug['LLM_raw_response'] = llm_raw
370
  debug['LLM_parsed_json'] = llm_json
 
383
  checklist_matched = False
384
 
385
  llm_conf = llm_json.get("confidence", 0)
386
+ # Normalize is_expired: the LLM may return it as a string ("true"/"false") instead of a JSON boolean
387
+ is_expired = llm_json.get("is_expired", False)
388
+ if isinstance(is_expired, str):
389
+ is_expired = is_expired.lower() == "true"
390
  accepted = (
391
  checklist_matched and
392
  llm_json.get("looks_genuine", False) and
393
+ not is_expired and
394
  (llm_conf >= min_confidence)
395
  )
396
 
 
403
  )
404
  if not llm_json.get("looks_genuine", False):
405
  reason.append("Document does not look genuine.")
406
+ if is_expired:
407
  reason.append("Document is expired.")
408
 
409
  reason.append(f"Genuineness confidence: {llm_conf}.")
410
  reason.append(llm_json.get("verdict", ""))
411
 
 
412
  verdict_json, verdict_raw, verdict_prompt = advanced_llm_verdict(llm_json, min_confidence, status_box)
413
  debug['LLM_self_verdict_prompt'] = verdict_prompt
414
  debug['LLM_self_verdict_raw'] = verdict_raw
 
425
  "Checklist Match": matched_type if checklist_matched else "-",
426
  "Type Score": match_score,
427
  "Expiry Date": llm_json.get("expiry_date", "-"),
428
+ "Expired": "Yes" if is_expired else "No",
429
  "Genuine": "Yes" if llm_json.get("looks_genuine", False) else "No",
430
  "Confidence": llm_conf,
431
  "Accepted": "Yes" if accepted else "No",