DOC_VALID_AGENT

Sleeping

App Files Files Community

Seth0330 committed on Jun 21, 2025

Commit

1c49f02

verified ·

1 Parent(s): bd13fee

Update app.py

Browse files

Files changed (1) hide show

app.py +113 -123

app.py CHANGED Viewed

@@ -27,7 +27,6 @@ st.markdown("""
         padding: 10px 32px !important; font-weight: 700; border: none !important; font-size: 18px !important;
         margin-top: 12px !important;
     }
-    /* Style result table headers */
     .styled-table th {
         background: #f3ecff !important;
         color: #42318d !important;
@@ -42,12 +41,6 @@ st.markdown("""
         word-break: break-word;
         max-width: 220px;
     }
-    .accepted-row {
-        background: #e7ffe7 !important;
-    }
-    .rejected-row {
-        background: #fff1f0 !important;
-    }
     </style>
 """, unsafe_allow_html=True)
@@ -57,7 +50,6 @@ st.markdown(
 )
 # ====== SIDE-BY-SIDE LAYOUT ======
 col_left, col_right = st.columns([1.35, 1.05])
 with col_left:
@@ -97,7 +89,7 @@ with col_left:
         accept_multiple_files=True
     )
-    # Step 3: Thresholds (SLIDERS MOVED HERE)
     st.markdown("<span class='step-num'>3</span> <b>Configure Acceptance Thresholds</b>", unsafe_allow_html=True)
     min_match_score = st.slider("Minimum Type Match Score (0-100)", 50, 100, 70, 1)
     min_confidence = st.slider("Minimum LLM Confidence (0-100)", 50, 100, 70, 1)
@@ -132,7 +124,6 @@ Checklist for precision:
     # Step 6: Run button
     run_btn = st.button("Run Document Validation", type="primary")
 # ========== FUNCTIONS ==========
 def get_content_type(filename):
@@ -224,7 +215,7 @@ def query_gemma_llm(doc_text, checklist, agent_instruction, current_date, status
     prompt = build_prompt(doc_text, checklist, agent_instruction, current_date)
     headers = {
         "Authorization": f"Bearer {OPENROUTER_API_KEY}",
-        "HTTP-Referer": "https://chat.openai.com",  # for OpenRouter
         "X-Title": "EZOFIS-Doc-Validator",
         "Content-Type": "application/json",
     }
@@ -242,7 +233,6 @@ def query_gemma_llm(doc_text, checklist, agent_instruction, current_date, status
             status_box.error(f"OpenRouter error: {resp.status_code}: {resp.text}")
         return None, None, prompt
     result = resp.json()["choices"][0]["message"]["content"]
-    # Extract only JSON
     start = result.find("{")
     end = result.rfind("}") + 1
     if start == -1 or end == 0:
@@ -321,118 +311,118 @@ if run_btn and uploaded_files:
     results = []
     debug_data = []
-    for uploaded_file in uploaded_files:
-        st.subheader(f"Validating: {uploaded_file.name}")
-        status_box = st.empty()
-        debug = {}
-        # Step 1: OCR
-        doc_text = extract_text_from_unstract(uploaded_file, status_box)
-        debug['OCR_extracted_text'] = doc_text
-        if not doc_text:
-            status_box.error("Skipping due to OCR extraction error.")
-            debug['error'] = "OCR extraction error"
-            debug_data.append({uploaded_file.name: debug})
-            continue
-        # Step 2: LLM Validation
-        llm_json, llm_raw, llm_prompt = query_gemma_llm(doc_text, checklist, agent_instruction, date_str, status_box)
-        debug['LLM_prompt'] = llm_prompt
-        debug['LLM_raw_response'] = llm_raw
-        debug['LLM_parsed_json'] = llm_json
-        if not llm_json:
-            status_box.error("Skipping due to LLM error.")
-            debug['error'] = "LLM processing error"
             debug_data.append({uploaded_file.name: debug})
-            continue
-        detected_type = llm_json.get("document_type", "")
-        matched_type, match_score = fuzzy_match_type(detected_type, required_types)
-        checklist_matched = llm_json.get("checklist_matched", False)
-        if checklist_matched and match_score < min_match_score:
-            checklist_matched = False
-        llm_conf = llm_json.get("confidence", 0)
-        accepted = (
-            checklist_matched and
-            llm_json.get("looks_genuine", False) and
-            not llm_json.get("is_expired", False) and
-            (llm_conf >= min_confidence)
-        )
-        reason = []
-        if not checklist_matched:
-            reason.append("No matching checklist item found. Document rejected.")
         else:
-            reason.append(
-                f"Document type '{detected_type}' matched checklist '{matched_type}' with score {match_score}/100."
-            )
-            if not llm_json.get("looks_genuine", False):
-                reason.append("Document does not look genuine.")
-            if llm_json.get("is_expired", False):
-                reason.append("Document is expired.")
-        reason.append(f"Genuineness confidence: {llm_conf}.")
-        reason.append(llm_json.get("verdict", ""))
-        # Advanced agent: If confidence is in a "gray zone", ask the LLM for a final self-verdict
-        verdict_json, verdict_raw, verdict_prompt = advanced_llm_verdict(llm_json, min_confidence, status_box)
-        debug['LLM_self_verdict_prompt'] = verdict_prompt
-        debug['LLM_self_verdict_raw'] = verdict_raw
-        debug['LLM_self_verdict_json'] = verdict_json
-        if verdict_json:
-            accepted = verdict_json.get("accepted", False)
-            reason.append(f"LLM Self-verdict: {verdict_json.get('reason','')}")
-            status_box.info("Final decision (gray zone) taken by LLM self-verdict.")
-        results.append({
-            "File": uploaded_file.name,
-            "Detected Type": detected_type,
-            "Checklist Match": matched_type if checklist_matched else "-",
-            "Type Score": match_score,
-            "Expiry Date": llm_json.get("expiry_date", "-"),
-            "Expired": "Yes" if llm_json.get("is_expired", False) else "No",
-            "Genuine": "Yes" if llm_json.get("looks_genuine", False) else "No",
-            "Confidence": llm_conf,
-            "Accepted": "Yes" if accepted else "No",
-            "Reason": " ".join(reason)
-        })
-        debug['Checklist_match_details'] = {
-            "detected_type": detected_type,
-            "matched_type": matched_type,
-            "match_score": match_score,
-            "checklist_matched": checklist_matched,
-            "accepted": accepted
-        }
-        debug_data.append({uploaded_file.name: debug})
-        status_box.success("Validation complete. See result below.")
-    # ==== Results table with custom styling ====
-    if results:
-        st.success("All validations complete.")
-        df = pd.DataFrame(results)
-        # Convert to HTML with classes for styling
-        def style_row(row):
-            color = "#e7ffe7" if row["Accepted"] == "Yes" else "#fff1f0"
-            return [f"background-color: {color}"]*len(row)
-        styled_df = df.style.apply(style_row, axis=1)\
-            .set_table_attributes('class="styled-table"')\
-            .set_properties(**{
-                'font-size': '15px',
-                'word-break': 'break-word',
-                'border': '1px solid #ddd'
-            })
-        st.markdown('<h4 style="margin-top:28px;">Validation Results</h4>', unsafe_allow_html=True)
-        st.write(styled_df.to_html(escape=False), unsafe_allow_html=True)
-    else:
-        st.warning("No valid results.")
-    with st.expander("Debug Panel (per document)"):
-        for doc_debug in debug_data:
-            for fname, dbg in doc_debug.items():
-                st.markdown(f"**{fname}**")
-                st.json(dbg)

         padding: 10px 32px !important; font-weight: 700; border: none !important; font-size: 18px !important;
         margin-top: 12px !important;
     }
     .styled-table th {
         background: #f3ecff !important;
         color: #42318d !important;
         word-break: break-word;
         max-width: 220px;
     }
     </style>
 """, unsafe_allow_html=True)
 )
 # ====== SIDE-BY-SIDE LAYOUT ======
 col_left, col_right = st.columns([1.35, 1.05])
 with col_left:
         accept_multiple_files=True
     )
+    # Step 3: Thresholds (SLIDERS HERE)
     st.markdown("<span class='step-num'>3</span> <b>Configure Acceptance Thresholds</b>", unsafe_allow_html=True)
     min_match_score = st.slider("Minimum Type Match Score (0-100)", 50, 100, 70, 1)
     min_confidence = st.slider("Minimum LLM Confidence (0-100)", 50, 100, 70, 1)
     # Step 6: Run button
     run_btn = st.button("Run Document Validation", type="primary")
 # ========== FUNCTIONS ==========
 def get_content_type(filename):
     prompt = build_prompt(doc_text, checklist, agent_instruction, current_date)
     headers = {
         "Authorization": f"Bearer {OPENROUTER_API_KEY}",
+        "HTTP-Referer": "https://chat.openai.com",
         "X-Title": "EZOFIS-Doc-Validator",
         "Content-Type": "application/json",
     }
             status_box.error(f"OpenRouter error: {resp.status_code}: {resp.text}")
         return None, None, prompt
     result = resp.json()["choices"][0]["message"]["content"]
     start = result.find("{")
     end = result.rfind("}") + 1
     if start == -1 or end == 0:
     results = []
     debug_data = []
+    with col_right:
+        for uploaded_file in uploaded_files:
+            st.subheader(f"Validating: {uploaded_file.name}")
+            status_box = st.empty()
+            debug = {}
+            # Step 1: OCR
+            doc_text = extract_text_from_unstract(uploaded_file, status_box)
+            debug['OCR_extracted_text'] = doc_text
+            if not doc_text:
+                status_box.error("Skipping due to OCR extraction error.")
+                debug['error'] = "OCR extraction error"
+                debug_data.append({uploaded_file.name: debug})
+                continue
+            # Step 2: LLM Validation
+            llm_json, llm_raw, llm_prompt = query_gemma_llm(doc_text, checklist, agent_instruction, date_str, status_box)
+            debug['LLM_prompt'] = llm_prompt
+            debug['LLM_raw_response'] = llm_raw
+            debug['LLM_parsed_json'] = llm_json
+            if not llm_json:
+                status_box.error("Skipping due to LLM error.")
+                debug['error'] = "LLM processing error"
+                debug_data.append({uploaded_file.name: debug})
+                continue
+            detected_type = llm_json.get("document_type", "")
+            matched_type, match_score = fuzzy_match_type(detected_type, required_types)
+            checklist_matched = llm_json.get("checklist_matched", False)
+            if checklist_matched and match_score < min_match_score:
+                checklist_matched = False
+            llm_conf = llm_json.get("confidence", 0)
+            accepted = (
+                checklist_matched and
+                llm_json.get("looks_genuine", False) and
+                not llm_json.get("is_expired", False) and
+                (llm_conf >= min_confidence)
+            )
+            reason = []
+            if not checklist_matched:
+                reason.append("No matching checklist item found. Document rejected.")
+            else:
+                reason.append(
+                    f"Document type '{detected_type}' matched checklist '{matched_type}' with score {match_score}/100."
+                )
+                if not llm_json.get("looks_genuine", False):
+                    reason.append("Document does not look genuine.")
+                if llm_json.get("is_expired", False):
+                    reason.append("Document is expired.")
+            reason.append(f"Genuineness confidence: {llm_conf}.")
+            reason.append(llm_json.get("verdict", ""))
+            # Advanced agent: If confidence is in a "gray zone", ask the LLM for a final self-verdict
+            verdict_json, verdict_raw, verdict_prompt = advanced_llm_verdict(llm_json, min_confidence, status_box)
+            debug['LLM_self_verdict_prompt'] = verdict_prompt
+            debug['LLM_self_verdict_raw'] = verdict_raw
+            debug['LLM_self_verdict_json'] = verdict_json
+            if verdict_json:
+                accepted = verdict_json.get("accepted", False)
+                reason.append(f"LLM Self-verdict: {verdict_json.get('reason','')}")
+                status_box.info("Final decision (gray zone) taken by LLM self-verdict.")
+            results.append({
+                "File": uploaded_file.name,
+                "Detected Type": detected_type,
+                "Checklist Match": matched_type if checklist_matched else "-",
+                "Type Score": match_score,
+                "Expiry Date": llm_json.get("expiry_date", "-"),
+                "Expired": "Yes" if llm_json.get("is_expired", False) else "No",
+                "Genuine": "Yes" if llm_json.get("looks_genuine", False) else "No",
+                "Confidence": llm_conf,
+                "Accepted": "Yes" if accepted else "No",
+                "Reason": " ".join(reason)
+            })
+            debug['Checklist_match_details'] = {
+                "detected_type": detected_type,
+                "matched_type": matched_type,
+                "match_score": match_score,
+                "checklist_matched": checklist_matched,
+                "accepted": accepted
+            }
             debug_data.append({uploaded_file.name: debug})
+            status_box.success("Validation complete. See result below.")
+        # ==== Results table with custom styling ====
+        if results:
+            st.success("All validations complete.")
+            df = pd.DataFrame(results)
+            def style_row(row):
+                color = "#e7ffe7" if row["Accepted"] == "Yes" else "#fff1f0"
+                return [f"background-color: {color}"]*len(row)
+            styled_df = df.style.apply(style_row, axis=1)\
+                .set_table_attributes('class="styled-table"')\
+                .set_properties(**{
+                    'font-size': '15px',
+                    'word-break': 'break-word',
+                    'border': '1px solid #ddd'
+                })
+            st.markdown('<h4 style="margin-top:28px;">Validation Results</h4>', unsafe_allow_html=True)
+            st.write(styled_df.to_html(escape=False), unsafe_allow_html=True)
         else:
+            st.warning("No valid results.")
+        with st.expander("Debug Panel (per document)"):
+            for doc_debug in debug_data:
+                for fname, dbg in doc_debug.items():
+                    st.markdown(f"**{fname}**")
+                    st.json(dbg)