VENDOR_ONBOARDING_AGENT

Sleeping

App Files Files Community

Seth0330 committed on Jul 7, 2025

Commit

5cdcfa2

verified ·

1 Parent(s): 839c059

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -20

app.py CHANGED Viewed

@@ -141,21 +141,18 @@ with col1:
     st.markdown("<span class='step-num'>4</span> <b>Agent Strictness Controls</b>", unsafe_allow_html=True)
     doc_match_threshold = st.slider("Document Match Strictness (Semantic Similarity)", 0.5, 0.95, 0.65, 0.01,
         help="How closely a field or file must match the checklist requirement. Higher = stricter.")
-    mandatory_doc_pct = st.slider("Mandatory Document Strictness (%)", 70, 100, 100, 1,
         help="Minimum percentage of mandatory docs required for onboarding.")
     field_value_strict = st.slider("Field Value Strictness (0=Any, 1=Only Files/Numbers)", 0, 1, 0, 1,
         help="Require files/formal values (1) or allow 'Yes', 'N/A', etc. (0).")
     compliance_strict = st.slider("Compliance Registry Strictness (0=Allow, 1=Reject)", 0, 1, 1, 1,
         help="Reject if company not found in Canada registry (1), or allow with warning (0).")
-    st.markdown("<span class='step-num'>5</span> <b>Agent Instructions (Business Logic Only)</b>", unsafe_allow_html=True)
-    user_editable_instructions = """You are a senior supplier onboarding analyst.
-- For each compliance requirement, you are provided the best-matching (key, value) field pair from the supplier's data (determined by AI semantic similarity, not just spelling).
 - If the value is a file (e.g., .pdf or .docx), treat this as strong evidence and DO NOT mark as missing unless there is a clear reason.
-- Only mark as missing if nothing plausible is provided anywhere in the data, or if the evidence is clearly invalid.
-"""
-    agent_instruction = st.text_area("Edit agent instruction prompt (business logic only):", value=user_editable_instructions, height=170, key="agent_instruction")
 with col2:
     st.markdown("<span class='step-num'>6</span> <b>Run Supplier Onboarding Agent</b>", unsafe_allow_html=True)
@@ -232,7 +229,6 @@ with col2:
                     best_match_text = supplier_pairs[best_idx]
                     best_score = sim_scores[best_idx]
                     is_provided = best_score >= doc_match_threshold
-                    # Field value strictness: Only accept files/numbers if slider is at 1
                     if field_value_strict == 1:
                         file_like = bool(re.search(r'\.(pdf|docx?|xls|csv|jpg|jpeg|png)$', str(best_match_text).lower()))
                         numeric_like = bool(re.match(r'^\d+(\.\d+)?$', str(best_match_text)))
@@ -250,7 +246,6 @@ with col2:
                         "status": "Provided" if is_provided else "Missing"
                     })
-                # Pre-Lens: Strictness logic before LLM
                 n_mandatory = sum(1 for f in findings if f["mandatory"])
                 n_mandatory_provided = sum(1 for f in findings if f["mandatory"] and f["status"] == "Provided")
                 pct_mandatory = 100 * n_mandatory_provided / n_mandatory if n_mandatory else 100
@@ -280,16 +275,8 @@ with col2:
                     early_reject = True
                     reasons.append("Company not found in Corporations Canada registry.")
-                if early_reject:
-                    agent_json = {
-                        "decision": "REJECTED",
-                        "reason": "; ".join(reasons),
-                        "red_flags": reasons,
-                        "missing_documents": [f["document"] for f in findings if f["mandatory"] and f["status"] == "Missing"],
-                        "next_steps": ["Request all missing mandatory documents and registry proof before proceeding."]
-                    }
-                else:
-                    llm_return_format = """
 Return JSON:
 {
   "decision": "ONBOARDED" | "REJECTED" | "PENDING",
@@ -299,7 +286,17 @@ Return JSON:
   "next_steps": [ ... ]
 }
 """
                     llm_prompt = f"""
 {agent_instruction}
 Supplier main info: {json.dumps(main_info, indent=2)}

     st.markdown("<span class='step-num'>4</span> <b>Agent Strictness Controls</b>", unsafe_allow_html=True)
     doc_match_threshold = st.slider("Document Match Strictness (Semantic Similarity)", 0.5, 0.95, 0.65, 0.01,
         help="How closely a field or file must match the checklist requirement. Higher = stricter.")
+    mandatory_doc_pct = st.slider("Mandatory Document Strictness (%)", 20, 100, 100, 1,
         help="Minimum percentage of mandatory docs required for onboarding.")
     field_value_strict = st.slider("Field Value Strictness (0=Any, 1=Only Files/Numbers)", 0, 1, 0, 1,
         help="Require files/formal values (1) or allow 'Yes', 'N/A', etc. (0).")
     compliance_strict = st.slider("Compliance Registry Strictness (0=Allow, 1=Reject)", 0, 1, 1, 1,
         help="Reject if company not found in Canada registry (1), or allow with warning (0).")
+    st.markdown("<span class='step-num'>5</span> <b>Agent Instructions</b>", unsafe_allow_html=True)
+    user_editable_instructions = """- For each compliance requirement, you are provided the best-matching (key, value) field pair from the supplier's data (determined by AI semantic similarity, not just spelling).
 - If the value is a file (e.g., .pdf or .docx), treat this as strong evidence and DO NOT mark as missing unless there is a clear reason.
+- Only mark as missing if nothing plausible is provided anywhere in the data, or if the evidence is clearly invalid."""
+    agent_instruction = st.text_area("Edit agent instruction prompt:", value=user_editable_instructions, height=130, key="agent_instruction")
 with col2:
     st.markdown("<span class='step-num'>6</span> <b>Run Supplier Onboarding Agent</b>", unsafe_allow_html=True)
                     best_match_text = supplier_pairs[best_idx]
                     best_score = sim_scores[best_idx]
                     is_provided = best_score >= doc_match_threshold
                     if field_value_strict == 1:
                         file_like = bool(re.search(r'\.(pdf|docx?|xls|csv|jpg|jpeg|png)$', str(best_match_text).lower()))
                         numeric_like = bool(re.match(r'^\d+(\.\d+)?$', str(best_match_text)))
                         "status": "Provided" if is_provided else "Missing"
                     })
                 n_mandatory = sum(1 for f in findings if f["mandatory"])
                 n_mandatory_provided = sum(1 for f in findings if f["mandatory"] and f["status"] == "Provided")
                 pct_mandatory = 100 * n_mandatory_provided / n_mandatory if n_mandatory else 100
                     early_reject = True
                     reasons.append("Company not found in Corporations Canada registry.")
+                persona_instruction = "You are a senior supplier onboarding analyst.\n\n"
+                llm_return_format = """
 Return JSON:
 {
   "decision": "ONBOARDED" | "REJECTED" | "PENDING",
   "next_steps": [ ... ]
 }
 """
+                if early_reject:
+                    agent_json = {
+                        "decision": "REJECTED",
+                        "reason": "; ".join(reasons),
+                        "red_flags": reasons,
+                        "missing_documents": [f["document"] for f in findings if f["mandatory"] and f["status"] == "Missing"],
+                        "next_steps": ["Request all missing mandatory documents and registry proof before proceeding."]
+                    }
+                else:
                     llm_prompt = f"""
+{persona_instruction}
 {agent_instruction}
 Supplier main info: {json.dumps(main_info, indent=2)}