Update app.py
Browse files
app.py
CHANGED
|
@@ -3,14 +3,18 @@ import torch
|
|
| 3 |
import torch.nn.functional as F
|
| 4 |
from sentence_transformers import SentenceTransformer, CrossEncoder, util
|
| 5 |
|
| 6 |
-
#
|
| 7 |
-
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
def evaluate_response(kb, question, user_answer):
|
| 11 |
# --- GATE 1: RELEVANCE ---
|
| 12 |
-
q_emb = sim_model.encode(question, convert_to_tensor=True)
|
| 13 |
-
a_emb = sim_model.encode(user_answer, convert_to_tensor=True)
|
| 14 |
relevance_score = util.cos_sim(q_emb, a_emb).item()
|
| 15 |
|
| 16 |
# --- GATE 2: FACTUALITY ---
|
|
@@ -26,35 +30,41 @@ def evaluate_response(kb, question, user_answer):
|
|
| 26 |
# --- DECISION LOGIC ---
|
| 27 |
if verdict == "CONTRADICTION" and confidence > 60:
|
| 28 |
status = "β INCORRECT (Fact Mismatch)"
|
|
|
|
| 29 |
elif verdict == "ENTAILMENT" and confidence > 45:
|
| 30 |
status = "β
CORRECT (Directly Supported)"
|
|
|
|
| 31 |
elif relevance_score > 0.30 and verdict != "CONTRADICTION":
|
| 32 |
status = "β
CORRECT (Inferred)"
|
|
|
|
| 33 |
else:
|
| 34 |
status = "β IRRELEVANT / WRONG"
|
|
|
|
| 35 |
|
| 36 |
return status, f"{relevance_score:.2f}", f"{verdict} ({confidence:.1f}%)"
|
| 37 |
|
| 38 |
-
#
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
gr.
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
|
|
|
|
|
|
| 58 |
|
| 59 |
if __name__ == "__main__":
|
| 60 |
demo.launch()
|
|
|
|
import torch.nn.functional as F
from sentence_transformers import SentenceTransformer, CrossEncoder, util

# Force CPU usage for the Free Tier (Hugging Face Spaces free hardware has no GPU).
device = "cpu"

# Load models once at import time so every request reuses them.
print("Loading models on CPU...")
# Bi-encoder: fast sentence embeddings, used for the question/answer relevance gate.
sim_model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
# Cross-encoder NLI model: scores entailment/contradiction for the factuality gate.
nli_model = CrossEncoder('cross-encoder/nli-distilroberta-base', device=device)
|
| 13 |
|
| 14 |
def evaluate_response(kb, question, user_answer):
    """Grade *user_answer* for *question* against the knowledge base *kb*.

    Two gates feed a final decision:
      1. relevance  — cosine similarity between question and answer embeddings;
      2. factuality — an NLI verdict/confidence (computed in code elided from
         this diff view, see NOTE below).

    Returns a 3-tuple of display strings:
    (status, relevance score "0.00"-formatted, "VERDICT (NN.N%)").
    """
    # --- GATE 1: RELEVANCE ---
    # Encode both texts on the module-level `device` ("cpu") and compare.
    q_emb = sim_model.encode(question, convert_to_tensor=True, device=device)
    a_emb = sim_model.encode(user_answer, convert_to_tensor=True, device=device)
    relevance_score = util.cos_sim(q_emb, a_emb).item()

    # --- GATE 2: FACTUALITY ---
    # NOTE(review): original lines 20-29 are elided by this diff view. They
    # presumably run `nli_model` over (kb, user_answer) and set `verdict`
    # (e.g. "ENTAILMENT"/"CONTRADICTION") and `confidence` (a percentage) —
    # confirm against the full file before relying on this summary.

    # --- DECISION LOGIC ---
    # Thresholds (60 / 45 / 0.30) look hand-tuned; confirm with the author.
    # NOTE(review): the emoji glyphs below ("β", etc.) appear garbled by text
    # extraction — verify the literal strings against the real source file.
    # NOTE(review): `color` is assigned in every branch but never used or
    # returned (btn.click wires only the 3 returned values) — dead code unless
    # it is meant to be wired into the UI.
    if verdict == "CONTRADICTION" and confidence > 60:
        status = "β INCORRECT (Fact Mismatch)"
        color = "#ff4b4b"
    elif verdict == "ENTAILMENT" and confidence > 45:
        status = "β CORRECT (Directly Supported)"
        color = "#2ecc71"
    elif relevance_score > 0.30 and verdict != "CONTRADICTION":
        # On-topic and not contradicted: accept as an inferred answer.
        status = "β CORRECT (Inferred)"
        color = "#f1c40f"
    else:
        status = "β IRRELEVANT / WRONG"
        color = "#95a5a6"

    return status, f"{relevance_score:.2f}", f"{verdict} ({confidence:.1f}%)"
|
| 45 |
|
| 46 |
# Interactive UI: a two-column layout — inputs on the left, results on the
# right — wired to evaluate_response via a single button click.
with gr.Blocks(title="AI Answer Checker") as demo:
    gr.Markdown("# π§ Smart Answer Verifier")
    gr.Markdown("Test how well an answer matches the context provided.")

    with gr.Row():
        # Left column: the three free-text inputs plus the trigger button.
        with gr.Column():
            context_box = gr.Textbox(
                label="Knowledge Base (Context)",
                placeholder="Paste your text here...",
                lines=6,
            )
            question_box = gr.Textbox(
                label="The Question",
                placeholder="What do you want to ask?",
            )
            answer_box = gr.Textbox(
                label="User's Answer",
                placeholder="What did the user say?",
            )
            analyze_btn = gr.Button("Analyze Answer", variant="primary")

        # Right column: the three read-only result widgets, matching the
        # 3-tuple returned by evaluate_response.
        with gr.Column():
            status_box = gr.Textbox(label="Final Verdict")
            relevance_box = gr.Label(label="Relevance Score (0 to 1)")
            nli_box = gr.Label(label="NLI Confidence")

    analyze_btn.click(
        fn=evaluate_response,
        inputs=[context_box, question_box, answer_box],
        outputs=[status_box, relevance_box, nli_box],
    )
|
| 68 |
|
| 69 |
# Script entry point: start the Gradio server only when run directly,
# not when imported as a module.
if __name__ == "__main__":
    demo.launch()
|