Spaces:

heerjtdev
/

NLI

Running

App Files Files Community

heerjtdev committed 20 days ago

Commit

b267053

verified ·

1 Parent(s): e78173e

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -33

app.py CHANGED Viewed

@@ -3,68 +3,62 @@ import torch
 import torch.nn.functional as F
 from sentence_transformers import SentenceTransformer, CrossEncoder, util
# All inference is pinned to the CPU (the original note cites the Free Tier).
device = "cpu"

# Hub ids of the two models this version of the app loads.
_SIMILARITY_MODEL_ID = "all-MiniLM-L6-v2"
_NLI_MODEL_ID = "cross-encoder/nli-distilroberta-base"

# Announce the load on stdout before the models are constructed.
print("Loading models on CPU...")

# Sentence embedder; evaluate_response() uses it for the relevance gate.
sim_model = SentenceTransformer(_SIMILARITY_MODEL_ID, device=device)
# NLI cross-encoder; evaluate_response() uses it for the factuality gate.
nli_model = CrossEncoder(_NLI_MODEL_ID, device=device)
 def evaluate_response(kb, question, user_answer):
     """Grade ``user_answer`` to ``question`` against the text in ``kb``.

     Two signals are combined:

     * Relevance: cosine similarity between the ``sim_model`` embeddings of
       the question and of the answer.
     * Factuality: ``nli_model`` scores the pair ``(kb, hypothesis)``, where
       the hypothesis is a sentence built from the question and the answer.

     Args:
         kb: Reference text used as the NLI premise.
         question: The question that was asked.
         user_answer: The answer to grade.

     Returns:
         A 3-tuple of strings ``(status, relevance, nli_summary)``: the final
         verdict message, the relevance score formatted to two decimals, and
         the winning NLI label with its probability as a percentage.
     """
     # Robustness: report blank fields instead of running the models on them.
     if not all((field or "").strip() for field in (kb, question, user_answer)):
         return "⚠️ MISSING INPUT", "0.00", "N/A"

     # --- GATE 1: RELEVANCE ---
     q_emb = sim_model.encode(question, convert_to_tensor=True, device=device)
     a_emb = sim_model.encode(user_answer, convert_to_tensor=True, device=device)
     relevance_score = util.cos_sim(q_emb, a_emb).item()

     # --- GATE 2: FACTUALITY ---
     hypothesis = f"The answer to the question '{question}' is '{user_answer}'"
     logits = nli_model.predict([(kb, hypothesis)])
     # Exactly one pair was submitted, so keep only its row of class scores.
     # Converting once and taking argmax on that row avoids the flattened
     # argmax over the whole batch that the previous code relied on.
     pair_logits = torch.tensor(logits)[0]
     probabilities = F.softmax(pair_logits, dim=0).tolist()

     # Index -> label order assumed by this app for the NLI cross-encoder.
     labels = ["CONTRADICTION", "ENTAILMENT", "NEUTRAL"]
     max_idx = pair_logits.argmax().item()
     verdict = labels[max_idx]
     confidence = probabilities[max_idx] * 100

     # --- DECISION LOGIC ---
     # The per-branch ``color`` value was never used or returned, so it is gone.
     if verdict == "CONTRADICTION" and confidence > 60:
         status = "❌ INCORRECT (Fact Mismatch)"
     elif verdict == "ENTAILMENT" and confidence > 45:
         status = "✅ CORRECT (Directly Supported)"
     elif relevance_score > 0.30 and verdict != "CONTRADICTION":
         status = "✅ CORRECT (Inferred)"
     else:
         status = "❌ IRRELEVANT / WRONG"

     return status, f"{relevance_score:.2f}", f"{verdict} ({confidence:.1f}%)"
# Interactive UI: a column of inputs with the trigger button, and a column of
# result widgets, inside one row.
demo = gr.Blocks(title="AI Answer Checker")
with demo:
    gr.Markdown("# 🧠 Smart Answer Verifier")
    gr.Markdown("Test how well an answer matches the context provided.")

    with gr.Row():
        # First column: what the user provides.
        with gr.Column():
            kb_input = gr.Textbox(
                label="Knowledge Base (Context)",
                placeholder="Paste your text here...",
                lines=6,
            )
            q_input = gr.Textbox(
                label="The Question",
                placeholder="What do you want to ask?",
            )
            ans_input = gr.Textbox(
                label="User's Answer",
                placeholder="What did the user say?",
            )
            btn = gr.Button("Analyze Answer", variant="primary")

        # Second column: the three values returned by evaluate_response().
        with gr.Column():
            verdict_out = gr.Textbox(label="Final Verdict")
            rel_out = gr.Label(label="Relevance Score (0 to 1)")
            nli_out = gr.Label(label="NLI Confidence")

    # Component order matches evaluate_response's parameters and return tuple.
    input_fields = [kb_input, q_input, ans_input]
    output_fields = [verdict_out, rel_out, nli_out]
    btn.click(fn=evaluate_response, inputs=input_fields, outputs=output_fields)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()

 import torch.nn.functional as F
 from sentence_transformers import SentenceTransformer, CrossEncoder, util
# All inference is pinned to the CPU (the original note cites the Free Tier).
device = "cpu"

# Hub ids of the two models this version of the app loads.
_SIMILARITY_MODEL_ID = "all-MiniLM-L6-v2"
_NLI_MODEL_ID = "cross-encoder/nli-deberta-v3-base"

# 1. Similarity: described by the author as lightweight and fast;
#    evaluate_response() uses it for the relevance gate.
sim_model = SentenceTransformer(_SIMILARITY_MODEL_ID, device=device)
# 2. Reasoning: chosen by the author as a stronger replacement for the
#    DistilRoBERTa NLI model; evaluate_response() uses it for the factuality gate.
nli_model = CrossEncoder(_NLI_MODEL_ID, device=device)
 def evaluate_response(kb, question, user_answer):
     """Grade ``user_answer`` to ``question`` against the text in ``kb``.

     Two signals are combined:

     * Relevance: cosine similarity between the ``sim_model`` embeddings of
       the question and of the answer.
     * Factuality: ``nli_model`` scores the pair ``(kb, hypothesis)``, where
       the hypothesis is the question and the answer joined into one string.

     Args:
         kb: Reference text used as the NLI premise.
         question: The question that was asked.
         user_answer: The answer to grade.

     Returns:
         A 3-tuple of strings ``(status, relevance, nli_summary)``: the final
         verdict message, the relevance score formatted to two decimals, and
         the winning NLI label with its probability as a percentage.
     """
     # Robustness: report blank fields instead of running the models on them.
     if not all((field or "").strip() for field in (kb, question, user_answer)):
         return "⚠️ MISSING INPUT", "0.00", "N/A"

     # GATE 1: RELEVANCE
     q_emb = sim_model.encode(question, convert_to_tensor=True, device=device)
     a_emb = sim_model.encode(user_answer, convert_to_tensor=True, device=device)
     relevance_score = util.cos_sim(q_emb, a_emb).item()

     # GATE 2: FACTUALITY (the reasoning step)
     hypothesis = f"Question: {question} Answer: {user_answer}"
     logits = nli_model.predict([(kb, hypothesis)])
     # Exactly one pair was submitted, so keep only its row of class scores.
     # Converting once and taking argmax on that row avoids the flattened
     # argmax over the whole batch that the previous code relied on.
     pair_logits = torch.tensor(logits)[0]
     probabilities = F.softmax(pair_logits, dim=0).tolist()

     # Label mapping used for the NLI model:
     # 0: contradiction, 1: entailment, 2: neutral
     labels = ["CONTRADICTION", "ENTAILMENT", "NEUTRAL"]
     max_idx = pair_logits.argmax().item()
     verdict = labels[max_idx]
     confidence = probabilities[max_idx] * 100

     # DECISION LOGIC
     if verdict == "CONTRADICTION" and confidence > 55:
         status = "❌ INCORRECT (Fact Mismatch)"
     elif verdict == "ENTAILMENT" and confidence > 40:
         status = "✅ CORRECT (Directly Supported)"
     # Fallback for relevant answers the NLI model did not contradict. It must
     # also cover a low-confidence ENTAILMENT; restricting it to NEUTRAL graded
     # weak support for the answer below no support at all.
     elif relevance_score > 0.35 and verdict != "CONTRADICTION":
         status = "✅ CORRECT (Inferred)"
     else:
         status = "❌ IRRELEVANT / LOGICALLY WEAK"

     return status, f"{relevance_score:.2f}", f"{verdict} ({confidence:.1f}%)"
# Interface setup: a column of inputs with the trigger button, and a column of
# result widgets, inside one row.
demo = gr.Blocks(title="Advanced Reasoning Verifier")
with demo:
    gr.Markdown("# 🧠 Advanced Answer Verifier (DeBERTa-v3)")
    gr.Markdown("Using high-performance Cross-Encoders for superior logical reasoning.")

    with gr.Row():
        # First column: what the user provides.
        with gr.Column():
            kb_input = gr.Textbox(label="Knowledge Base", lines=6)
            q_input = gr.Textbox(label="Question")
            ans_input = gr.Textbox(label="User Answer")
            btn = gr.Button("Analyze", variant="primary")

        # Second column: the three values returned by evaluate_response().
        with gr.Column():
            verdict_out = gr.Textbox(label="Verdict")
            rel_out = gr.Label(label="Similarity")
            nli_out = gr.Label(label="NLI Reasoning")

    # Component order matches evaluate_response's parameters and return tuple.
    input_fields = [kb_input, q_input, ans_input]
    output_fields = [verdict_out, rel_out, nli_out]
    btn.click(
        fn=evaluate_response,
        inputs=input_fields,
        outputs=output_fields,
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()