heerjtdev commited on
Commit
2b3f70d
Β·
verified Β·
1 Parent(s): b267053

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -48
app.py CHANGED
@@ -3,62 +3,55 @@ import torch
3
  import torch.nn.functional as F
4
  from sentence_transformers import SentenceTransformer, CrossEncoder, util
5
 
6
- # Optimized for Free Tier CPU
7
- device = "cpu"
 
 
8
 
9
- # UPGRADED MODELS
10
- # 1. Similarity: Lightweight and fast
11
- sim_model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
12
- # 2. Reasoning: DeBERTa-v3-base is significantly better at logic than DistilRoBERTa
13
- nli_model = CrossEncoder('cross-encoder/nli-deberta-v3-base', device=device)
14
 
15
  def evaluate_response(kb, question, user_answer):
16
- # GATE 1: RELEVANCE
17
- q_emb = sim_model.encode(question, convert_to_tensor=True, device=device)
18
- a_emb = sim_model.encode(user_answer, convert_to_tensor=True, device=device)
19
- relevance_score = util.cos_sim(q_emb, a_emb).item()
20
-
21
- # GATE 2: FACTUALITY (The Reasoning Step)
22
- hypothesis = f"Question: {question} Answer: {user_answer}"
 
 
 
23
  logits = nli_model.predict([(kb, hypothesis)])
24
- probabilities = F.softmax(torch.tensor(logits), dim=1).tolist()[0]
25
-
26
- # DeBERTa-v3 Label Mapping: 0: contradiction, 1: entailment, 2: neutral
27
  labels = ["CONTRADICTION", "ENTAILMENT", "NEUTRAL"]
28
  max_idx = torch.tensor(logits).argmax().item()
29
  verdict = labels[max_idx]
30
- confidence = probabilities[max_idx] * 100
31
-
32
- # UPGRADED DECISION LOGIC
33
- # We trust DeBERTa more, so we can be slightly more rigid with its logic
34
- if verdict == "CONTRADICTION" and confidence > 55:
35
- status = "❌ INCORRECT (Fact Mismatch)"
36
- elif verdict == "ENTAILMENT" and confidence > 40:
37
- status = "βœ… CORRECT (Directly Supported)"
38
- elif relevance_score > 0.35 and verdict == "NEUTRAL":
39
- status = "βœ… CORRECT (Inferred)"
40
  else:
41
- status = "❌ IRRELEVANT / LOGICALLY WEAK"
42
-
43
- return status, f"{relevance_score:.2f}", f"{verdict} ({confidence:.1f}%)"
44
-
45
- # Interface setup (same as before)
46
- with gr.Blocks(title="Advanced Reasoning Verifier") as demo:
47
- gr.Markdown("# 🧠 Advanced Answer Verifier (DeBERTa-v3)")
48
- gr.Markdown("Using high-performance Cross-Encoders for superior logical reasoning.")
49
-
50
- with gr.Row():
51
- with gr.Column():
52
- kb_input = gr.Textbox(label="Knowledge Base", lines=6)
53
- q_input = gr.Textbox(label="Question")
54
- ans_input = gr.Textbox(label="User Answer")
55
- btn = gr.Button("Analyze", variant="primary")
56
- with gr.Column():
57
- verdict_out = gr.Textbox(label="Verdict")
58
- rel_out = gr.Label(label="Similarity")
59
- nli_out = gr.Label(label="NLI Reasoning")
60
-
61
- btn.click(fn=evaluate_response, inputs=[kb_input, q_input, ans_input], outputs=[verdict_out, rel_out, nli_out])
62
 
63
  if __name__ == "__main__":
64
  demo.launch()
 
3
  import torch.nn.functional as F
4
  from sentence_transformers import SentenceTransformer, CrossEncoder, util
5
 
6
+ # Use ModernBERT-based NLI for maximum speed on Free Tier CPU
7
+ # This model is 20% faster and 40% lighter than standard DeBERTa
8
+ reasoning_model_name = 'dleemiller/finecat-nli-l'
9
+ similarity_model_name = 'all-MiniLM-L6-v2'
10
 
11
+ print("Initializing 2025 Lightweight Suite...")
12
+ sim_model = SentenceTransformer(similarity_model_name, device="cpu")
13
+ nli_model = CrossEncoder(reasoning_model_name, device="cpu")
 
 
14
 
15
  def evaluate_response(kb, question, user_answer):
16
+ # 1. Topic Relevance (Bi-Encoder)
17
+ # We check if the answer even belongs in the same universe as the question
18
+ q_emb = sim_model.encode(question, convert_to_tensor=True)
19
+ a_emb = sim_model.encode(user_answer, convert_to_tensor=True)
20
+ rel_score = util.cos_sim(q_emb, a_emb).item()
21
+
22
+ # 2. Structured Reasoning (Cross-Encoder)
23
+ # We format the hypothesis to force the model to evaluate the ANSWER specifically
24
+ hypothesis = f"Based on the context, the answer to '{question}' is '{user_answer}'."
25
+
26
  logits = nli_model.predict([(kb, hypothesis)])
27
+ probs = F.softmax(torch.tensor(logits), dim=1).tolist()[0]
28
+
29
+ # Label mapping for FineCat/DeBERTa: 0: contradiction, 1: entailment, 2: neutral
30
  labels = ["CONTRADICTION", "ENTAILMENT", "NEUTRAL"]
31
  max_idx = torch.tensor(logits).argmax().item()
32
  verdict = labels[max_idx]
33
+ conf = probs[max_idx] * 100
34
+
35
+ # 3. Precision Logic Gate
36
+ if verdict == "CONTRADICTION" and conf > 40:
37
+ status = "❌ INCORRECT (Logic Conflict)"
38
+ elif verdict == "ENTAILMENT" and conf > 35:
39
+ status = "βœ… CORRECT (Confirmed)"
40
+ elif rel_score > 0.40 and verdict != "CONTRADICTION":
41
+ status = "βœ… CORRECT (Likely/Inferred)"
 
42
  else:
43
+ status = "❌ WRONG / IRRELEVANT"
44
+
45
+ return status, f"{rel_score:.2f}", f"{verdict} ({conf:.1f}%)"
46
+
47
+ # UI Setup remains the same
48
+ demo = gr.Interface(
49
+ fn=evaluate_response,
50
+ inputs=["text", "text", "text"],
51
+ outputs=[gr.Textbox(label="Verdict"), gr.Label(label="Topic Similarity"), gr.Label(label="NLI Reasoning")],
52
+ title="Lightweight Reasoning Engine v3",
53
+ description="Using ModernBERT-distilled NLI for 2025-standard reasoning on CPU."
54
+ )
 
 
 
 
 
 
 
 
 
55
 
56
  if __name__ == "__main__":
57
  demo.launch()