heerjtdev committed on
Commit
aa9013a
·
verified ·
1 Parent(s): f05d1e8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -25
app.py CHANGED
@@ -3,14 +3,18 @@ import torch
3
  import torch.nn.functional as F
4
  from sentence_transformers import SentenceTransformer, CrossEncoder, util
5
 
6
- # Load models (Hugging Face will cache these)
7
- sim_model = SentenceTransformer('all-MiniLM-L6-v2')
8
- nli_model = CrossEncoder('cross-encoder/nli-distilroberta-base')
 
 
 
 
9
 
10
  def evaluate_response(kb, question, user_answer):
11
  # --- GATE 1: RELEVANCE ---
12
- q_emb = sim_model.encode(question, convert_to_tensor=True)
13
- a_emb = sim_model.encode(user_answer, convert_to_tensor=True)
14
  relevance_score = util.cos_sim(q_emb, a_emb).item()
15
 
16
  # --- GATE 2: FACTUALITY ---
@@ -26,35 +30,41 @@ def evaluate_response(kb, question, user_answer):
26
  # --- DECISION LOGIC ---
27
  if verdict == "CONTRADICTION" and confidence > 60:
28
  status = "❌ INCORRECT (Fact Mismatch)"
 
29
  elif verdict == "ENTAILMENT" and confidence > 45:
30
  status = "βœ… CORRECT (Directly Supported)"
 
31
  elif relevance_score > 0.30 and verdict != "CONTRADICTION":
32
  status = "βœ… CORRECT (Inferred)"
 
33
  else:
34
  status = "❌ IRRELEVANT / WRONG"
 
35
 
36
  return status, f"{relevance_score:.2f}", f"{verdict} ({confidence:.1f}%)"
37
 
38
- # Build the Gradio Interface
39
- demo = gr.Interface(
40
- fn=evaluate_response,
41
- inputs=[
42
- gr.Textbox(label="Knowledge Base (Context)", lines=5),
43
- gr.Textbox(label="Question"),
44
- gr.Textbox(label="User Answer")
45
- ],
46
- outputs=[
47
- gr.Label(label="Final Verdict"),
48
- gr.Textbox(label="Relevance Score"),
49
- gr.Textbox(label="NLI Raw Output")
50
- ],
51
- title="AI Answer Checker",
52
- description="Evaluate user answers against a Knowledge Base using Semantic Similarity and NLI.",
53
- examples=[
54
- ["Profits dropped by 5% in 2023.", "Was the company more profitable?", "Yes, it was much more profitable."],
55
- ["Michael Collins stayed in the command module while Neil walked on the moon.", "What happened to Michael Collins?", "He stayed in the command module."]
56
- ]
57
- )
 
 
58
 
59
  if __name__ == "__main__":
60
  demo.launch()
 
3
  import torch.nn.functional as F
4
  from sentence_transformers import SentenceTransformer, CrossEncoder, util
5
 
6
# The free Hugging Face tier has no GPU, so pin everything to the CPU.
device = "cpu"

# Load both models up front (cached by Hugging Face between restarts):
#  - a bi-encoder for semantic similarity (relevance gate)
#  - a cross-encoder NLI model for entailment/contradiction (factuality gate)
print("Loading models on CPU...")
sim_model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
nli_model = CrossEncoder('cross-encoder/nli-distilroberta-base', device=device)
 
14
  def evaluate_response(kb, question, user_answer):
15
  # --- GATE 1: RELEVANCE ---
16
+ q_emb = sim_model.encode(question, convert_to_tensor=True, device=device)
17
+ a_emb = sim_model.encode(user_answer, convert_to_tensor=True, device=device)
18
  relevance_score = util.cos_sim(q_emb, a_emb).item()
19
 
20
  # --- GATE 2: FACTUALITY ---
 
30
  # --- DECISION LOGIC ---
31
  if verdict == "CONTRADICTION" and confidence > 60:
32
  status = "❌ INCORRECT (Fact Mismatch)"
33
+ color = "#ff4b4b"
34
  elif verdict == "ENTAILMENT" and confidence > 45:
35
  status = "βœ… CORRECT (Directly Supported)"
36
+ color = "#2ecc71"
37
  elif relevance_score > 0.30 and verdict != "CONTRADICTION":
38
  status = "βœ… CORRECT (Inferred)"
39
+ color = "#f1c40f"
40
  else:
41
  status = "❌ IRRELEVANT / WRONG"
42
+ color = "#95a5a6"
43
 
44
  return status, f"{relevance_score:.2f}", f"{verdict} ({confidence:.1f}%)"
45
 
46
# Interactive UI: inputs on the left, verdict panel on the right.
with gr.Blocks(title="AI Answer Checker") as demo:
    gr.Markdown("# 🧠 Smart Answer Verifier")
    gr.Markdown("Test how well an answer matches the context provided.")

    with gr.Row():
        with gr.Column():
            context_box = gr.Textbox(
                label="Knowledge Base (Context)",
                placeholder="Paste your text here...",
                lines=6,
            )
            question_box = gr.Textbox(
                label="The Question",
                placeholder="What do you want to ask?",
            )
            answer_box = gr.Textbox(
                label="User's Answer",
                placeholder="What did the user say?",
            )
            analyze_btn = gr.Button("Analyze Answer", variant="primary")

        with gr.Column():
            verdict_box = gr.Textbox(label="Final Verdict")
            relevance_out = gr.Label(label="Relevance Score (0 to 1)")
            nli_out = gr.Label(label="NLI Confidence")

    # Wire the button to the checker; outputs map 1:1 to the
    # (status, relevance, nli) tuple returned by evaluate_response.
    analyze_btn.click(
        fn=evaluate_response,
        inputs=[context_box, question_box, answer_box],
        outputs=[verdict_box, relevance_out, nli_out],
    )
68
 
69
if __name__ == "__main__":
    # Start the Gradio server only when executed as a script
    # (not when the module is imported).
    demo.launch()