Update app.py
Browse files
app.py
CHANGED
|
@@ -3,14 +3,18 @@ import torch
|
|
| 3 |
import torch.nn.functional as F
|
| 4 |
from sentence_transformers import SentenceTransformer, CrossEncoder, util
|
| 5 |
|
| 6 |
-
#
|
| 7 |
-
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
def evaluate_response(kb, question, user_answer):
|
| 11 |
# --- GATE 1: RELEVANCE ---
|
| 12 |
-
q_emb = sim_model.encode(question, convert_to_tensor=True)
|
| 13 |
-
a_emb = sim_model.encode(user_answer, convert_to_tensor=True)
|
| 14 |
relevance_score = util.cos_sim(q_emb, a_emb).item()
|
| 15 |
|
| 16 |
# --- GATE 2: FACTUALITY ---
|
|
@@ -26,35 +30,41 @@ def evaluate_response(kb, question, user_answer):
|
|
| 26 |
# --- DECISION LOGIC ---
|
| 27 |
if verdict == "CONTRADICTION" and confidence > 60:
|
| 28 |
status = "β INCORRECT (Fact Mismatch)"
|
|
|
|
| 29 |
elif verdict == "ENTAILMENT" and confidence > 45:
|
| 30 |
status = "β
CORRECT (Directly Supported)"
|
|
|
|
| 31 |
elif relevance_score > 0.30 and verdict != "CONTRADICTION":
|
| 32 |
status = "β
CORRECT (Inferred)"
|
|
|
|
| 33 |
else:
|
| 34 |
status = "β IRRELEVANT / WRONG"
|
|
|
|
| 35 |
|
| 36 |
return status, f"{relevance_score:.2f}", f"{verdict} ({confidence:.1f}%)"
|
| 37 |
|
| 38 |
-
#
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
gr.
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
|
|
|
|
|
|
| 58 |
|
| 59 |
if __name__ == "__main__":
|
| 60 |
demo.launch()
|
|
|
|
import torch.nn.functional as F
from sentence_transformers import SentenceTransformer, CrossEncoder, util

# Force CPU usage for the Free Tier (Hugging Face Spaces free hardware has no GPU).
device = "cpu"

# Load models once at import time so every request reuses them.
print("Loading models on CPU...")
# Bi-encoder: fast sentence embeddings, used for the question/answer relevance gate.
sim_model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
# Cross-encoder NLI model: scores entailment/contradiction for the factuality gate.
nli_model = CrossEncoder('cross-encoder/nli-distilroberta-base', device=device)
|
| 13 |
|
| 14 |
def evaluate_response(kb, question, user_answer):
    """Grade *user_answer* for *question* against the knowledge base *kb*.

    Two gates feed a final decision:
      1. relevance  — cosine similarity between question and answer embeddings;
      2. factuality — an NLI verdict/confidence (computed in code elided from
         this diff view, see NOTE below).

    Returns a 3-tuple of display strings:
    (status, relevance score "0.00"-formatted, "VERDICT (NN.N%)").
    """
    # --- GATE 1: RELEVANCE ---
    # Encode both texts on the module-level `device` ("cpu") and compare.
    q_emb = sim_model.encode(question, convert_to_tensor=True, device=device)
    a_emb = sim_model.encode(user_answer, convert_to_tensor=True, device=device)
    relevance_score = util.cos_sim(q_emb, a_emb).item()

    # --- GATE 2: FACTUALITY ---
    # NOTE(review): original lines 20-29 are elided by this diff view. They
    # presumably run `nli_model` over (kb, user_answer) and set `verdict`
    # (e.g. "ENTAILMENT"/"CONTRADICTION") and `confidence` (a percentage) —
    # confirm against the full file before relying on this summary.

    # --- DECISION LOGIC ---
    # Thresholds (60 / 45 / 0.30) look hand-tuned; confirm with the author.
    # NOTE(review): the emoji glyphs below ("β", etc.) appear garbled by text
    # extraction — verify the literal strings against the real source file.
    # NOTE(review): `color` is assigned in every branch but never used or
    # returned (btn.click wires only the 3 returned values) — dead code unless
    # it is meant to be wired into the UI.
    if verdict == "CONTRADICTION" and confidence > 60:
        status = "β INCORRECT (Fact Mismatch)"
        color = "#ff4b4b"
    elif verdict == "ENTAILMENT" and confidence > 45:
        status = "β CORRECT (Directly Supported)"
        color = "#2ecc71"
    elif relevance_score > 0.30 and verdict != "CONTRADICTION":
        # On-topic and not contradicted: accept as an inferred answer.
        status = "β CORRECT (Inferred)"
        color = "#f1c40f"
    else:
        status = "β IRRELEVANT / WRONG"
        color = "#95a5a6"

    return status, f"{relevance_score:.2f}", f"{verdict} ({confidence:.1f}%)"
|
| 45 |
|
| 46 |
# Interactive UI: a two-column layout — inputs on the left, results on the
# right — wired to evaluate_response via a single button click.
with gr.Blocks(title="AI Answer Checker") as demo:
    gr.Markdown("# π§ Smart Answer Verifier")
    gr.Markdown("Test how well an answer matches the context provided.")

    with gr.Row():
        # Left column: the three free-text inputs plus the trigger button.
        with gr.Column():
            context_box = gr.Textbox(
                label="Knowledge Base (Context)",
                placeholder="Paste your text here...",
                lines=6,
            )
            question_box = gr.Textbox(
                label="The Question",
                placeholder="What do you want to ask?",
            )
            answer_box = gr.Textbox(
                label="User's Answer",
                placeholder="What did the user say?",
            )
            analyze_btn = gr.Button("Analyze Answer", variant="primary")

        # Right column: the three read-only result widgets, matching the
        # 3-tuple returned by evaluate_response.
        with gr.Column():
            status_box = gr.Textbox(label="Final Verdict")
            relevance_box = gr.Label(label="Relevance Score (0 to 1)")
            nli_box = gr.Label(label="NLI Confidence")

    analyze_btn.click(
        fn=evaluate_response,
        inputs=[context_box, question_box, answer_box],
        outputs=[status_box, relevance_box, nli_box],
    )
|
| 68 |
|
| 69 |
# Script entry point: start the Gradio server only when run directly,
# not when imported as a module.
if __name__ == "__main__":
    demo.launch()
|