aaditya-raj commited on
Commit
5972ae0
·
verified ·
1 Parent(s): 60f4d84

Update evaluator_module.py

Browse files
Files changed (1) hide show
  1. evaluator_module.py +39 -20
evaluator_module.py CHANGED
@@ -5,6 +5,7 @@ import json
5
  import torch
6
  from collections import defaultdict
7
  import spacy
 
8
  from transformers import pipeline
9
  from sentence_transformers import SentenceTransformer
10
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
@@ -35,6 +36,8 @@ class AetherScoreEvaluator:
35
  self.sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
36
 
37
  # for hallucination
 
 
38
  self.nli_tokenizer = AutoTokenizer.from_pretrained("prajjwal1/bert-mini-mnli")
39
  self.nli_model = AutoModelForSequenceClassification.from_pretrained("prajjwal1/bert-mini-mnli")
40
 
@@ -47,39 +50,55 @@ class AetherScoreEvaluator:
47
 
48
  def _evaluate_with_llm_judge(self, prompt: str, response: str) -> dict:
49
  """
50
- Hallucination detection using Embedding similarity + NLI.
51
- Returns scores + detailed explanations for both hallucination and assumption.
 
 
 
 
52
  """
53
- # Step 1: Embedding similarity (prompt → response)
54
  emb_sim = self._semantic_similarity(prompt, response)
55
-
56
- # Step 2: NLI check (entailment, neutral, contradiction)
57
- inputs = self.nli_tokenizer.encode_plus(
58
- prompt, response, return_tensors="pt", truncation=True
59
- )
60
  with torch.no_grad():
61
  logits = self.nli_model(**inputs).logits
62
  probs = torch.softmax(logits, dim=-1).cpu().numpy()[0]
63
  entailment, neutral, contradiction = probs[2], probs[1], probs[0]
64
-
65
- # Step 3: Compute hallucination & assumption
66
- halluc_score = 1 - ((entailment * 0.7) + (emb_sim * 0.3))
67
- assumption_score = 1 - halluc_score # alternative proxy
68
-
69
- # Step 4: Explanations
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  halluc_expl = (
71
- f"Embedding similarity={emb_sim:.2f}, "
72
- f"NLI entailment={entailment:.2f}, neutral={neutral:.2f}, contradiction={contradiction:.2f}"
73
  )
74
  assumption_expl = (
75
- f"Assumption control leverages NLI neutrality={neutral:.2f}. "
76
- "Higher neutrality indicates more uncertainty; lower means fewer assumptions."
77
  )
78
-
79
  return {
80
  "hallucination_score": (float(halluc_score), halluc_expl),
81
  "assumption_control": (float(assumption_score), assumption_expl),
82
- }
83
 
84
  # Single evaluation. Inputs: prompt, agent response, expected answer (optional), and task type (general, QA, summarization, etc.)
85
  def evaluate_single(self, prompt: str, response: str, expected_answer: Optional[str] = None, task_type: str = "general") -> Dict:
 
5
  import torch
6
  from collections import defaultdict
7
  import spacy
8
+ import evaluate
9
  from transformers import pipeline
10
  from sentence_transformers import SentenceTransformer
11
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
 
36
  self.sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
37
 
38
  # for hallucination
39
+ self.rouge = evaluate.load("rouge")
40
+ self.sacrebleu = evaluate.load("sacrebleu")
41
  self.nli_tokenizer = AutoTokenizer.from_pretrained("prajjwal1/bert-mini-mnli")
42
  self.nli_model = AutoModelForSequenceClassification.from_pretrained("prajjwal1/bert-mini-mnli")
43
 
 
50
 
51
  def _evaluate_with_llm_judge(self, prompt: str, response: str) -> dict:
52
  """
53
+ Hallucination detection using:
54
+ - NLI (entailment, neutral, contradiction)
55
+ - Embedding similarity
56
+ - ROUGE-L
57
+ - SacreBLEU
58
+ Assumption control derived from NLI neutrality.
59
  """
60
+ # Step 1: Embedding similarity
61
  emb_sim = self._semantic_similarity(prompt, response)
62
+
63
+ # Step 2: NLI inference
64
+ inputs = self.nli_tokenizer.encode_plus(prompt, response, return_tensors="pt", truncation=True)
 
 
65
  with torch.no_grad():
66
  logits = self.nli_model(**inputs).logits
67
  probs = torch.softmax(logits, dim=-1).cpu().numpy()[0]
68
  entailment, neutral, contradiction = probs[2], probs[1], probs[0]
69
+
70
+ # Step 3: ROUGE-L
71
+ rouge_l = self.rouge.compute(predictions=[response], references=[prompt])["rougeL"]
72
+
73
+ # Step 4: SacreBLEU (normalized 0–1)
74
+ sacrebleu = self.sacrebleu.compute(predictions=[response], references=[[prompt]])["score"] / 100.0
75
+
76
+ # Step 5: Weighted hallucination score
77
+ weights = {"entailment": 0.4, "embedding": 0.2, "rouge": 0.2, "sacrebleu": 0.2}
78
+ halluc_score = 1 - (
79
+ weights["entailment"] * entailment +
80
+ weights["embedding"] * emb_sim +
81
+ weights["rouge"] * rouge_l +
82
+ weights["sacrebleu"] * sacrebleu
83
+ )
84
+
85
+ # Step 6: Assumption control from neutrality
86
+ assumption_score = 1 - neutral
87
+
88
+ # Step 7: Explanations
89
  halluc_expl = (
90
+ f"Entailment={entailment:.2f}, Embedding={emb_sim:.2f}, "
91
+ f"ROUGE-L={rouge_l:.2f}, SacreBLEU={sacrebleu:.2f}, Neutral={neutral:.2f}"
92
  )
93
  assumption_expl = (
94
+ f"Assumption control is derived from NLI neutrality={neutral:.2f}. "
95
+ "Lower neutrality stronger confidence."
96
  )
97
+
98
  return {
99
  "hallucination_score": (float(halluc_score), halluc_expl),
100
  "assumption_control": (float(assumption_score), assumption_expl),
101
+ }
102
 
103
  # Single evaluation. Inputs: prompt, agent response, expected answer (optional), and task type (general, QA, summarization, etc.)
104
  def evaluate_single(self, prompt: str, response: str, expected_answer: Optional[str] = None, task_type: str = "general") -> Dict: