File size: 1,508 Bytes
f4c70c8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import re

from src.llm.llm_client import LLMClient

class HallucinationGrader:
    """Grades an answer for hallucination via lexical overlap with its context.

    An answer is "grounded" when enough of its key terms appear in the
    retrieval context; otherwise it is flagged as a hallucination. The
    check is purely lexical — the stored LLM client is kept for the
    interface but not consulted by ``grade``.
    """

    # Words too common to signal grounding; removed before overlap is measured.
    _STOP_WORDS = frozenset({
        'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to',
        'for', 'of', 'with', 'is', 'are', 'was', 'were',
    })

    # Compiled once at class-definition time instead of on every grade() call.
    _WORD_RE = re.compile(r'\w+')

    # User Rule: answers with less than this fraction of their key terms
    # found in the context are treated as hallucinated.
    _OVERLAP_THRESHOLD = 0.25

    def __init__(self, llm_client: "LLMClient"):
        # Stored for potential LLM-based grading; unused by the lexical grade().
        # (String annotation keeps the type hint lazy — no import needed at
        # evaluation time.)
        self.llm = llm_client

    @classmethod
    def _tokenize(cls, text: str) -> set:
        """Lowercase *text* and return its set of non-stopword word tokens."""
        return {
            tok for tok in cls._WORD_RE.findall(text.lower())
            if tok not in cls._STOP_WORDS
        }

    def grade(self, context: str, answer: str) -> dict:
        """Return ``{"score": float, "grounded": bool}`` for *answer* vs *context*.

        score 0.0 -> explicit refusal ("not enough information")
        score 0.1 -> grounded (overlap >= 0.25, or answer has no key terms)
        score 1.0 -> hallucination (overlap < 0.25)
        """
        # 1. An explicit refusal is always treated as grounded.
        if "not enough information" in answer.lower():
            return {"score": 0.0, "grounded": True}

        # 2. Key-term overlap between answer and context.
        answer_tokens = self._tokenize(answer)
        context_tokens = self._tokenize(context)

        # Nothing to compare (answer was only stopwords/punctuation):
        # default to grounded with the minimum non-refusal score.
        if not answer_tokens:
            return {"score": 0.1, "grounded": True}

        overlap_ratio = len(answer_tokens & context_tokens) / len(answer_tokens)

        # User Rule: if overlap < 0.25 -> 1.0 (hallucination),
        # else -> 0.1 (grounded) — 0.1 requested specifically.
        if overlap_ratio < self._OVERLAP_THRESHOLD:
            return {"score": 1.0, "grounded": False}
        return {"score": 0.1, "grounded": True}