File size: 2,899 Bytes
e71fabd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import pytest
import tempfile
import os
from core.eval import RAGEvaluator
from core.retrieval import RAGManager
from core.index import VectorStore
from core.utils import Chunk, generate_id

class TestRAGEvaluator:
    """Tests for the private scoring helpers of ``RAGEvaluator``.

    Each test receives pytest's built-in ``tmp_path`` fixture and uses it as
    the ``RAGManager`` persist directory.  The directory is unique per test
    and its lifetime is managed by pytest, so nothing is left behind the way
    a bare ``tempfile.mkdtemp()`` call (which never deletes its directory)
    would leave it.
    """

    @staticmethod
    def _make_evaluator(persist_dir):
        """Build a ``RAGEvaluator`` whose ``RAGManager`` persists in *persist_dir*.

        Args:
            persist_dir: Directory handed to ``RAGManager`` as
                ``persist_directory``; typically the ``tmp_path`` fixture.

        Returns:
            A ``RAGEvaluator`` wrapping a freshly created ``RAGManager``.
        """
        # Convert to ``str`` so RAGManager keeps receiving the same argument
        # type it got from ``tempfile.mkdtemp()`` before.
        rag_manager = RAGManager(persist_directory=str(persist_dir))
        return RAGEvaluator(rag_manager)

    def test_calculate_hit_at_k_exact_match(self, tmp_path):
        """Hit@k is 1.0 when the top result is identical to the ground truth."""
        evaluator = self._make_evaluator(tmp_path)

        retrieved = ["This is a test document about emergency procedures"]
        ground_truth = ["This is a test document about emergency procedures"]
        hit = evaluator._calculate_hit_at_k(retrieved, ground_truth, k=1)

        assert hit == 1.0

    def test_calculate_hit_at_k_no_match(self, tmp_path):
        """Hit@k is 0.0 when the retrieved text does not match the ground truth."""
        evaluator = self._make_evaluator(tmp_path)

        retrieved = ["Unrelated document"]
        ground_truth = ["Expected document"]
        hit = evaluator._calculate_hit_at_k(retrieved, ground_truth, k=1)

        assert hit == 0.0

    def test_calculate_mrr(self, tmp_path):
        """MRR is returned as a float within the closed interval [0, 1]."""
        evaluator = self._make_evaluator(tmp_path)

        # The expected document sits at rank 2 of the retrieved list.
        retrieved = ["Wrong doc", "Correct doc"]
        ground_truth = ["Correct doc"]

        # Nothing is mocked here: the real matching logic runs, so the exact
        # score depends on how the evaluator decides two documents match
        # (the original author notes it is embedding-based -- TODO confirm).
        # The test therefore only pins the return type and the valid range.
        mrr = evaluator._calculate_mrr(retrieved, ground_truth)
        assert isinstance(mrr, float)
        assert 0.0 <= mrr <= 1.0

    def test_documents_match(self, tmp_path):
        """Identical documents match and the result is boolean-like."""
        evaluator = self._make_evaluator(tmp_path)

        doc1 = "This is a test document about emergency procedures"
        doc2 = "This is a test document about emergency procedures"
        doc3 = "Completely different content about something else"

        match_same = evaluator._documents_match(doc1, doc2)
        # The result must compare equal to True or False (a plain ``bool`` or
        # a ``numpy.bool_``), i.e. it is a verdict rather than a raw score.
        assert match_same in (True, False)
        # Identical text must always be reported as a match.
        assert match_same

        # Whether unrelated text matches depends on the evaluator's
        # threshold, so only the boolean-like contract is checked here.
        match_diff = evaluator._documents_match(doc1, doc3)
        assert match_diff in (True, False)