# Spaces: softblackhole/rag-evaluation-system (Sleeping)
# File size: 2,899 Bytes
# e71fabd

import os
import shutil
import tempfile

import pytest

from core.eval import RAGEvaluator
from core.retrieval import RAGManager
from core.index import VectorStore
from core.utils import Chunk, generate_id

class TestRAGEvaluator:
    """Tests for the private metric helpers of ``RAGEvaluator``.

    Covers ``_calculate_hit_at_k``, ``_calculate_mrr`` and
    ``_documents_match``. Every test runs against a fresh evaluator whose
    ``RAGManager`` persists into its own temporary directory; the directory
    is removed again after the test so runs do not leak files.
    """

    def setup_method(self):
        """Create a per-test persist directory and the evaluator under test."""
        self._persist_dir = tempfile.mkdtemp()
        rag_manager = RAGManager(persist_directory=self._persist_dir)
        self.evaluator = RAGEvaluator(rag_manager)

    def teardown_method(self):
        """Remove the per-test persist directory.

        Cleanup is best-effort (``ignore_errors=True``): a file that is still
        held open by the store must not turn a passing test into an error.
        """
        shutil.rmtree(self._persist_dir, ignore_errors=True)

    def test_calculate_hit_at_k_exact_match(self):
        """Hit@1 is 1.0 when the retrieved text equals the ground truth."""
        retrieved = ["This is a test document about emergency procedures"]
        ground_truth = ["This is a test document about emergency procedures"]

        hit = self.evaluator._calculate_hit_at_k(retrieved, ground_truth, k=1)

        assert hit == 1.0

    def test_calculate_hit_at_k_no_match(self):
        """Hit@1 is 0.0 when the retrieved text does not match."""
        retrieved = ["Unrelated document"]
        ground_truth = ["Expected document"]

        hit = self.evaluator._calculate_hit_at_k(retrieved, ground_truth, k=1)

        assert hit == 0.0

    def test_calculate_mrr(self):
        """MRR is returned as a float within [0.0, 1.0]."""
        # The ground-truth text sits at rank 2 of the retrieved list.
        retrieved = ["Wrong doc", "Correct doc"]
        ground_truth = ["Correct doc"]

        # Nothing is mocked here: the score depends on how the evaluator
        # matches documents (reportedly embedding-based), so only the type
        # and the valid range are pinned, not an exact value.
        mrr = self.evaluator._calculate_mrr(retrieved, ground_truth)

        assert isinstance(mrr, float)
        assert 0.0 <= mrr <= 1.0

    def test_documents_match(self):
        """Identical texts match; any result is boolean-like."""
        doc1 = "This is a test document about emergency procedures"
        doc2 = "This is a test document about emergency procedures"
        doc3 = "Completely different content about something else"

        match_same = self.evaluator._documents_match(doc1, doc2)
        # The result may be ``bool`` or ``numpy.bool_``, so compare by value
        # instead of checking the exact type.
        assert match_same in (True, False)
        # Identical documents must be reported as a match.
        assert match_same

        # Whether different documents match depends on the similarity
        # threshold, so only require a boolean-like answer.
        match_diff = self.evaluator._documents_match(doc1, doc3)
        assert match_diff in (True, False)