File size: 2,104 Bytes
b6f9fa8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import pytest
from src.modules.faithfulness import score_faithfulness
from src.modules.source_credibility import score_source_credibility
from src.modules.contradiction import score_contradiction
from src.evaluation.aggregator import aggregate

def test_source_credibility():
    """Score a two-chunk mock corpus and check the score range and chunk count."""
    abstract_chunk = dict(chunk_id="c1", pub_type="research_abstract", title="Mock Paper")
    exam_chunk = dict(chunk_id="c2", pub_type="exam_question", title="Mock Exam Q")

    outcome = score_source_credibility([abstract_chunk, exam_chunk])

    # The score must be strictly positive and land inside the [0.3, 0.5] band.
    assert outcome.score > 0.0
    assert 0.3 <= outcome.score <= 0.5
    # Both input chunks must be reported in the result details.
    chunk_total = outcome.details["chunk_count"]
    assert chunk_total == 2

def test_faithfulness_nli():
    """Check faithfulness scores for a supported answer and a contradicted one."""
    # Case 1: the context restates the answer, so the score must be >= 0.8.
    supporting_docs = ["The sky is colored blue today."]
    supported = score_faithfulness(
        answer="The sky is blue.",
        context_docs=supporting_docs,
    )
    assert supported.score >= 0.8

    # Case 2: the context explicitly denies the answer, so the score must be <= 0.2.
    opposing_docs = ["The sky is completely blue and not red."]
    contradicted = score_faithfulness(
        answer="The sky is red.",
        context_docs=opposing_docs,
    )
    assert contradicted.score <= 0.2

def test_aggregator_logic():
    """Check that ``aggregate`` combines four module results into the expected summary.

    Four stub results (scores 1.0, 1.0, 0.5, 1.0) are passed together with an
    explicit weights mapping. The test expects an aggregate score within 0.01
    of 0.9, ``details["hrs"] == 10`` and ``details["risk_band"] == "LOW"``.
    """
    # Weights handed straight to aggregate(); "ragas_composite" is zeroed out.
    # NOTE(review): 0.9 matches 0.4*1.0 + 0.2*1.0 + 0.2*0.5 + 0.2*1.0, so the
    # aggregator presumably computes a weighted sum -- confirm against aggregate().
    weights = {
        "faithfulness": 0.4,
        "entity_accuracy": 0.2,
        "source_credibility": 0.2,
        "contradiction_risk": 0.2,
        "ragas_composite": 0.0,
    }

    class MockResult:
        """Minimal stand-in for a module result object."""

        def __init__(self, score, error=None):
            # Assumed to be the attributes aggregate() reads -- TODO confirm.
            self.score = score
            self.error = error
            self.latency_ms = 10

    res = aggregate(
        faithfulness_result=MockResult(1.0),
        entity_result=MockResult(1.0),
        source_result=MockResult(0.5),
        contradiction_result=MockResult(1.0),
        weights=weights,
    )

    # Tolerance comparison avoids exact float equality on the combined score.
    assert abs(res.score - 0.9) < 0.01
    assert res.details["hrs"] == 10
    assert res.details["risk_band"] == "LOW"