Spaces:
Running
Running
File size: 2,104 Bytes
# Revision b6f9fa8
import pytest
from src.modules.faithfulness import score_faithfulness
from src.modules.source_credibility import score_source_credibility
from src.modules.contradiction import score_contradiction
from src.evaluation.aggregator import aggregate
def test_source_credibility():
    """Check the credibility score and chunk count for a two-chunk input."""
    abstract_chunk = {
        "chunk_id": "c1",
        "pub_type": "research_abstract",
        "title": "Mock Paper",
    }
    exam_chunk = {
        "chunk_id": "c2",
        "pub_type": "exam_question",
        "title": "Mock Exam Q",
    }

    outcome = score_source_credibility([abstract_chunk, exam_chunk])

    # Strictly positive first, then inside the expected [0.3, 0.5] band.
    assert outcome.score > 0.0
    assert 0.3 <= outcome.score <= 0.5
    assert outcome.details["chunk_count"] == 2
def test_faithfulness_nli():
    """Expect a context-supported answer to score >= 0.8 and a contradicted one <= 0.2."""
    supporting_context = ["The sky is colored blue today."]
    supported = score_faithfulness(
        answer="The sky is blue.",
        context_docs=supporting_context,
    )
    assert supported.score >= 0.8

    opposing_context = ["The sky is completely blue and not red."]
    contradicted = score_faithfulness(
        answer="The sky is red.",
        context_docs=opposing_context,
    )
    assert contradicted.score <= 0.2
def test_aggregator_logic():
    """Check the weighted aggregate of four mocked module results.

    Assuming each weight applies to the like-named result, the expected
    score is 0.4 * 1.0 + 0.2 * 1.0 + 0.2 * 0.5 + 0.2 * 1.0 = 0.9.
    """
    # Mock config
    test_cfg = {
        "evaluation": {
            "weights": {
                "faithfulness": 0.4,
                "entity_accuracy": 0.2,
                "source_credibility": 0.2,
                "contradiction_risk": 0.2,
                "ragas_composite": 0.0,
            }
        }
    }

    class MockResult:
        """Stand-in result object carrying score, error and latency_ms."""

        def __init__(self, score, error=None):
            self.score = score
            self.error = error
            self.latency_ms = 10

    res = aggregate(
        faithfulness_result=MockResult(1.0),
        entity_result=MockResult(1.0),
        source_result=MockResult(0.5),
        contradiction_result=MockResult(1.0),
        weights=test_cfg["evaluation"]["weights"],
    )

    # Tolerance absorbs floating-point error in the weighted sum.
    assert abs(res.score - 0.9) < 0.01
    # NOTE(review): "hrs" and "risk_band" are computed inside aggregate();
    # presumably hrs is a 0-100 risk figure derived from the score — confirm
    # against the aggregator before changing these expected values.
    assert res.details["hrs"] == 10
    assert res.details["risk_band"] == "LOW"
|