File size: 3,329 Bytes
6bef416
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
from __future__ import annotations

import pandas as pd
import pytest

from src.data import DataBundle, DataContractError, schema_report, validate_bundle


# These row-count tests are packaged-artifact integrity snapshots.
# If the bundled synthetic CSVs are intentionally regenerated, update these expected values
# together with docs/artifact_manifest.md checksums.
def test_bundle_eval_row_count(bundle: DataBundle) -> None:
    """Pin the row count of the packaged ``eval_runs`` table."""
    expected_rows = 3824
    actual_rows = len(bundle.eval_runs)
    assert actual_rows == expected_rows


def test_bundle_retrieval_row_count(bundle: DataBundle) -> None:
    """Pin the row count of the packaged ``retrieval_events`` table."""
    expected_rows = 93375
    actual_rows = len(bundle.retrieval_events)
    assert actual_rows == expected_rows


def test_bundle_documents_row_count(bundle: DataBundle) -> None:
    """Pin the row count of the packaged ``documents`` table."""
    expected_rows = 658
    actual_rows = len(bundle.documents)
    assert actual_rows == expected_rows


def test_bundle_chunks_row_count(bundle: DataBundle) -> None:
    """Pin the row count of the packaged ``chunks`` table."""
    expected_rows = 5237
    actual_rows = len(bundle.chunks)
    assert actual_rows == expected_rows


def test_bundle_scenarios_row_count(bundle: DataBundle) -> None:
    """Pin the row count of the packaged ``scenarios`` table."""
    expected_rows = 62
    actual_rows = len(bundle.scenarios)
    assert actual_rows == expected_rows


def test_schema_report_all_pass(bundle: DataBundle) -> None:
    """Require that ``schema_report`` yields ``"pass"`` as its only status value."""
    # Collapsing to a set means one non-"pass" entry is enough to fail.
    observed_statuses = set(schema_report(bundle)["status"])
    assert observed_statuses == {"pass"}


def test_eval_example_id_unique(bundle: DataBundle) -> None:
    """Require ``example_id`` to have no repeated values in ``eval_runs``."""
    example_ids = bundle.eval_runs["example_id"]
    assert example_ids.is_unique


def test_document_id_unique(bundle: DataBundle) -> None:
    """Require ``doc_id`` to have no repeated values in ``documents``."""
    doc_ids = bundle.documents["doc_id"]
    assert doc_ids.is_unique


def test_chunk_id_unique(bundle: DataBundle) -> None:
    """Require ``chunk_id`` to have no repeated values in ``chunks``."""
    chunk_ids = bundle.chunks["chunk_id"]
    assert chunk_ids.is_unique


def test_scenario_id_unique(bundle: DataBundle) -> None:
    """Require ``scenario_id`` to have no repeated values in ``scenarios``."""
    scenario_ids = bundle.scenarios["scenario_id"]
    assert scenario_ids.is_unique


def test_retrieval_examples_reference_eval_rows(bundle: DataBundle) -> None:
    """Every ``example_id`` in ``retrieval_events`` must also occur in ``eval_runs``.

    Both sides are cast to ``str`` before comparison, so the check does not
    depend on the two columns sharing a dtype.
    """
    referenced_ids = set(bundle.retrieval_events["example_id"].astype(str))
    known_ids = set(bundle.eval_runs["example_id"].astype(str))
    assert referenced_ids <= known_ids


def test_retrieval_chunks_reference_chunk_table(bundle: DataBundle) -> None:
    """Every ``chunk_id`` in ``retrieval_events`` must also occur in ``chunks``.

    Both sides are cast to ``str`` before comparison, so the check does not
    depend on the two columns sharing a dtype.
    """
    referenced_ids = set(bundle.retrieval_events["chunk_id"].astype(str))
    known_ids = set(bundle.chunks["chunk_id"].astype(str))
    assert referenced_ids <= known_ids


def test_chunks_reference_documents(bundle: DataBundle) -> None:
    """Every ``doc_id`` in ``chunks`` must also occur in ``documents``.

    Both sides are cast to ``str`` before comparison, so the check does not
    depend on the two columns sharing a dtype.
    """
    referenced_ids = set(bundle.chunks["doc_id"].astype(str))
    known_ids = set(bundle.documents["doc_id"].astype(str))
    assert referenced_ids <= known_ids


def test_eval_runs_reference_scenarios(bundle: DataBundle) -> None:
    """Every ``scenario_id`` in ``eval_runs`` must also occur in ``scenarios``.

    Both sides are cast to ``str`` before comparison, so the check does not
    depend on the two columns sharing a dtype.
    """
    referenced_ids = set(bundle.eval_runs["scenario_id"].astype(str))
    known_ids = set(bundle.scenarios["scenario_id"].astype(str))
    assert referenced_ids <= known_ids


def test_metric_rates_stay_in_unit_interval(bundle: DataBundle) -> None:
    """Check that the listed ``eval_runs`` metric columns stay within [0, 1].

    Each column is coerced to numeric; entries that cannot be parsed become
    NaN and are dropped together with missing values, so only the remaining
    numeric entries are range-checked (bounds inclusive).
    """
    rate_columns = (
        "is_correct",
        "hallucination_flag",
        "recall_at_5",
        "recall_at_10",
        "mrr_at_10",
    )
    for column_name in rate_columns:
        numeric_values = pd.to_numeric(bundle.eval_runs[column_name], errors="coerce")
        present_values = numeric_values.dropna()
        within_bounds = present_values.between(0, 1)
        assert within_bounds.all()


def test_validate_bundle_rejects_broken_retrieval_reference(bundle: DataBundle) -> None:
    """``validate_bundle`` must raise ``DataContractError`` for a dangling chunk reference.

    The first retrieval event of a copied table is pointed at the chunk id
    ``"missing_chunk"`` (presumably absent from ``chunks``); all other tables
    are passed through from the fixture unchanged.
    """
    tampered_events = bundle.retrieval_events.copy()  # copy so the fixture's frame is not mutated
    first_label = tampered_events.index[0]
    tampered_events.loc[first_label, "chunk_id"] = "missing_chunk"

    candidate = DataBundle(
        bundle.eval_runs,
        tampered_events,
        bundle.documents,
        bundle.chunks,
        bundle.scenarios,
        bundle.dictionary,
    )

    with pytest.raises(DataContractError):
        validate_bundle(candidate)


def test_validate_bundle_rejects_duplicate_example_id(bundle: DataBundle) -> None:
    """``validate_bundle`` must raise ``DataContractError`` for a repeated ``example_id``.

    In a copied ``eval_runs`` table, the second row's ``example_id`` is
    overwritten with the first row's value; all other tables are passed
    through from the fixture unchanged.
    """
    tampered_runs = bundle.eval_runs.copy()  # copy so the fixture's frame is not mutated
    first_label = tampered_runs.index[0]
    second_label = tampered_runs.index[1]
    duplicated_id = tampered_runs.loc[first_label, "example_id"]
    tampered_runs.loc[second_label, "example_id"] = duplicated_id

    candidate = DataBundle(
        tampered_runs,
        bundle.retrieval_events,
        bundle.documents,
        bundle.chunks,
        bundle.scenarios,
        bundle.dictionary,
    )

    with pytest.raises(DataContractError):
        validate_bundle(candidate)