| """Tests for sage.data.eval — evaluation dataset loading utilities.""" |
|
|
| import json |
|
|
| import pytest |
|
|
| from sage.data.eval import load_eval_cases |
|
|
|
|
| class TestLoadEvalCases: |
| """Tests for load_eval_cases function.""" |
|
|
| def test_valid_cases(self, tmp_path, monkeypatch): |
| """Valid JSON file returns list of EvalCase objects.""" |
| monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path) |
|
|
| data = [ |
| { |
| "query": "wireless headphones", |
| "relevant_items": {"B001": 3.0, "B002": 2.0}, |
| }, |
| {"query": "bluetooth speaker", "relevant_items": {"B003": 1.0}}, |
| ] |
| (tmp_path / "test.json").write_text(json.dumps(data)) |
|
|
| cases = load_eval_cases("test.json") |
|
|
| assert len(cases) == 2 |
| assert cases[0].query == "wireless headphones" |
| assert cases[0].relevant_items == {"B001": 3.0, "B002": 2.0} |
| assert cases[1].query == "bluetooth speaker" |
|
|
| def test_empty_list_returns_empty(self, tmp_path, monkeypatch): |
| """Empty JSON array returns empty list without error.""" |
| monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path) |
|
|
| (tmp_path / "empty.json").write_text("[]") |
|
|
| cases = load_eval_cases("empty.json") |
|
|
| assert cases == [] |
|
|
| def test_file_not_found_raises_clear_error(self, tmp_path, monkeypatch): |
| """Missing file raises FileNotFoundError with filepath context.""" |
| monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path) |
|
|
| with pytest.raises(FileNotFoundError, match="Evaluation file not found"): |
| load_eval_cases("nonexistent.json") |
|
|
| def test_invalid_json_raises_clear_error(self, tmp_path, monkeypatch): |
| """Invalid JSON raises ValueError with line/column info.""" |
| monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path) |
|
|
| (tmp_path / "bad.json").write_text("{invalid json") |
|
|
| with pytest.raises(ValueError, match="Invalid JSON format"): |
| load_eval_cases("bad.json") |
|
|
| def test_not_array_raises_error(self, tmp_path, monkeypatch): |
| """JSON object (not array) raises ValueError.""" |
| monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path) |
|
|
| (tmp_path / "object.json").write_text('{"query": "test"}') |
|
|
| with pytest.raises(ValueError, match="must contain a JSON array"): |
| load_eval_cases("object.json") |
|
|
| def test_missing_query_first_case(self, tmp_path, monkeypatch): |
| """Missing query in first case raises ValueError with index.""" |
| monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path) |
|
|
| data = [{"relevant_items": {"B001": 1.0}}] |
| (tmp_path / "test.json").write_text(json.dumps(data)) |
|
|
| with pytest.raises(ValueError, match="Missing 'query' field in case 0"): |
| load_eval_cases("test.json") |
|
|
| def test_missing_query_later_case(self, tmp_path, monkeypatch): |
| """Missing query in later case raises ValueError with correct index.""" |
| monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path) |
|
|
| data = [ |
| {"query": "valid", "relevant_items": {"B001": 1.0}}, |
| {"query": "also valid", "relevant_items": {"B002": 2.0}}, |
| {"relevant_items": {"B003": 3.0}}, |
| ] |
| (tmp_path / "test.json").write_text(json.dumps(data)) |
|
|
| with pytest.raises(ValueError, match="Missing 'query' field in case 2"): |
| load_eval_cases("test.json") |
|
|
| def test_missing_relevant_items(self, tmp_path, monkeypatch): |
| """Missing relevant_items raises ValueError with index.""" |
| monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path) |
|
|
| data = [{"query": "test query"}] |
| (tmp_path / "test.json").write_text(json.dumps(data)) |
|
|
| with pytest.raises( |
| ValueError, match="Missing 'relevant_items' field in case 0" |
| ): |
| load_eval_cases("test.json") |
|
|
| def test_relevant_items_not_dict(self, tmp_path, monkeypatch): |
| """relevant_items as list raises ValueError.""" |
| monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path) |
|
|
| data = [{"query": "test", "relevant_items": ["B001", "B002"]}] |
| (tmp_path / "test.json").write_text(json.dumps(data)) |
|
|
| with pytest.raises(ValueError, match="'relevant_items' must be a dict"): |
| load_eval_cases("test.json") |
|
|
| def test_relevance_score_not_numeric(self, tmp_path, monkeypatch): |
| """Non-numeric relevance score raises ValueError with product ID.""" |
| monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path) |
|
|
| data = [{"query": "test", "relevant_items": {"B001": "high"}}] |
| (tmp_path / "test.json").write_text(json.dumps(data)) |
|
|
| with pytest.raises( |
| ValueError, match="Relevance score for 'B001' must be numeric" |
| ): |
| load_eval_cases("test.json") |
|
|
| def test_relevance_score_as_int_accepted(self, tmp_path, monkeypatch): |
| """Integer relevance scores are accepted.""" |
| monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path) |
|
|
| data = [{"query": "test", "relevant_items": {"B001": 3}}] |
| (tmp_path / "test.json").write_text(json.dumps(data)) |
|
|
| cases = load_eval_cases("test.json") |
|
|
| assert cases[0].relevant_items["B001"] == 3 |
|
|
| def test_user_id_optional(self, tmp_path, monkeypatch): |
| """user_id field is optional.""" |
| monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path) |
|
|
| data = [{"query": "test", "relevant_items": {"B001": 1.0}}] |
| (tmp_path / "test.json").write_text(json.dumps(data)) |
|
|
| cases = load_eval_cases("test.json") |
|
|
| assert cases[0].user_id is None |
|
|
| def test_user_id_preserved(self, tmp_path, monkeypatch): |
| """user_id field is preserved when present.""" |
| monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path) |
|
|
| data = [{"query": "test", "relevant_items": {"B001": 1.0}, "user_id": "U123"}] |
| (tmp_path / "test.json").write_text(json.dumps(data)) |
|
|
| cases = load_eval_cases("test.json") |
|
|
| assert cases[0].user_id == "U123" |
|
|
| def test_extra_fields_ignored(self, tmp_path, monkeypatch): |
| """Extra fields (category, intent) are ignored without error.""" |
| monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path) |
|
|
| data = [ |
| { |
| "query": "smart speaker", |
| "relevant_items": {"B001": 3.0}, |
| "category": "echo_devices", |
| "intent": "feature_specific", |
| } |
| ] |
| (tmp_path / "test.json").write_text(json.dumps(data)) |
|
|
| cases = load_eval_cases("test.json") |
|
|
| assert len(cases) == 1 |
| assert cases[0].query == "smart speaker" |
|
|
| def test_relevant_set_works_after_load(self, tmp_path, monkeypatch): |
| """Loaded cases have working relevant_set property.""" |
| monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path) |
|
|
| data = [ |
| {"query": "test", "relevant_items": {"B001": 3.0, "B002": 0.0, "B003": 1.0}} |
| ] |
| (tmp_path / "test.json").write_text(json.dumps(data)) |
|
|
| cases = load_eval_cases("test.json") |
|
|
| |
| assert cases[0].relevant_set == {"B001", "B003"} |
|
|