Spaces:

vxa8502
/

Sage

Running

App Files Files Community

vxa8502 commited on Feb 23

Commit

cfb4413

1 Parent(s): 866804f

Harden eval loader

Browse files

Files changed (2) hide show

sage/data/eval.py +59 -9
tests/test_eval.py +190 -0

sage/data/eval.py CHANGED Viewed

@@ -20,17 +20,67 @@ def load_eval_cases(filename: str) -> list[EvalCase]:
     Returns:
         List of EvalCase objects.
     """
     filepath = EVAL_DIR / filename
-    with open(filepath) as f:
-        data = json.load(f)
-    return [
-        EvalCase(
-            query=d["query"],
-            relevant_items=d["relevant_items"],
-            user_id=d.get("user_id"),
         )
-        for d in data
-    ]

     Returns:
         List of EvalCase objects.
+    Raises:
+        FileNotFoundError: If the file does not exist.
+        ValueError: If JSON is invalid or data fails validation.
     """
     filepath = EVAL_DIR / filename
+    # Load JSON with helpful error messages
+    try:
+        with open(filepath) as f:
+            data = json.load(f)
+    except FileNotFoundError:
+        raise FileNotFoundError(f"Evaluation file not found: {filepath}")
+    except json.JSONDecodeError as e:
+        raise ValueError(
+            f"Invalid JSON format in evaluation file: {filepath} "
+            f"(line {e.lineno}, column {e.colno})"
+        )
+    # Handle empty list gracefully
+    if not data:
+        return []
+    # Validate structure is a list
+    if not isinstance(data, list):
+        raise ValueError(
+            f"Evaluation file must contain a JSON array, got {type(data).__name__}: "
+            f"{filepath}"
         )
+    # Validate each case
+    cases = []
+    for i, d in enumerate(data):
+        # Check required fields
+        if "query" not in d:
+            raise ValueError(f"Missing 'query' field in case {i}: {filepath}")
+        if "relevant_items" not in d:
+            raise ValueError(f"Missing 'relevant_items' field in case {i}: {filepath}")
+        # Validate relevant_items is a dict
+        relevant_items = d["relevant_items"]
+        if not isinstance(relevant_items, dict):
+            raise ValueError(
+                f"'relevant_items' must be a dict in case {i}, "
+                f"got {type(relevant_items).__name__}: {filepath}"
+            )
+        # Validate relevance scores are numeric
+        for product_id, score in relevant_items.items():
+            if not isinstance(score, (int, float)):
+                raise ValueError(
+                    f"Relevance score for '{product_id}' must be numeric in case {i}, "
+                    f"got {type(score).__name__}: '{score}'"
+                )
+        cases.append(
+            EvalCase(
+                query=d["query"],
+                relevant_items=relevant_items,
+                user_id=d.get("user_id"),
+            )
+        )
+    return cases

tests/test_eval.py ADDED Viewed

	@@ -0,0 +1,190 @@

+"""Tests for sage.data.eval — evaluation dataset loading utilities."""
+import json
+import pytest
+from sage.data.eval import load_eval_cases
+class TestLoadEvalCases:
+    """Tests for load_eval_cases function."""
+    def test_valid_cases(self, tmp_path, monkeypatch):
+        """Valid JSON file returns list of EvalCase objects."""
+        monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path)
+        data = [
+            {
+                "query": "wireless headphones",
+                "relevant_items": {"B001": 3.0, "B002": 2.0},
+            },
+            {"query": "bluetooth speaker", "relevant_items": {"B003": 1.0}},
+        ]
+        (tmp_path / "test.json").write_text(json.dumps(data))
+        cases = load_eval_cases("test.json")
+        assert len(cases) == 2
+        assert cases[0].query == "wireless headphones"
+        assert cases[0].relevant_items == {"B001": 3.0, "B002": 2.0}
+        assert cases[1].query == "bluetooth speaker"
+    def test_empty_list_returns_empty(self, tmp_path, monkeypatch):
+        """Empty JSON array returns empty list without error."""
+        monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path)
+        (tmp_path / "empty.json").write_text("[]")
+        cases = load_eval_cases("empty.json")
+        assert cases == []
+    def test_file_not_found_raises_clear_error(self, tmp_path, monkeypatch):
+        """Missing file raises FileNotFoundError with filepath context."""
+        monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path)
+        with pytest.raises(FileNotFoundError, match="Evaluation file not found"):
+            load_eval_cases("nonexistent.json")
+    def test_invalid_json_raises_clear_error(self, tmp_path, monkeypatch):
+        """Invalid JSON raises ValueError with line/column info."""
+        monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path)
+        (tmp_path / "bad.json").write_text("{invalid json")
+        with pytest.raises(ValueError, match="Invalid JSON format"):
+            load_eval_cases("bad.json")
+    def test_not_array_raises_error(self, tmp_path, monkeypatch):
+        """JSON object (not array) raises ValueError."""
+        monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path)
+        (tmp_path / "object.json").write_text('{"query": "test"}')
+        with pytest.raises(ValueError, match="must contain a JSON array"):
+            load_eval_cases("object.json")
+    def test_missing_query_first_case(self, tmp_path, monkeypatch):
+        """Missing query in first case raises ValueError with index."""
+        monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path)
+        data = [{"relevant_items": {"B001": 1.0}}]
+        (tmp_path / "test.json").write_text(json.dumps(data))
+        with pytest.raises(ValueError, match="Missing 'query' field in case 0"):
+            load_eval_cases("test.json")
+    def test_missing_query_later_case(self, tmp_path, monkeypatch):
+        """Missing query in later case raises ValueError with correct index."""
+        monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path)
+        data = [
+            {"query": "valid", "relevant_items": {"B001": 1.0}},
+            {"query": "also valid", "relevant_items": {"B002": 2.0}},
+            {"relevant_items": {"B003": 3.0}},  # Missing query at index 2
+        ]
+        (tmp_path / "test.json").write_text(json.dumps(data))
+        with pytest.raises(ValueError, match="Missing 'query' field in case 2"):
+            load_eval_cases("test.json")
+    def test_missing_relevant_items(self, tmp_path, monkeypatch):
+        """Missing relevant_items raises ValueError with index."""
+        monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path)
+        data = [{"query": "test query"}]
+        (tmp_path / "test.json").write_text(json.dumps(data))
+        with pytest.raises(
+            ValueError, match="Missing 'relevant_items' field in case 0"
+        ):
+            load_eval_cases("test.json")
+    def test_relevant_items_not_dict(self, tmp_path, monkeypatch):
+        """relevant_items as list raises ValueError."""
+        monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path)
+        data = [{"query": "test", "relevant_items": ["B001", "B002"]}]
+        (tmp_path / "test.json").write_text(json.dumps(data))
+        with pytest.raises(ValueError, match="'relevant_items' must be a dict"):
+            load_eval_cases("test.json")
+    def test_relevance_score_not_numeric(self, tmp_path, monkeypatch):
+        """Non-numeric relevance score raises ValueError with product ID."""
+        monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path)
+        data = [{"query": "test", "relevant_items": {"B001": "high"}}]
+        (tmp_path / "test.json").write_text(json.dumps(data))
+        with pytest.raises(
+            ValueError, match="Relevance score for 'B001' must be numeric"
+        ):
+            load_eval_cases("test.json")
+    def test_relevance_score_as_int_accepted(self, tmp_path, monkeypatch):
+        """Integer relevance scores are accepted."""
+        monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path)
+        data = [{"query": "test", "relevant_items": {"B001": 3}}]
+        (tmp_path / "test.json").write_text(json.dumps(data))
+        cases = load_eval_cases("test.json")
+        assert cases[0].relevant_items["B001"] == 3
+    def test_user_id_optional(self, tmp_path, monkeypatch):
+        """user_id field is optional."""
+        monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path)
+        data = [{"query": "test", "relevant_items": {"B001": 1.0}}]
+        (tmp_path / "test.json").write_text(json.dumps(data))
+        cases = load_eval_cases("test.json")
+        assert cases[0].user_id is None
+    def test_user_id_preserved(self, tmp_path, monkeypatch):
+        """user_id field is preserved when present."""
+        monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path)
+        data = [{"query": "test", "relevant_items": {"B001": 1.0}, "user_id": "U123"}]
+        (tmp_path / "test.json").write_text(json.dumps(data))
+        cases = load_eval_cases("test.json")
+        assert cases[0].user_id == "U123"
+    def test_extra_fields_ignored(self, tmp_path, monkeypatch):
+        """Extra fields (category, intent) are ignored without error."""
+        monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path)
+        data = [
+            {
+                "query": "smart speaker",
+                "relevant_items": {"B001": 3.0},
+                "category": "echo_devices",
+                "intent": "feature_specific",
+            }
+        ]
+        (tmp_path / "test.json").write_text(json.dumps(data))
+        cases = load_eval_cases("test.json")
+        assert len(cases) == 1
+        assert cases[0].query == "smart speaker"
+    def test_relevant_set_works_after_load(self, tmp_path, monkeypatch):
+        """Loaded cases have working relevant_set property."""
+        monkeypatch.setattr("sage.data.eval.EVAL_DIR", tmp_path)
+        data = [
+            {"query": "test", "relevant_items": {"B001": 3.0, "B002": 0.0, "B003": 1.0}}
+        ]
+        (tmp_path / "test.json").write_text(json.dumps(data))
+        cases = load_eval_cases("test.json")
+        # relevant_set should only include items with score > 0
+        assert cases[0].relevant_set == {"B001", "B003"}