Spaces:
Running
Running
Revert "Fix CI: import scoring from evaluate_runner instead of openra_rl_util"
Browse files
This reverts commit 50772fa898f5c685e89d08926d8cb201577c1376.
- evaluate.py +1 -1
- tests/test_evaluate.py +21 -21
evaluate.py
CHANGED
|
@@ -37,7 +37,7 @@ from pathlib import Path
|
|
| 37 |
from typing import Any, Dict, List
|
| 38 |
from urllib.request import urlopen
|
| 39 |
|
| 40 |
-
from
|
| 41 |
|
| 42 |
# Evaluation results file
|
| 43 |
RESULTS_FILE = Path(__file__).parent / "data" / "results.csv"
|
|
|
|
| 37 |
from typing import Any, Dict, List
|
| 38 |
from urllib.request import urlopen
|
| 39 |
|
| 40 |
+
from openra_rl_util.rubrics import compute_composite_score_from_games, compute_game_metrics
|
| 41 |
|
| 42 |
# Evaluation results file
|
| 43 |
RESULTS_FILE = Path(__file__).parent / "data" / "results.csv"
|
tests/test_evaluate.py
CHANGED
|
@@ -154,24 +154,24 @@ class TestAppendResults:
|
|
| 154 |
assert len(RESULTS_COLUMNS) == 13
|
| 155 |
|
| 156 |
|
| 157 |
-
class
|
| 158 |
-
"""Verify scoring uses the
|
| 159 |
-
|
| 160 |
-
def
|
| 161 |
-
"""
|
| 162 |
-
from
|
| 163 |
-
from
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
assert
|
|
|
|
| 154 |
assert len(RESULTS_COLUMNS) == 13
|
| 155 |
|
| 156 |
|
| 157 |
+
class TestScoringUsesUtil:
|
| 158 |
+
"""Verify scoring uses the single source of truth from openra-rl-util."""
|
| 159 |
+
|
| 160 |
+
def test_rubrics_re_exports_util(self):
|
| 161 |
+
"""rubrics.py should re-export from openra_rl_util."""
|
| 162 |
+
from rubrics import compute_composite_score_from_games
|
| 163 |
+
from openra_rl_util.rubrics import (
|
| 164 |
+
compute_composite_score_from_games as util_fn,
|
| 165 |
+
)
|
| 166 |
+
assert compute_composite_score_from_games is util_fn
|
| 167 |
+
|
| 168 |
+
def test_evaluate_uses_util_scoring(self):
|
| 169 |
+
"""evaluate.py should not have its own compute_composite_score."""
|
| 170 |
+
import evaluate
|
| 171 |
+
assert not hasattr(evaluate, "compute_composite_score"), \
|
| 172 |
+
"evaluate.py should use compute_composite_score_from_games from Util"
|
| 173 |
+
|
| 174 |
+
def test_compute_game_metrics_re_exported(self):
|
| 175 |
+
from rubrics import compute_game_metrics
|
| 176 |
+
from openra_rl_util.rubrics import compute_game_metrics as util_fn
|
| 177 |
+
assert compute_game_metrics is util_fn
|