yxc20098 committed on
Commit
c07d9e8
·
1 Parent(s): 50772fa

Revert "Fix CI: import scoring from evaluate_runner instead of openra_rl_util"

Browse files

This reverts commit 50772fa898f5c685e89d08926d8cb201577c1376.

Files changed (2) hide show
  1. evaluate.py +1 -1
  2. tests/test_evaluate.py +21 -21
evaluate.py CHANGED
@@ -37,7 +37,7 @@ from pathlib import Path
37
  from typing import Any, Dict, List
38
  from urllib.request import urlopen
39
 
40
- from evaluate_runner import compute_composite_score as compute_composite_score_from_games, compute_game_metrics
41
 
42
  # Evaluation results file
43
  RESULTS_FILE = Path(__file__).parent / "data" / "results.csv"
 
37
  from typing import Any, Dict, List
38
  from urllib.request import urlopen
39
 
40
+ from openra_rl_util.rubrics import compute_composite_score_from_games, compute_game_metrics
41
 
42
  # Evaluation results file
43
  RESULTS_FILE = Path(__file__).parent / "data" / "results.csv"
tests/test_evaluate.py CHANGED
@@ -154,24 +154,24 @@ class TestAppendResults:
154
  assert len(RESULTS_COLUMNS) == 13
155
 
156
 
157
- class TestScoringSource:
158
- """Verify scoring uses the inlined functions from evaluate_runner."""
159
-
160
- def test_evaluate_imports_from_runner(self):
161
- """evaluate.py should import scoring from evaluate_runner."""
162
- from evaluate import compute_composite_score_from_games
163
- from evaluate_runner import compute_composite_score
164
- assert compute_composite_score_from_games is compute_composite_score
165
-
166
- def test_compute_game_metrics_from_runner(self):
167
- """evaluate.py should import compute_game_metrics from evaluate_runner."""
168
- from evaluate import compute_game_metrics
169
- from evaluate_runner import compute_game_metrics as runner_fn
170
- assert compute_game_metrics is runner_fn
171
-
172
- def test_score_calculation_basic(self):
173
- """Inlined compute_composite_score should produce valid scores."""
174
- from evaluate_runner import compute_composite_score
175
- games = [{"win": True, "kills_cost": 3000, "deaths_cost": 1000, "assets_value": 8000}]
176
- score = compute_composite_score(games)
177
- assert 0 < score <= 100
 
154
  assert len(RESULTS_COLUMNS) == 13
155
 
156
 
157
+ class TestScoringUsesUtil:
158
+ """Verify scoring uses the single source of truth from openra-rl-util."""
159
+
160
+ def test_rubrics_re_exports_util(self):
161
+ """rubrics.py should re-export from openra_rl_util."""
162
+ from rubrics import compute_composite_score_from_games
163
+ from openra_rl_util.rubrics import (
164
+ compute_composite_score_from_games as util_fn,
165
+ )
166
+ assert compute_composite_score_from_games is util_fn
167
+
168
+ def test_evaluate_uses_util_scoring(self):
169
+ """evaluate.py should not have its own compute_composite_score."""
170
+ import evaluate
171
+ assert not hasattr(evaluate, "compute_composite_score"), \
172
+ "evaluate.py should use compute_composite_score_from_games from Util"
173
+
174
+ def test_compute_game_metrics_re_exported(self):
175
+ from rubrics import compute_game_metrics
176
+ from openra_rl_util.rubrics import compute_game_metrics as util_fn
177
+ assert compute_game_metrics is util_fn