yxc20098 committed on
Commit
602dafc
·
unverified ·
2 Parent(s): d184363 72eb018

Merge pull request #2 from yxc20089/feature/scoring-fix-and-tests

Browse files
.github/workflows/test.yml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Tests
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+ - uses: actions/setup-python@v5
15
+ with:
16
+ python-version: "3.12"
17
+ - name: Install dependencies
18
+ run: |
19
+ pip install pytest
20
+ pip install -r requirements.txt
21
+ - name: Run tests
22
+ run: python -m pytest tests/ -v
evaluate.py CHANGED
@@ -29,6 +29,8 @@ from datetime import datetime, timezone
29
  from pathlib import Path
30
  from typing import Any, Dict, List
31
 
 
 
32
  # Evaluation results file
33
  RESULTS_FILE = Path(__file__).parent / "data" / "results.csv"
34
 
@@ -122,10 +124,8 @@ async def run_game(env: Any, agent_fn: Any, max_steps: int) -> Dict[str, Any]:
122
  max_steps: Maximum steps before timeout.
123
 
124
  Returns:
125
- Dict with game metrics (from rubrics.compute_game_metrics).
126
  """
127
- from rubrics import compute_game_metrics
128
-
129
  obs = await env.reset()
130
  steps = 0
131
 
@@ -143,10 +143,9 @@ def get_agent_fn(agent_type: str) -> Any:
143
  Returns a callable that takes an observation and returns an action.
144
  """
145
  if agent_type == "scripted":
146
- # Import inline to avoid hard dependency
147
  from openra_env.models import OpenRAAction
148
  # Simple no-op agent for evaluation framework testing
149
- # Replace with actual ScriptedBot integration
150
  return lambda obs: OpenRAAction(commands=[])
151
  else:
152
  from openra_env.models import OpenRAAction
@@ -168,7 +167,7 @@ async def run_evaluation(args: argparse.Namespace) -> Dict[str, Any]:
168
  result_str = metrics["result"] or "timeout"
169
  print(f"{result_str} (ticks: {metrics['ticks']}, K/D: {metrics['kd_ratio']:.1f})")
170
 
171
- # Aggregate results
172
  wins = sum(1 for g in game_results if g["win"])
173
  total = len(game_results)
174
 
@@ -178,7 +177,7 @@ async def run_evaluation(args: argparse.Namespace) -> Dict[str, Any]:
178
  "opponent": args.opponent,
179
  "games": total,
180
  "win_rate": round(100.0 * wins / max(total, 1), 1),
181
- "score": round(compute_composite_score(game_results), 1),
182
  "avg_kills": round(sum(g["kills_cost"] for g in game_results) / max(total, 1)),
183
  "avg_deaths": round(sum(g["deaths_cost"] for g in game_results) / max(total, 1)),
184
  "kd_ratio": round(
@@ -195,28 +194,6 @@ async def run_evaluation(args: argparse.Namespace) -> Dict[str, Any]:
195
  }
196
 
197
 
198
- def compute_composite_score(game_results: List[Dict[str, Any]]) -> float:
199
- """Compute the OpenRA-Bench composite score.
200
-
201
- Score = 50% win_rate + 25% avg_kd_normalized + 25% avg_economy_normalized
202
- """
203
- total = len(game_results)
204
- if total == 0:
205
- return 0.0
206
-
207
- win_rate = sum(1 for g in game_results if g["win"]) / total
208
-
209
- # K/D ratio normalized: kd / (kd + 1) maps [0, inf) -> [0, 1)
210
- avg_kd = sum(g["kd_ratio"] for g in game_results) / total
211
- kd_norm = avg_kd / (avg_kd + 1)
212
-
213
- # Economy normalized: assets / (assets + 10000)
214
- avg_assets = sum(g["assets_value"] for g in game_results) / total
215
- econ_norm = avg_assets / (avg_assets + 10000) if avg_assets >= 0 else 0.0
216
-
217
- return 100.0 * (0.5 * win_rate + 0.25 * kd_norm + 0.25 * econ_norm)
218
-
219
-
220
  def append_results(results: Dict[str, Any], output_path: Path) -> None:
221
  """Append evaluation results to CSV file."""
222
  file_exists = output_path.exists() and output_path.stat().st_size > 0
 
29
  from pathlib import Path
30
  from typing import Any, Dict, List
31
 
32
+ from openra_rl_util.rubrics import compute_composite_score_from_games, compute_game_metrics
33
+
34
  # Evaluation results file
35
  RESULTS_FILE = Path(__file__).parent / "data" / "results.csv"
36
 
 
124
  max_steps: Maximum steps before timeout.
125
 
126
  Returns:
127
+ Dict with game metrics (from compute_game_metrics).
128
  """
 
 
129
  obs = await env.reset()
130
  steps = 0
131
 
 
143
  Returns a callable that takes an observation and returns an action.
144
  """
145
  if agent_type == "scripted":
 
146
  from openra_env.models import OpenRAAction
147
  # Simple no-op agent for evaluation framework testing
148
+ # TODO: Wire to openra_env.agents.scripted.ScriptedBot when extracted
149
  return lambda obs: OpenRAAction(commands=[])
150
  else:
151
  from openra_env.models import OpenRAAction
 
167
  result_str = metrics["result"] or "timeout"
168
  print(f"{result_str} (ticks: {metrics['ticks']}, K/D: {metrics['kd_ratio']:.1f})")
169
 
170
+ # Aggregate results using single source of truth from openra-rl-util
171
  wins = sum(1 for g in game_results if g["win"])
172
  total = len(game_results)
173
 
 
177
  "opponent": args.opponent,
178
  "games": total,
179
  "win_rate": round(100.0 * wins / max(total, 1), 1),
180
+ "score": round(compute_composite_score_from_games(game_results), 1),
181
  "avg_kills": round(sum(g["kills_cost"] for g in game_results) / max(total, 1)),
182
  "avg_deaths": round(sum(g["deaths_cost"] for g in game_results) / max(total, 1)),
183
  "kd_ratio": round(
 
194
  }
195
 
196
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  def append_results(results: Dict[str, Any], output_path: Path) -> None:
198
  """Append evaluation results to CSV file."""
199
  file_exists = output_path.exists() and output_path.stat().st_size > 0
requirements.txt CHANGED
@@ -1,3 +1,5 @@
1
  gradio>=4.44.0
2
  pandas>=2.0.0
3
  openenv-core>=0.2.0
 
 
 
1
  gradio>=4.44.0
2
  pandas>=2.0.0
3
  openenv-core>=0.2.0
4
+ openra-rl-util>=0.1.0
5
+ openra-rl>=0.3.0
rubrics.py CHANGED
@@ -1,152 +1,14 @@
1
- """OpenRA-Bench rubrics for agent evaluation.
2
 
3
- Follows the OpenEnv rubric pattern (see openenv.core.rubrics).
4
- These rubrics score game episodes based on win/loss, military efficiency,
5
- and economic performance.
6
-
7
- Usage:
8
- rubric = OpenRABenchRubric()
9
- rubric.reset()
10
- for action, obs in episode:
11
- reward = rubric(action, obs) # 0.0 until done
12
- step_rewards = rubric.win_loss.compute_step_rewards()
13
  """
14
 
15
- from typing import Any, Dict, List, Tuple
16
-
17
- from openenv.core.rubrics import (
18
- ExponentialDiscountingTrajectoryRubric,
19
- TrajectoryRubric,
20
- WeightedSum,
 
21
  )
22
-
23
-
24
- class OpenRAWinLossRubric(ExponentialDiscountingTrajectoryRubric):
25
- """Score game based on win/loss/draw outcome with temporal discounting.
26
-
27
- Terminal rewards:
28
- - Win: +1.0
29
- - Loss: -1.0
30
- - Draw: 0.0
31
- """
32
-
33
- def score_trajectory(self, trajectory: List[Tuple[Any, Any]]) -> float:
34
- if not trajectory:
35
- return 0.0
36
- _, final_obs = trajectory[-1]
37
- result = getattr(final_obs, "result", "")
38
- if result == "win":
39
- return 1.0
40
- elif result == "lose":
41
- return -1.0
42
- return 0.0
43
-
44
-
45
- class MilitaryEfficiencyRubric(TrajectoryRubric):
46
- """Score based on kill/death cost ratio from final observation.
47
-
48
- Score = kills_cost / max(kills_cost + deaths_cost, 1)
49
- Normalized to 0.0-1.0 range.
50
- """
51
-
52
- def score_trajectory(self, trajectory: List[Tuple[Any, Any]]) -> float:
53
- if not trajectory:
54
- return 0.0
55
- _, final_obs = trajectory[-1]
56
- military = getattr(final_obs, "military", None)
57
- if military is None:
58
- return 0.0
59
- kills = getattr(military, "kills_cost", 0)
60
- deaths = getattr(military, "deaths_cost", 0)
61
- total = kills + deaths
62
- if total == 0:
63
- return 0.5 # No combat occurred
64
- return kills / total
65
-
66
- def compute_step_rewards(self) -> List[float]:
67
- if not self._trajectory:
68
- return []
69
- score = self.score_trajectory(self._trajectory)
70
- return [score] * len(self._trajectory)
71
-
72
-
73
- class EconomyRubric(TrajectoryRubric):
74
- """Score based on final economic state.
75
-
76
- Score = assets_value / (assets_value + 10000)
77
- Sigmoid-like normalization to 0.0-1.0 range.
78
- """
79
-
80
- def score_trajectory(self, trajectory: List[Tuple[Any, Any]]) -> float:
81
- if not trajectory:
82
- return 0.0
83
- _, final_obs = trajectory[-1]
84
- military = getattr(final_obs, "military", None)
85
- if military is None:
86
- return 0.0
87
- assets = getattr(military, "assets_value", 0)
88
- # Sigmoid normalization: maps [0, inf) -> [0, 1)
89
- return assets / (assets + 10000) if assets >= 0 else 0.0
90
-
91
- def compute_step_rewards(self) -> List[float]:
92
- if not self._trajectory:
93
- return []
94
- score = self.score_trajectory(self._trajectory)
95
- return [score] * len(self._trajectory)
96
-
97
-
98
- class OpenRABenchRubric(WeightedSum):
99
- """Composite benchmark score combining win/loss, military, and economy.
100
-
101
- Weights: 50% win/loss, 25% military efficiency, 25% economy.
102
- """
103
-
104
- def __init__(self, gamma: float = 0.99):
105
- win_loss = OpenRAWinLossRubric(gamma=gamma)
106
- military = MilitaryEfficiencyRubric()
107
- economy = EconomyRubric()
108
- super().__init__(
109
- rubrics=[win_loss, military, economy],
110
- weights=[0.5, 0.25, 0.25],
111
- )
112
- # Keep named references for direct access
113
- self.win_loss = win_loss
114
- self.military = military
115
- self.economy = economy
116
-
117
- def reset(self) -> None:
118
- self.win_loss.reset()
119
- self.military.reset()
120
- self.economy.reset()
121
-
122
-
123
- def compute_game_metrics(final_obs: Any) -> Dict[str, Any]:
124
- """Extract benchmark metrics from a final game observation.
125
-
126
- Args:
127
- final_obs: The terminal GameObservation (where done=True).
128
-
129
- Returns:
130
- Dict with keys: result, ticks, kills_cost, deaths_cost,
131
- kd_ratio, assets_value, cash, win (bool).
132
- """
133
- military = getattr(final_obs, "military", None)
134
- economy = getattr(final_obs, "economy", None)
135
-
136
- kills = getattr(military, "kills_cost", 0) if military else 0
137
- deaths = getattr(military, "deaths_cost", 0) if military else 0
138
- assets = getattr(military, "assets_value", 0) if military else 0
139
- cash = getattr(economy, "cash", 0) if economy else 0
140
- result = getattr(final_obs, "result", "")
141
- tick = getattr(final_obs, "tick", 0)
142
-
143
- return {
144
- "result": result,
145
- "win": result == "win",
146
- "ticks": tick,
147
- "kills_cost": kills,
148
- "deaths_cost": deaths,
149
- "kd_ratio": kills / max(deaths, 1),
150
- "assets_value": assets,
151
- "cash": cash,
152
- }
 
1
+ """OpenRA-Bench rubrics re-exported from openra-rl-util.
2
 
3
+ All scoring logic lives in the shared utility library.
4
+ This module re-exports for backward compatibility.
 
 
 
 
 
 
 
 
5
  """
6
 
7
+ from openra_rl_util.rubrics import ( # noqa: F401
8
+ EconomyRubric,
9
+ MilitaryEfficiencyRubric,
10
+ OpenRABenchRubric,
11
+ OpenRAWinLossRubric,
12
+ compute_composite_score_from_games,
13
+ compute_game_metrics,
14
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tests/__init__.py ADDED
File without changes
tests/test_app.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for the Gradio leaderboard app."""
2
+
3
+ import sys
4
+ from pathlib import Path
5
+ from unittest.mock import patch
6
+
7
+ import pandas as pd
8
+ import pytest
9
+
10
+ sys.path.insert(0, str(Path(__file__).parent.parent))
11
+
12
+ from app import (
13
+ AGENT_TYPE_COLORS,
14
+ DISPLAY_COLUMNS,
15
+ add_type_badges,
16
+ build_app,
17
+ filter_leaderboard,
18
+ load_data,
19
+ )
20
+
21
+
22
+ class TestLoadData:
23
+ """Test data loading."""
24
+
25
+ def test_returns_dataframe(self):
26
+ df = load_data()
27
+ assert isinstance(df, pd.DataFrame)
28
+
29
+ def test_has_display_columns(self):
30
+ df = load_data()
31
+ for col in DISPLAY_COLUMNS:
32
+ assert col in df.columns, f"Missing column: {col}"
33
+
34
+ def test_has_rank_column(self):
35
+ df = load_data()
36
+ if len(df) > 0:
37
+ assert df["Rank"].iloc[0] == 1
38
+
39
+ def test_sorted_by_score_descending(self):
40
+ df = load_data()
41
+ if len(df) > 1:
42
+ scores = df["Score"].tolist()
43
+ assert scores == sorted(scores, reverse=True)
44
+
45
+ def test_handles_missing_file(self):
46
+ with patch("app.DATA_PATH", Path("/nonexistent/data.csv")):
47
+ df = load_data()
48
+ assert isinstance(df, pd.DataFrame)
49
+ assert len(df) == 0
50
+
51
+
52
+ class TestBadges:
53
+ """Test type badge rendering."""
54
+
55
+ def test_scripted_badge_has_gold(self):
56
+ df = pd.DataFrame({"Type": ["Scripted"]})
57
+ result = add_type_badges(df)
58
+ assert "#ffcd75" in result["Type"].iloc[0]
59
+
60
+ def test_llm_badge_has_blue(self):
61
+ df = pd.DataFrame({"Type": ["LLM"]})
62
+ result = add_type_badges(df)
63
+ assert "#7497db" in result["Type"].iloc[0]
64
+
65
+ def test_rl_badge_has_gray(self):
66
+ df = pd.DataFrame({"Type": ["RL"]})
67
+ result = add_type_badges(df)
68
+ assert "#75809c" in result["Type"].iloc[0]
69
+
70
+ def test_all_types_have_colors(self):
71
+ for t in ["Scripted", "LLM", "RL"]:
72
+ assert t in AGENT_TYPE_COLORS
73
+
74
+
75
+ class TestFilter:
76
+ """Test leaderboard filtering."""
77
+
78
+ def test_returns_dataframe(self):
79
+ df = filter_leaderboard("", [], "All")
80
+ assert isinstance(df, pd.DataFrame)
81
+
82
+ def test_search_filters_by_name(self):
83
+ df = filter_leaderboard("ScriptedBot", [], "All")
84
+ # If there are results, they should contain "ScriptedBot"
85
+ if len(df) > 0:
86
+ # Badges are in the Type column, not Agent
87
+ assert all("ScriptedBot" in str(row) for row in df["Agent"])
88
+
89
+ def test_opponent_filter(self):
90
+ df = filter_leaderboard("", [], "Hard")
91
+ if len(df) > 0:
92
+ assert all(df["Opponent"] == "Hard")
93
+
94
+
95
+ class TestBuildApp:
96
+ """Test app construction."""
97
+
98
+ def test_builds_without_error(self):
99
+ app = build_app()
100
+ assert app is not None
tests/test_evaluate.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for the evaluation harness."""
2
+
3
+ import csv
4
+ import sys
5
+ import tempfile
6
+ from pathlib import Path
7
+ from unittest.mock import patch
8
+
9
+ import pytest
10
+
11
+ # Add parent directory to path for direct import
12
+ sys.path.insert(0, str(Path(__file__).parent.parent))
13
+
14
+ from evaluate import (
15
+ RESULTS_COLUMNS,
16
+ append_results,
17
+ get_agent_fn,
18
+ parse_args,
19
+ )
20
+
21
+
22
+ class TestParseArgs:
23
+ """Test argument parsing."""
24
+
25
+ def test_minimal_args(self):
26
+ with patch("sys.argv", ["evaluate.py", "--agent-name", "TestBot"]):
27
+ args = parse_args()
28
+ assert args.agent_name == "TestBot"
29
+ assert args.agent == "scripted"
30
+ assert args.agent_type == "Scripted"
31
+ assert args.opponent == "Normal"
32
+ assert args.games == 10
33
+
34
+ def test_all_args(self):
35
+ with patch("sys.argv", [
36
+ "evaluate.py",
37
+ "--agent", "llm",
38
+ "--agent-name", "MyLLM",
39
+ "--agent-type", "LLM",
40
+ "--opponent", "Hard",
41
+ "--games", "5",
42
+ "--server", "http://example.com:8000",
43
+ "--max-steps", "3000",
44
+ "--dry-run",
45
+ ]):
46
+ args = parse_args()
47
+ assert args.agent == "llm"
48
+ assert args.agent_name == "MyLLM"
49
+ assert args.agent_type == "LLM"
50
+ assert args.opponent == "Hard"
51
+ assert args.games == 5
52
+ assert args.server == "http://example.com:8000"
53
+ assert args.max_steps == 3000
54
+ assert args.dry_run is True
55
+
56
+ def test_auto_detect_agent_type(self):
57
+ for agent, expected_type in [
58
+ ("scripted", "Scripted"),
59
+ ("llm", "LLM"),
60
+ ("mcp", "Scripted"),
61
+ ("custom", "RL"),
62
+ ]:
63
+ with patch("sys.argv", ["evaluate.py", "--agent", agent, "--agent-name", "T"]):
64
+ args = parse_args()
65
+ assert args.agent_type == expected_type, f"{agent} -> {expected_type}"
66
+
67
+ def test_explicit_type_overrides_auto(self):
68
+ with patch("sys.argv", [
69
+ "evaluate.py", "--agent", "scripted",
70
+ "--agent-name", "T", "--agent-type", "RL",
71
+ ]):
72
+ args = parse_args()
73
+ assert args.agent_type == "RL"
74
+
75
+
76
+ class TestGetAgentFn:
77
+ """Test agent factory."""
78
+
79
+ def test_scripted_returns_callable(self):
80
+ fn = get_agent_fn("scripted")
81
+ assert callable(fn)
82
+
83
+ def test_llm_returns_callable(self):
84
+ fn = get_agent_fn("llm")
85
+ assert callable(fn)
86
+
87
+
88
+ class TestAppendResults:
89
+ """Test CSV output."""
90
+
91
+ def test_creates_new_file(self):
92
+ with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f:
93
+ path = Path(f.name)
94
+
95
+ path.unlink() # ensure it doesn't exist
96
+ results = {col: "" for col in RESULTS_COLUMNS}
97
+ results["agent_name"] = "TestBot"
98
+ results["games"] = 5
99
+ results["score"] = 85.0
100
+
101
+ append_results(results, path)
102
+
103
+ assert path.exists()
104
+ with open(path) as f:
105
+ reader = csv.DictReader(f)
106
+ rows = list(reader)
107
+ assert len(rows) == 1
108
+ assert rows[0]["agent_name"] == "TestBot"
109
+
110
+ path.unlink()
111
+
112
+ def test_appends_to_existing(self):
113
+ with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f:
114
+ path = Path(f.name)
115
+
116
+ # Write first result
117
+ results1 = {col: "" for col in RESULTS_COLUMNS}
118
+ results1["agent_name"] = "Bot1"
119
+ append_results(results1, path)
120
+
121
+ # Write second result
122
+ results2 = {col: "" for col in RESULTS_COLUMNS}
123
+ results2["agent_name"] = "Bot2"
124
+ append_results(results2, path)
125
+
126
+ with open(path) as f:
127
+ reader = csv.DictReader(f)
128
+ rows = list(reader)
129
+ assert len(rows) == 2
130
+ assert rows[0]["agent_name"] == "Bot1"
131
+ assert rows[1]["agent_name"] == "Bot2"
132
+
133
+ path.unlink()
134
+
135
+ def test_columns_match_expected(self):
136
+ assert "agent_name" in RESULTS_COLUMNS
137
+ assert "score" in RESULTS_COLUMNS
138
+ assert "win_rate" in RESULTS_COLUMNS
139
+ assert "replay_url" in RESULTS_COLUMNS
140
+ assert len(RESULTS_COLUMNS) == 13
141
+
142
+
143
+ class TestScoringUsesUtil:
144
+ """Verify scoring uses the single source of truth from openra-rl-util."""
145
+
146
+ def test_rubrics_re_exports_util(self):
147
+ """rubrics.py should re-export from openra_rl_util."""
148
+ from rubrics import compute_composite_score_from_games
149
+ from openra_rl_util.rubrics import (
150
+ compute_composite_score_from_games as util_fn,
151
+ )
152
+ assert compute_composite_score_from_games is util_fn
153
+
154
+ def test_evaluate_uses_util_scoring(self):
155
+ """evaluate.py should not have its own compute_composite_score."""
156
+ import evaluate
157
+ assert not hasattr(evaluate, "compute_composite_score"), \
158
+ "evaluate.py should use compute_composite_score_from_games from Util"
159
+
160
+ def test_compute_game_metrics_re_exported(self):
161
+ from rubrics import compute_game_metrics
162
+ from openra_rl_util.rubrics import compute_game_metrics as util_fn
163
+ assert compute_game_metrics is util_fn