yxc20098 committed on
Commit
824262a
ยท
1 Parent(s): 44493a3

Add in-browser evaluation via Evaluate tab

Browse files

Users can now run scripted agent evaluations directly from the
leaderboard UI against the HF-hosted OpenRA-RL environment.
No Docker or local setup needed.

- evaluate_runner.py: self-contained eval via HTTP REST (no
openra-rl/openenv imports, avoids websockets conflict)
- Evaluate tab: form with agent name, opponent, game count
- Results saved to CSV and JSONL (CommitScheduler on HF)
- Submit tab updated with in-browser option

Files changed (4) hide show
  1. .gitignore +1 -0
  2. app.py +188 -9
  3. evaluate_runner.py +171 -0
  4. requirements.txt +2 -0
.gitignore CHANGED
@@ -7,3 +7,4 @@ build/
7
  .venv/
8
  *.orarep
9
  flagged/
 
 
7
  .venv/
8
  *.orarep
9
  flagged/
10
+ submissions/
app.py CHANGED
@@ -1,7 +1,7 @@
1
  """OpenRA-Bench: Agent Leaderboard for OpenRA-RL.
2
 
3
  A Gradio app that displays agent rankings, supports filtering by type
4
- and opponent difficulty, and provides submission instructions.
5
 
6
  Run locally:
7
  python app.py
@@ -10,12 +10,18 @@ Deploy on HuggingFace Spaces:
10
  Push app.py, requirements.txt, data/, and README.md to your HF Space.
11
  """
12
 
 
 
 
13
  import os
 
14
  from pathlib import Path
15
 
16
  import gradio as gr
17
  import pandas as pd
18
 
 
 
19
  # โ”€โ”€ Data Loading โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
20
 
21
  DATA_PATH = Path(__file__).parent / "data" / "results.csv"
@@ -121,6 +127,134 @@ def filter_leaderboard(
121
  return add_type_badges(df)
122
 
123
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  # โ”€โ”€ UI โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
125
 
126
  ABOUT_MD = """
@@ -166,20 +300,19 @@ The benchmark score combines three components:
166
  SUBMIT_MD = """
167
  ## How to Submit Results
168
 
169
- ### 1. Set up the evaluation harness
 
 
 
 
 
170
 
171
  ```bash
172
  git clone https://github.com/yxc20089/OpenRA-Bench.git
173
  cd OpenRA-Bench
174
  pip install -r requirements.txt
175
  pip install openra-rl openra-rl-util
176
- ```
177
-
178
- ### 2. Run the evaluation
179
 
180
- **Option A: HuggingFace-hosted (no Docker needed)**
181
-
182
- ```bash
183
  python evaluate.py \\
184
  --agent scripted \\
185
  --agent-name "MyBot-v1" \\
@@ -189,7 +322,7 @@ python evaluate.py \\
189
  --server https://openra-rl-openra-rl.hf.space
190
  ```
191
 
192
- **Option B: Local server (Docker)**
193
 
194
  ```bash
195
  git clone --recursive https://github.com/yxc20089/OpenRA-RL.git
@@ -306,6 +439,52 @@ def build_app() -> gr.Blocks:
306
  outputs=leaderboard,
307
  )
308
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
  # โ”€โ”€ About Tab โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
310
  with gr.Tab("About"):
311
  gr.Markdown(ABOUT_MD)
 
1
  """OpenRA-Bench: Agent Leaderboard for OpenRA-RL.
2
 
3
  A Gradio app that displays agent rankings, supports filtering by type
4
+ and opponent difficulty, and lets users run evaluations in-browser.
5
 
6
  Run locally:
7
  python app.py
 
10
  Push app.py, requirements.txt, data/, and README.md to your HF Space.
11
  """
12
 
13
+ import asyncio
14
+ import csv
15
+ import json
16
  import os
17
+ from datetime import datetime, timezone
18
  from pathlib import Path
19
 
20
  import gradio as gr
21
  import pandas as pd
22
 
23
+ from evaluate_runner import DEFAULT_SERVER, run_evaluation, wake_hf_space
24
+
25
  # โ”€โ”€ Data Loading โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
26
 
27
  DATA_PATH = Path(__file__).parent / "data" / "results.csv"
 
127
  return add_type_badges(df)
128
 
129
 
130
+ # โ”€โ”€ Result Persistence โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
131
+
132
+ SUBMISSIONS_DIR = Path(__file__).parent / "submissions"
133
+ SUBMISSIONS_DIR.mkdir(exist_ok=True)
134
+
135
+ # CommitScheduler pushes submissions to HF dataset (only on HF Spaces)
136
+ _scheduler = None
137
+ if os.environ.get("HF_TOKEN") and os.environ.get("SPACE_ID"):
138
+ try:
139
+ from huggingface_hub import CommitScheduler
140
+
141
+ _scheduler = CommitScheduler(
142
+ repo_id="openra-rl/bench-results",
143
+ repo_type="dataset",
144
+ folder_path=str(SUBMISSIONS_DIR),
145
+ every=5,
146
+ token=os.environ["HF_TOKEN"],
147
+ )
148
+ except Exception:
149
+ pass # Running locally without HF token โ€” skip
150
+
151
+
152
def save_submission(results: dict) -> None:
    """Append evaluation results to the submissions JSONL and leaderboard CSV.

    Fixes over the original:
    - Both files are opened with an explicit UTF-8 encoding so agent names
      with non-ASCII characters round-trip on any platform.
    - The JSONL write is performed under the CommitScheduler lock when the
      scheduler is active, so a background commit never uploads a
      half-written line (recommended by the huggingface_hub docs).
    - ``extrasaction="ignore"`` keeps the CSV append from raising if the
      results dict ever gains extra keys.
    """
    from contextlib import nullcontext

    # Hold the scheduler's lock while writing into its watched folder;
    # fall back to a no-op context when running locally without a scheduler.
    lock = _scheduler.lock if _scheduler is not None else nullcontext()

    # JSONL for CommitScheduler → HF dataset
    jsonl_path = SUBMISSIONS_DIR / "results.jsonl"
    with lock:
        with open(jsonl_path, "a", encoding="utf-8") as f:
            f.write(json.dumps(results) + "\n")

    # Also append to data/results.csv for the leaderboard
    csv_path = DATA_PATH
    file_exists = csv_path.exists() and csv_path.stat().st_size > 0
    fieldnames = [
        "agent_name", "agent_type", "opponent", "games", "win_rate",
        "score", "avg_kills", "avg_deaths", "kd_ratio", "avg_economy",
        "avg_game_length", "timestamp", "replay_url",
    ]
    with open(csv_path, "a", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction="ignore")
        if not file_exists:
            writer.writeheader()
        writer.writerow(results)
172
+
173
+
174
+ # โ”€โ”€ Evaluation Handler โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
175
+
176
+
177
def run_eval_sync(agent_name: str, opponent: str, num_games: int):
    """Drive one evaluation run, yielding (log_text, dataframe, summary_md).

    A Gradio generator handler: each yield updates the progress textbox,
    the results dataframe, and the markdown summary in the Evaluate tab.
    """
    # Guard clause: an empty/whitespace-only name is a user error.
    if not agent_name or not agent_name.strip():
        yield "Error: Please enter an agent name.", None, ""
        return

    agent_name = agent_name.strip()
    num_games = int(num_games)

    # Accumulated progress lines; log() appends one and returns the joined text.
    progress: list = []

    def log(msg: str) -> str:
        progress.append(msg)
        return "\n".join(progress)

    # Make sure the HF-hosted environment is awake before playing.
    yield log(f"Connecting to {DEFAULT_SERVER}..."), None, ""
    yield log(wake_hf_space(DEFAULT_SERVER)), None, ""

    # Per-game rows recorded by the callback as each game finishes.
    per_game: list = []

    def on_game_done(game_num, total, metrics):
        per_game.append({
            "Game": game_num,
            "Result": metrics["result"] or "timeout",
            "K/D": round(metrics["kd_ratio"], 1),
            "Ticks": metrics["ticks"],
        })

    yield log(f"Running {num_games} game(s) vs {opponent} AI..."), None, ""

    try:
        # run_evaluation is async; bridge it into this sync generator.
        results = asyncio.run(
            run_evaluation(
                agent_name=agent_name,
                opponent=opponent,
                num_games=num_games,
                server_url=DEFAULT_SERVER,
                on_game_done=on_game_done,
            )
        )
    except Exception as e:
        yield log(f"Error: {e}"), None, ""
        return

    # Persist to JSONL/CSV so the leaderboard picks the run up.
    save_submission(results)

    # Replay the per-game lines into the progress log.
    for g in per_game:
        log(f" Game {g['Game']}: {g['Result']} (K/D: {g['K/D']}, ticks: {g['Ticks']})")

    log("\nEvaluation complete!")

    summary = (
        f"### Results: {agent_name}\n\n"
        f"| Metric | Value |\n|--------|-------|\n"
        f"| **Score** | **{results['score']}** |\n"
        f"| Win Rate | {results['win_rate']}% |\n"
        f"| K/D Ratio | {results['kd_ratio']} |\n"
        f"| Avg Economy | {results['avg_economy']} |\n"
        f"| Games | {results['games']} vs {results['opponent']} |\n"
    )

    results_df = pd.DataFrame([{
        "Agent": results["agent_name"],
        "Type": results["agent_type"],
        "Opponent": results["opponent"],
        "Games": results["games"],
        "Win Rate (%)": results["win_rate"],
        "Score": results["score"],
        "K/D Ratio": results["kd_ratio"],
    }])

    yield "\n".join(progress), results_df, summary
256
+
257
+
258
  # โ”€โ”€ UI โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
259
 
260
  ABOUT_MD = """
 
300
  SUBMIT_MD = """
301
  ## How to Submit Results
302
 
303
+ ### Option A: In-Browser (no setup needed)
304
+
305
+ Use the **Evaluate** tab to run a scripted agent directly from your browser.
306
+ Results are saved to the leaderboard automatically.
307
+
308
+ ### Option B: CLI with HuggingFace-hosted server (no Docker needed)
309
 
310
  ```bash
311
  git clone https://github.com/yxc20089/OpenRA-Bench.git
312
  cd OpenRA-Bench
313
  pip install -r requirements.txt
314
  pip install openra-rl openra-rl-util
 
 
 
315
 
 
 
 
316
  python evaluate.py \\
317
  --agent scripted \\
318
  --agent-name "MyBot-v1" \\
 
322
  --server https://openra-rl-openra-rl.hf.space
323
  ```
324
 
325
+ ### Option C: Local server (Docker)
326
 
327
  ```bash
328
  git clone --recursive https://github.com/yxc20089/OpenRA-RL.git
 
439
  outputs=leaderboard,
440
  )
441
 
442
+ # โ”€โ”€ Evaluate Tab โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
443
+ with gr.Tab("Evaluate"):
444
+ gr.Markdown(
445
+ "## Run Evaluation\n\n"
446
+ "Run a scripted agent against the HuggingFace-hosted "
447
+ "OpenRA-RL environment. No Docker or local setup needed."
448
+ )
449
+ with gr.Row():
450
+ eval_name = gr.Textbox(
451
+ label="Agent Name",
452
+ placeholder="e.g. MyBot-v1",
453
+ scale=2,
454
+ )
455
+ eval_opponent = gr.Dropdown(
456
+ choices=["Easy", "Normal", "Hard"],
457
+ value="Normal",
458
+ label="Opponent",
459
+ scale=1,
460
+ )
461
+ eval_games = gr.Slider(
462
+ minimum=1,
463
+ maximum=20,
464
+ value=3,
465
+ step=1,
466
+ label="Games",
467
+ scale=1,
468
+ )
469
+ eval_btn = gr.Button("Run Evaluation", variant="primary")
470
+
471
+ eval_log = gr.Textbox(
472
+ label="Progress",
473
+ lines=10,
474
+ interactive=False,
475
+ )
476
+ eval_results = gr.Dataframe(
477
+ label="Game Results",
478
+ interactive=False,
479
+ )
480
+ eval_summary = gr.Markdown()
481
+
482
+ eval_btn.click(
483
+ fn=run_eval_sync,
484
+ inputs=[eval_name, eval_opponent, eval_games],
485
+ outputs=[eval_log, eval_results, eval_summary],
486
+ )
487
+
488
  # โ”€โ”€ About Tab โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
489
  with gr.Tab("About"):
490
  gr.Markdown(ABOUT_MD)
evaluate_runner.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """In-browser evaluation runner for OpenRA-Bench.
2
+
3
+ Runs games against the OpenRA-RL server via HTTP REST API (POST /reset, /step).
4
+ No openra-rl/openenv imports โ€” avoids websockets version conflicts with Gradio.
5
+
6
+ Scoring logic inlined from openra_rl_util.rubrics (which has zero dependencies).
7
+ """
8
+
9
+ import time
10
+ from datetime import datetime, timezone
11
+ from typing import Any, Callable, Dict, List, Optional
12
+
13
+ import httpx
14
+
15
+ # HuggingFace-hosted OpenRA-RL environment
16
+ DEFAULT_SERVER = "https://openra-rl-openra-rl.hf.space"
17
+ MAX_STEPS_PER_GAME = 5000
18
+ STEP_TIMEOUT = 60.0
19
+
20
+
21
+ # โ”€โ”€ Scoring (inlined from openra_rl_util/rubrics.py) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
22
+
23
+
24
def compute_game_metrics(obs: Dict[str, Any]) -> Dict[str, Any]:
    """Extract benchmark metrics from a final game observation dict."""
    # "military"/"economy" may be absent or explicitly None — treat both as {}.
    mil = obs.get("military") or {}
    econ = obs.get("economy") or {}
    outcome = obs.get("result", "")

    kills_cost = mil.get("kills_cost", 0)
    deaths_cost = mil.get("deaths_cost", 0)

    return {
        "result": outcome,
        "win": outcome == "win",
        "ticks": obs.get("tick", 0),
        "kills_cost": kills_cost,
        "deaths_cost": deaths_cost,
        # max(..., 1) guards against division by zero when nothing died.
        "kd_ratio": kills_cost / max(deaths_cost, 1),
        "assets_value": mil.get("assets_value", 0),
        "cash": econ.get("cash", 0),
    }
46
+
47
+
48
def compute_composite_score(game_results: List[Dict[str, Any]]) -> float:
    """Compute OpenRA-Bench composite score: 50% win + 25% military + 25% economy."""
    if not game_results:
        return 0.0
    n = len(game_results)

    # Win component: fraction of games won.
    win_rate = sum(g["win"] for g in game_results) / n

    # Military component: share of combat value destroyed vs lost;
    # a game with no combat at all counts as neutral (0.5).
    def _military(g: Dict[str, Any]) -> float:
        spent = g["kills_cost"] + g["deaths_cost"]
        return g["kills_cost"] / spent if spent > 0 else 0.5

    # Economy component: saturating curve on assets (10k assets → 0.5).
    def _economy(g: Dict[str, Any]) -> float:
        assets = g["assets_value"]
        return assets / (assets + 10000) if assets >= 0 else 0.0

    avg_mil = sum(_military(g) for g in game_results) / n
    avg_econ = sum(_economy(g) for g in game_results) / n

    return 100.0 * (0.5 * win_rate + 0.25 * avg_mil + 0.25 * avg_econ)
70
+
71
+
72
+ # โ”€โ”€ Server communication โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
73
+
74
+
75
def wake_hf_space(server_url: str, max_wait: int = 120) -> str:
    """Wake a sleeping HuggingFace Space. Returns status message.

    Polls the Space root URL every 5 seconds until it answers 200 or
    *max_wait* seconds have elapsed. Non-HF URLs are skipped entirely.
    """
    if ".hf.space" not in server_url:
        return "Local server, skipping wake."

    deadline = time.time() + max_wait
    while time.time() < deadline:
        try:
            response = httpx.get(server_url, timeout=10, follow_redirects=True)
            if response.status_code == 200:
                return "Environment server is ready."
        except httpx.HTTPError:
            # Space still booting (connect errors, 5xx wrappers) — keep polling.
            pass
        time.sleep(5)
    return "Warning: server may still be starting."
90
+
91
+
92
async def run_single_game(
    client: httpx.AsyncClient,
    server_url: str,
    max_steps: int = MAX_STEPS_PER_GAME,
) -> Dict[str, Any]:
    """Run one game via HTTP REST and return metrics.

    Resets the environment, then steps a no-op scripted agent until the
    observation carries a non-empty "result" or *max_steps* is reached.
    """
    # Reset environment and grab the initial observation.
    reset_resp = await client.post(f"{server_url}/reset", json={})
    reset_resp.raise_for_status()
    obs = reset_resp.json()["observation"]

    for _ in range(max_steps):
        if obs.get("result"):
            break  # game finished (win/lose/draw)
        # Scripted no-op agent: send empty commands each tick.
        step_resp = await client.post(
            f"{server_url}/step",
            json={"action": {"commands": []}},
        )
        step_resp.raise_for_status()
        obs = step_resp.json()["observation"]

    return compute_game_metrics(obs)
118
+
119
+
120
async def run_evaluation(
    agent_name: str,
    opponent: str,
    num_games: int,
    server_url: str = DEFAULT_SERVER,
    on_game_done: Optional[Callable[[int, int, Dict], None]] = None,
) -> Dict[str, Any]:
    """Run N games and return aggregate results.

    Args:
        agent_name: Display name for the leaderboard.
        opponent: AI difficulty (Easy/Normal/Hard).
        num_games: Number of games to play.
        server_url: OpenRA-RL server URL.
        on_game_done: Optional callback(game_num, total, metrics) after each game.

    Returns:
        Dict with all fields needed for results.csv.
    """
    games: List[Dict[str, Any]] = []

    # One shared client for the whole run; STEP_TIMEOUT bounds each request.
    async with httpx.AsyncClient(timeout=STEP_TIMEOUT) as client:
        for game_num in range(1, num_games + 1):
            metrics = await run_single_game(client, server_url)
            games.append(metrics)
            if on_game_done is not None:
                on_game_done(game_num, num_games, metrics)

    # max(..., 1) keeps the aggregates safe if num_games was 0.
    denom = max(len(games), 1)

    def _mean(key: str) -> float:
        return sum(g[key] for g in games) / denom

    wins = sum(g["win"] for g in games)

    return {
        "agent_name": agent_name,
        "agent_type": "Scripted",
        "opponent": opponent,
        "games": len(games),
        "win_rate": round(100.0 * wins / denom, 1),
        "score": round(compute_composite_score(games), 1),
        "avg_kills": round(_mean("kills_cost")),
        "avg_deaths": round(_mean("deaths_cost")),
        "kd_ratio": round(_mean("kd_ratio"), 2),
        "avg_economy": round(_mean("assets_value")),
        "avg_game_length": round(_mean("ticks")),
        "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%d"),
        "replay_url": "",
    }
requirements.txt CHANGED
@@ -1,2 +1,4 @@
1
  gradio>=4.44.0
2
  pandas>=2.0.0
 
 
 
1
  gradio>=4.44.0
2
  pandas>=2.0.0
3
+ httpx>=0.24.0
4
+ huggingface_hub>=0.20.0