yxc20098 committed on
Commit
8ce66d2
·
1 Parent(s): 2771ddd

Move Try experience to OpenRA-RL Space, remove Try tab from Bench

Browse files

The Try tab (watch LLM agent play) now lives on the OpenRA-RL Space
at /try instead of as a Gradio tab here. Updated Submit tab to link
to the OpenRA-RL Space for the Try experience.

Files changed (1) hide show
  1. app.py +3 -155
app.py CHANGED
@@ -17,10 +17,9 @@ from datetime import datetime, timezone
17
  from pathlib import Path
18
 
19
  import gradio as gr
20
- import httpx
21
  import pandas as pd
22
 
23
- from evaluate_runner import DEFAULT_SERVER, wake_hf_space
24
 
25
  # ── Data Loading ───────────────────────────────────────────────────────────────
26
 
@@ -171,123 +170,6 @@ def save_submission(results: dict) -> None:
171
  writer.writerow(results)
172
 
173
 
174
- # ── Try Agent Handler ─────────────────────────────────────────────────────────
175
-
176
-
177
- def run_try_agent(opponent: str):
178
- """Generator that streams LLM agent gameplay from the OpenRA-RL server."""
179
- log_lines = []
180
-
181
- def log(msg: str):
182
- log_lines.append(msg)
183
- return "\n".join(log_lines)
184
-
185
- # Wake server first
186
- yield log(f"Connecting to {DEFAULT_SERVER}..."), ""
187
- status = wake_hf_space(DEFAULT_SERVER)
188
- yield log(status), ""
189
- yield log(f"Starting game — LLM agent vs {opponent} AI..."), ""
190
-
191
- try:
192
- with httpx.stream(
193
- "GET",
194
- f"{DEFAULT_SERVER}/try-agent",
195
- params={"opponent": opponent},
196
- timeout=httpx.Timeout(connect=30.0, read=360.0, write=30.0, pool=30.0),
197
- ) as resp:
198
- if resp.status_code == 409:
199
- yield log("A game is already in progress. Please try again later."), ""
200
- return
201
- if resp.status_code != 200:
202
- yield log(f"Error: Server returned {resp.status_code}"), ""
203
- return
204
-
205
- final_data = None
206
- event_type = ""
207
-
208
- for line in resp.iter_lines():
209
- if not line.strip():
210
- continue
211
-
212
- # Parse SSE: event line sets type, data line has payload
213
- if line.startswith("event: "):
214
- event_type = line[7:].strip()
215
- continue
216
- if not line.startswith("data: "):
217
- continue
218
-
219
- try:
220
- data = json.loads(line[6:])
221
- except json.JSONDecodeError:
222
- continue
223
-
224
- etype = event_type or data.get("type", "")
225
-
226
- if etype == "status":
227
- yield log(data["message"]), ""
228
-
229
- elif etype == "turn":
230
- yield log(
231
- f"[Turn {data['turn']}] "
232
- f"API calls: {data['api_calls']} | "
233
- f"Elapsed: {data['elapsed']}s"
234
- ), ""
235
-
236
- elif etype == "llm":
237
- content = data.get("content", "")
238
- if content:
239
- # Truncate long LLM reasoning for display
240
- display = content[:300] + "..." if len(content) > 300 else content
241
- yield log(f" AI: {display}"), ""
242
-
243
- elif etype == "tool_call":
244
- yield log(f" >> {data['name']}({data.get('args', '')})"), ""
245
-
246
- elif etype == "game_state":
247
- yield log(
248
- f" State: tick={data.get('tick', '?')} "
249
- f"units={data.get('units', '?')} "
250
- f"buildings={data.get('buildings', '?')} "
251
- f"cash=${data.get('cash', '?')}"
252
- ), ""
253
-
254
- elif etype == "done":
255
- result = data.get("result", "?").upper()
256
- yield log(f"\nGAME OVER: {result} (tick {data.get('tick', '?')})"), ""
257
-
258
- elif etype == "final":
259
- final_data = data
260
-
261
- elif etype == "error":
262
- yield log(f"Error: {data.get('message', 'Unknown error')}"), ""
263
-
264
- # Show final scorecard
265
- if final_data:
266
- result = final_data.get("result", "ongoing").upper()
267
- summary = (
268
- f"### Game Result: {result}\n\n"
269
- f"| Metric | Value |\n|--------|-------|\n"
270
- f"| Result | **{result}** |\n"
271
- f"| Ticks | {final_data.get('tick', '?')} |\n"
272
- f"| LLM Turns | {final_data.get('turns', '?')} |\n"
273
- f"| Tool Calls | {final_data.get('tool_calls', '?')} |\n"
274
- f"| Duration | {final_data.get('elapsed', '?')}s |\n"
275
- f"| Units Killed | {final_data.get('units_killed', 0)} |\n"
276
- f"| Units Lost | {final_data.get('units_lost', 0)} |\n"
277
- f"| Kill Value | ${final_data.get('kills_cost', 0)} |\n"
278
- f"| Death Value | ${final_data.get('deaths_cost', 0)} |\n"
279
- f"| Cash | ${final_data.get('cash', 0)} |\n"
280
- )
281
- yield "\n".join(log_lines), summary
282
- else:
283
- yield "\n".join(log_lines), ""
284
-
285
- except httpx.ReadTimeout:
286
- yield log("Connection timed out. The game may still be running on the server."), ""
287
- except Exception as e:
288
- yield log(f"Error: {e}"), ""
289
-
290
-
291
  # ── UI ────────────────────────────────────────────────────────────────────────
292
 
293
  ABOUT_MD = """
@@ -335,7 +217,8 @@ SUBMIT_MD = """
335
 
336
  ### Option A: Watch AI Play (no setup needed)
337
 
338
- Use the **Try** tab to watch a pre-configured LLM agent play Red Alert
 
339
  directly in your browser. No API keys or setup required.
340
 
341
  ### Option B: CLI with HuggingFace-hosted server (no Docker needed)
@@ -472,41 +355,6 @@ def build_app() -> gr.Blocks:
472
  outputs=leaderboard,
473
  )
474
 
475
- # ── Try Tab ───────────────────────────────────────────────────────
476
- with gr.Tab("Try"):
477
- gr.Markdown(
478
- "## Watch AI Play Red Alert\n\n"
479
- "Watch a pre-configured LLM agent play a game of Red Alert "
480
- "against the built-in AI. No setup needed — just pick a "
481
- "difficulty and click play."
482
- )
483
- with gr.Row():
484
- try_opponent = gr.Dropdown(
485
- choices=["Easy", "Normal", "Hard"],
486
- value="Normal",
487
- label="Opponent Difficulty",
488
- scale=1,
489
- )
490
- try_btn = gr.Button(
491
- "Watch AI Play",
492
- variant="primary",
493
- scale=1,
494
- )
495
-
496
- try_log = gr.Textbox(
497
- label="Live Game Log",
498
- lines=18,
499
- interactive=False,
500
- show_copy_button=True,
501
- )
502
- try_summary = gr.Markdown()
503
-
504
- try_btn.click(
505
- fn=run_try_agent,
506
- inputs=[try_opponent],
507
- outputs=[try_log, try_summary],
508
- )
509
-
510
  # ── About Tab ─────────────────────────────────────────────────────
511
  with gr.Tab("About"):
512
  gr.Markdown(ABOUT_MD)
 
17
  from pathlib import Path
18
 
19
  import gradio as gr
 
20
  import pandas as pd
21
 
22
+ from evaluate_runner import DEFAULT_SERVER
23
 
24
  # ── Data Loading ───────────────────────────────────────────────────────────────
25
 
 
170
  writer.writerow(results)
171
 
172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  # ── UI ────────────────────────────────────────────────────────────────────────
174
 
175
  ABOUT_MD = """
 
217
 
218
  ### Option A: Watch AI Play (no setup needed)
219
 
220
+ Visit the [OpenRA-RL Space](https://huggingface.co/spaces/openra-rl/openra-rl)
221
+ and click **Try** to watch a pre-configured LLM agent play Red Alert
222
  directly in your browser. No API keys or setup required.
223
 
224
  ### Option B: CLI with HuggingFace-hosted server (no Docker needed)
 
355
  outputs=leaderboard,
356
  )
357
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
358
  # ── About Tab ─────────────────────────────────────────────────────
359
  with gr.Tab("About"):
360
  gr.Markdown(ABOUT_MD)