databoysu committed on
Commit
a0bb8c8
·
1 Parent(s): 9c3b38b

gradio/backend port swap

Browse files
Files changed (2) hide show
  1. app.py +582 -0
  2. inference.py +1 -1
app.py ADDED
@@ -0,0 +1,582 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import html
4
+ import json
5
+ import os
6
+ import queue
7
+ import re
8
+ import socket
9
+ import subprocess
10
+ import sys
11
+ import threading
12
+ import time
13
+ import urllib.error
14
+ import urllib.request
15
+ from contextlib import closing
16
+ from pathlib import Path
17
+ from typing import Any, Generator
18
+
19
+ import gradio as gr
20
+
21
# The task catalogue is optional: if importing ``tasks`` raises any Exception
# the module falls back to an empty catalogue instead of failing to load.
try:
    from tasks import ALL_TASKS
except Exception:
    ALL_TASKS = []


# --- Locations: project files, backend API and Gradio front end -------------
ROOT_DIR = Path(__file__).resolve().parent
INFERENCE_PATH = ROOT_DIR / "inference.py"
BACKEND_HOST = "127.0.0.1"
BACKEND_PORT = 8000
GRADIO_HOST = "0.0.0.0"
GRADIO_PORT = 7860

# --- Parsers for the structured [START] / [STEP] / [END] stdout lines -------
# Each pattern is split one field per line; the concatenated pattern text is
# what gets compiled.
START_RE = re.compile(
    r"^\[START\]"
    r"\s+task=(?P<task>\S+)"
    r"\s+env=(?P<env>\S+)"
    r"\s+model=(?P<model>.+)$"
)
STEP_RE = re.compile(
    r"^\[STEP\]"
    r"\s+step=(?P<step>\d+)"
    r"\s+action=(?P<action>[A-Z_]+)"
    r"\s+reward=(?P<reward>-?\d+(?:\.\d+)?)"
    r"\s+done=(?P<done>true|false)"
    r"\s+error=(?P<error>.*)$"
)
END_RE = re.compile(
    r"^\[END\]"
    r"\s+success=(?P<success>true|false)"
    r"\s+steps=(?P<steps>\d+)"
    r"\s+score=(?P<score>-?\d+(?:\.\d+)?)"
    r"\s+rewards=(?P<rewards>.*)$"
)

# Name -> task dict lookup. Entries that are not dicts, or that have no
# truthy "name", are left out.
TASK_MAP: dict[str, dict[str, Any]] = {
    str(entry["name"]): entry
    for entry in ALL_TASKS
    if isinstance(entry, dict) and entry.get("name")
}
49
+
50
+
51
+ CSS = """
52
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;600&display=swap');
53
+
54
+ :root {
55
+ --bg-top: #0f1115;
56
+ --bg-bottom: #1a1e27;
57
+ --panel: rgba(255, 255, 255, 0.04);
58
+ --panel-border: rgba(255, 255, 255, 0.12);
59
+ --text-main: #e7e9ef;
60
+ --text-dim: #aab1c2;
61
+ --accent: #91c6ff;
62
+ --ok: #6ce7b5;
63
+ --warn: #f9d78b;
64
+ --err: #ff9b9b;
65
+ }
66
+
67
+ .gradio-container {
68
+ font-family: 'Inter', sans-serif !important;
69
+ background: radial-gradient(circle at 20% 0%, #202636 0%, transparent 40%),
70
+ linear-gradient(180deg, var(--bg-top) 0%, var(--bg-bottom) 100%);
71
+ color: var(--text-main);
72
+ }
73
+
74
+ #header-wrap {
75
+ margin-bottom: 10px;
76
+ border: 1px solid var(--panel-border);
77
+ background: var(--panel);
78
+ border-radius: 16px;
79
+ padding: 16px 20px;
80
+ }
81
+
82
+ #header-wrap h1 {
83
+ margin: 0;
84
+ letter-spacing: 0.2px;
85
+ font-weight: 600;
86
+ color: #f5f7fb;
87
+ }
88
+
89
+ #header-wrap p {
90
+ margin: 6px 0 0;
91
+ color: var(--text-dim);
92
+ }
93
+
94
+ .panel {
95
+ border: 1px solid var(--panel-border);
96
+ border-radius: 16px;
97
+ background: var(--panel);
98
+ overflow: hidden;
99
+ }
100
+
101
+ .panel-title {
102
+ padding: 10px 14px;
103
+ border-bottom: 1px solid var(--panel-border);
104
+ color: var(--text-dim);
105
+ font-size: 12px;
106
+ letter-spacing: 0.09em;
107
+ text-transform: uppercase;
108
+ }
109
+
110
+ .code-panel * {
111
+ font-family: 'JetBrains Mono', monospace !important;
112
+ }
113
+
114
+ .terminal-wrap {
115
+ height: 620px;
116
+ overflow-y: auto;
117
+ padding: 12px;
118
+ font-family: 'JetBrains Mono', monospace;
119
+ font-size: 12px;
120
+ line-height: 1.55;
121
+ background: #0c0f16;
122
+ }
123
+
124
+ .term-line {
125
+ white-space: pre-wrap;
126
+ word-break: break-word;
127
+ }
128
+
129
+ .term-step { color: var(--accent); }
130
+ .term-start { color: #c8d7ff; }
131
+ .term-end { color: var(--ok); font-weight: 600; }
132
+ .term-thought { color: #b9c7ff; }
133
+ .term-error { color: var(--err); }
134
+ .term-muted { color: var(--text-dim); }
135
+
136
+ .metric {
137
+ border: 1px solid var(--panel-border);
138
+ background: var(--panel);
139
+ border-radius: 14px;
140
+ padding: 12px;
141
+ }
142
+ """
143
+
144
+
145
def _code_from_task_name(task_name: str) -> str:
    """Return the buggy source of a locally known task as a single string.

    The name is stripped and looked up in ``TASK_MAP``. When the lookup finds
    nothing (unknown, empty or ``None`` name) a commented placeholder that
    starts with "# Waiting for mission start..." is returned instead.

    The task's ``code`` entry may be a list of lines or one ready-made string,
    the same two shapes ``_code_from_openenv`` accepts. A missing or ``None``
    entry yields an empty string.
    """
    task = TASK_MAP.get((task_name or "").strip())
    if not task:
        return (
            "# Waiting for mission start...\n"
            "# Tip: Set TASK_NAME to one of the known tasks from tasks.py\n"
            "# so the buggy sandbox code can be previewed before launch."
        )

    code = task.get("code") or []
    if isinstance(code, str):
        # Joining a bare string would put a newline between every character.
        return code
    # str() keeps a stray non-string entry from raising inside join().
    return "\n".join(str(line) for line in code)
154
+
155
+
156
+ def _normalize_base_url(base_url: str) -> str:
157
+ candidate = (base_url or "").strip()
158
+ if not candidate:
159
+ return f"http://{BACKEND_HOST}:{BACKEND_PORT}"
160
+ if not candidate.startswith(("http://", "https://")):
161
+ candidate = f"http://{candidate}"
162
+ return candidate.rstrip("/")
163
+
164
+
165
+ def _code_from_openenv(task_name: str, env_base_url: str) -> str | None:
166
+ normalized_url = _normalize_base_url(env_base_url)
167
+ task_key = (task_name or "").strip()
168
+ if not task_key:
169
+ return None
170
+
171
+ candidates = [
172
+ f"{normalized_url}/tasks/{task_key}/code",
173
+ f"{normalized_url}/task/{task_key}/code",
174
+ f"{normalized_url}/tasks/{task_key}",
175
+ f"{normalized_url}/task/{task_key}",
176
+ ]
177
+
178
+ for url in candidates:
179
+ try:
180
+ req = urllib.request.Request(url, method="GET")
181
+ with urllib.request.urlopen(req, timeout=3) as response:
182
+ if response.status != 200:
183
+ continue
184
+ payload = json.loads(response.read().decode("utf-8"))
185
+ except (urllib.error.URLError, urllib.error.HTTPError, TimeoutError, ValueError):
186
+ continue
187
+
188
+ if isinstance(payload, dict):
189
+ code = payload.get("code")
190
+ if isinstance(code, list):
191
+ return "\n".join(str(line) for line in code)
192
+ if isinstance(code, str):
193
+ return code
194
+
195
+ task_data = payload.get("task")
196
+ if isinstance(task_data, dict):
197
+ task_code = task_data.get("code")
198
+ if isinstance(task_code, list):
199
+ return "\n".join(str(line) for line in task_code)
200
+ if isinstance(task_code, str):
201
+ return task_code
202
+ return None
203
+
204
+
205
def load_code(task_name: str, env_base_url: str) -> str:
    """Resolve the code preview for the "Load Code" button.

    Lookup order:
      1. the local catalogue (``TASK_MAP``) via ``_code_from_task_name``;
      2. the backend HTTP API via ``_code_from_openenv``;
      3. a commented "unable to load" notice.
    """
    # Decide "is this task known locally?" from the catalogue itself rather
    # than by searching the returned text for the placeholder phrase, which
    # would misclassify a real task whose code happens to contain it.
    if TASK_MAP.get((task_name or "").strip()):
        return _code_from_task_name(task_name)

    api_code = _code_from_openenv(task_name, env_base_url)
    if api_code:
        return api_code

    return (
        "# Unable to load code for the selected task.\n"
        "# Verify Task / Bug Selection and confirm OpenEnv API is reachable."
    )
218
+
219
+
220
def _solution_from_task_name(task_name: str) -> str | None:
    """Return the reference solution of a locally known task, if any.

    The stripped name is looked up in ``TASK_MAP``; ``None`` is returned when
    the lookup finds nothing. The task's ``solution`` entry may be a list of
    lines or a single string; a missing or ``None`` entry yields an empty
    string.
    """
    task = TASK_MAP.get((task_name or "").strip())
    if not task:
        return None

    solution = task.get("solution") or []
    if isinstance(solution, str):
        # Joining a bare string would put a newline between every character.
        return solution
    # str() keeps a stray non-string entry from raising inside join().
    return "\n".join(str(line) for line in solution)
225
+
226
+
227
+ def _terminal_html(lines: list[tuple[str, str]]) -> str:
228
+ rendered: list[str] = []
229
+ for css_class, text in lines:
230
+ safe = html.escape(text)
231
+ rendered.append(f"<div class='term-line {css_class}'>{safe}</div>")
232
+ content = "\n".join(rendered) if rendered else "<div class='term-line term-muted'>Idle. Configure mission variables and press Run Agent.</div>"
233
+ return (
234
+ "<div id='terminal' class='terminal-wrap'>"
235
+ f"{content}"
236
+ "</div>"
237
+ "<script>"
238
+ "const t=document.getElementById('terminal'); if(t){t.scrollTop=t.scrollHeight;}"
239
+ "</script>"
240
+ )
241
+
242
+
243
+ def _metric_block(state: str, details: str) -> str:
244
+ return (
245
+ "<div class='metric'>"
246
+ f"<div><strong>{html.escape(state)}</strong></div>"
247
+ f"<div style='color:var(--text-dim); margin-top: 6px'>{html.escape(details)}</div>"
248
+ "</div>"
249
+ )
250
+
251
+
252
+ def _reader_thread(stream: Any, source: str, out_q: queue.Queue[tuple[str, str | None]]) -> None:
253
+ try:
254
+ for raw in iter(stream.readline, ""):
255
+ out_q.put((source, raw.rstrip("\n")))
256
+ finally:
257
+ try:
258
+ stream.close()
259
+ except Exception:
260
+ pass
261
+ out_q.put((source, None))
262
+
263
+
264
def _build_env(
    hf_token: str,
    api_base_url: str,
    model_name: str,
    env_base_url: str,
    task_name: str,
    benchmark: str,
    max_steps: int,
    success_score_threshold: float,
    local_image_name: str,
) -> dict[str, str]:
    """Build the environment for the inference subprocess from the UI fields.

    Starts from a copy of ``os.environ``. For every managed variable, a
    non-blank value overrides the inherited one, while a blank or ``None``
    value removes the variable from the copy.

    ``max_steps`` and ``success_score_threshold`` may be ``None`` (treated as
    "unset"); otherwise they are coerced with ``int`` and ``float``.
    ``env_base_url`` is passed through ``_normalize_base_url``, so
    ``ENV_BASE_URL`` is always set.
    """
    env = os.environ.copy()
    updates: dict[str, str | None] = {
        "HF_TOKEN": hf_token,
        "API_BASE_URL": api_base_url,
        "MODEL_NAME": model_name,
        "ENV_BASE_URL": _normalize_base_url(env_base_url),
        "TASK_NAME": task_name,
        "BENCHMARK": benchmark,
        # Guard None explicitly: int(None) / float(None) raise TypeError.
        "MAX_STEPS": None if max_steps is None else str(int(max_steps)),
        "SUCCESS_SCORE_THRESHOLD": (
            None if success_score_threshold is None else str(float(success_score_threshold))
        ),
        "LOCAL_IMAGE_NAME": local_image_name,
    }
    for key, value in updates.items():
        cleaned = (value or "").strip()
        if cleaned:
            env[key] = cleaned
        else:
            # Blank field: drop any inherited value.
            env.pop(key, None)
    return env
294
+
295
+
296
def _is_port_open(host: str, port: int) -> bool:
    """Return True if a TCP connection to ``host:port`` succeeds within 0.5 s.

    Any socket-level failure, including errors that ``connect_ex`` raises
    instead of returning (such as a host that cannot be resolved), counts as
    "not open".
    """
    # Socket objects are context managers, so the socket is closed on exit.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.settimeout(0.5)
        try:
            return sock.connect_ex((host, port)) == 0
        except OSError:
            return False
300
+
301
+
302
def _start_backend_server() -> None:
    """Launch the backend API as a child process unless it is already up.

    If something already accepts connections on ``BACKEND_HOST:BACKEND_PORT``
    nothing is started. Otherwise ``python -m server.app`` is spawned from
    ``ROOT_DIR`` with ``HOST`` / ``PORT`` set in its environment and its
    output discarded, and the port is probed up to 20 times, 0.1 s apart.

    The function never raises on a failed start. It writes a diagnostic to
    stderr and returns, so the UI can still come up.
    """
    if _is_port_open(BACKEND_HOST, BACKEND_PORT):
        return

    backend_env = os.environ.copy()
    backend_env["HOST"] = BACKEND_HOST
    backend_env["PORT"] = str(BACKEND_PORT)

    backend = subprocess.Popen(
        [sys.executable, "-m", "server.app"],
        cwd=str(ROOT_DIR),
        env=backend_env,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

    for _ in range(20):
        if _is_port_open(BACKEND_HOST, BACKEND_PORT):
            return
        exit_code = backend.poll()
        if exit_code is not None:
            # The child died before binding the port; waiting longer is
            # pointless, and its own output was discarded.
            print(
                f"[app] backend server exited during start-up (code={exit_code}).",
                file=sys.stderr,
            )
            return
        time.sleep(0.1)

    print(
        f"[app] backend server is not accepting connections on "
        f"{BACKEND_HOST}:{BACKEND_PORT} yet; continuing without waiting further.",
        file=sys.stderr,
    )
322
+
323
+
324
def _reset_run_state(task_name: str) -> tuple[str, str, str, float, str]:
    """Produce the pre-run values for the five output widgets.

    Returns, in order: the local code preview for ``task_name``, an empty
    terminal, a "Mission Ready" status card, a score of ``0.0`` and the
    pending-rewards markdown.
    """
    preview = _code_from_task_name(task_name)
    blank_terminal = _terminal_html([])
    ready_card = _metric_block("Mission Ready", "Awaiting [START] from inference subprocess...")
    return preview, blank_terminal, ready_card, 0.0, "`Rewards:` pending"
332
+
333
+
334
def run_agent(
    hf_token: str,
    api_base_url: str,
    model_name: str,
    env_base_url: str,
    task_name: str,
    benchmark: str,
    max_steps: int,
    success_score_threshold: float,
    local_image_name: str,
    difficulty: str,
    show_thought: bool,
) -> Generator[tuple[str, str, str, float, str], None, None]:
    """Run ``inference.py`` as a subprocess and stream its progress to the UI.

    This is a generator: after every line the subprocess prints it yields
    ``(code_view, terminal_html, status_html, score, rewards_markdown)``.

    * stdout lines matching ``START_RE`` / ``STEP_RE`` / ``END_RE`` update the
      status card, score and code view; other stdout lines are shown muted.
    * stderr lines are shown as a thought trace after a ``[THOUGHT]`` marker,
      as muted text for any other ``[...]`` marker line, and as errors
      otherwise.
    * On a successful ``[END]`` the code view switches to the task's reference
      solution when one is known locally.
    * Only the newest 500 terminal lines are kept.

    ``difficulty`` adds ``--easy`` / ``--medium`` / ``--hard`` to the command
    line (any other value adds nothing); ``show_thought`` adds ``--thought``.
    The remaining parameters are handed to ``_build_env``.

    If the generator is closed or fails before the subprocess has finished,
    the subprocess is terminated (and killed if it ignores that).
    """
    code_view = _code_from_task_name(task_name)
    terminal_lines: list[tuple[str, str]] = [("term-muted", "Boot sequence initialized.")]

    status_html = _metric_block("Mission Ready", "Launching inference subprocess...")
    score_value = 0.0
    rewards_md = "`Rewards:` pending"
    yield code_view, _terminal_html(terminal_lines), status_html, score_value, rewards_md

    cmd = [sys.executable, str(INFERENCE_PATH)]
    if difficulty in {"easy", "medium", "hard"}:
        cmd.append(f"--{difficulty}")
    if show_thought:
        cmd.append("--thought")

    env = _build_env(
        hf_token,
        api_base_url,
        model_name,
        env_base_url,
        task_name,
        benchmark,
        max_steps,
        success_score_threshold,
        local_image_name,
    )
    # A Python child block-buffers stdout when it is a pipe; ask it not to so
    # lines reach the UI as they are produced. An explicit setting wins.
    env.setdefault("PYTHONUNBUFFERED", "1")

    process = subprocess.Popen(
        cmd,
        cwd=str(ROOT_DIR),
        env=env,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        # An undecodable byte must not kill the reader thread mid-run.
        errors="replace",
        bufsize=1,
    )

    try:
        out_q: queue.Queue[tuple[str, str | None]] = queue.Queue()
        stdout_thread = threading.Thread(
            target=_reader_thread, args=(process.stdout, "stdout", out_q), daemon=True
        )
        stderr_thread = threading.Thread(
            target=_reader_thread, args=(process.stderr, "stderr", out_q), daemon=True
        )
        stdout_thread.start()
        stderr_thread.start()

        ended_streams: set[str] = set()
        thought_mode = False
        active_task_name = (task_name or "").strip()
        final_steps = 0

        while True:
            try:
                source, line = out_q.get(timeout=0.15)
            except queue.Empty:
                if process.poll() is not None and ended_streams == {"stdout", "stderr"}:
                    break
                continue

            if line is None:
                # End-of-stream marker from one of the reader threads.
                ended_streams.add(source)
                if process.poll() is not None and ended_streams == {"stdout", "stderr"}:
                    break
                continue

            if source == "stderr":
                if line.strip() == "[THOUGHT]":
                    thought_mode = True
                    terminal_lines.append(("term-thought", "[THOUGHT]"))
                elif line.startswith("[") and line.endswith("]"):
                    # Any other bracketed marker line ends the thought trace.
                    thought_mode = False
                    terminal_lines.append(("term-muted", line))
                elif thought_mode:
                    terminal_lines.append(("term-thought", line))
                else:
                    terminal_lines.append(("term-error", line))
            else:
                start_match = START_RE.match(line)
                step_match = STEP_RE.match(line)
                end_match = END_RE.match(line)

                if start_match:
                    active_task_name = start_match.group("task").strip()
                    task_preview = _code_from_task_name(active_task_name)
                    # Only replace the code view when the task is known
                    # locally (i.e. the placeholder text did not come back).
                    if "Waiting for mission start" not in task_preview:
                        code_view = task_preview
                    terminal_lines.append(("term-start", line))
                    status_html = _metric_block(
                        "Mission Running",
                        f"task={active_task_name} | env={start_match.group('env')} | model={start_match.group('model')}",
                    )
                elif step_match:
                    final_steps = int(step_match.group("step"))
                    action = step_match.group("action")
                    reward = float(step_match.group("reward"))
                    done_flag = step_match.group("done") == "true"
                    err = step_match.group("error")
                    css = "term-step" if err == "null" else "term-error"
                    terminal_lines.append((css, line))
                    status_html = _metric_block(
                        "Mission Running",
                        f"step={final_steps} action={action} reward={reward:.2f} done={str(done_flag).lower()}",
                    )
                elif end_match:
                    success = end_match.group("success") == "true"
                    final_steps = int(end_match.group("steps"))
                    score_value = float(end_match.group("score"))
                    rewards_raw = end_match.group("rewards").strip()
                    rewards_md = f"`Rewards:` {rewards_raw or 'none'}"
                    terminal_lines.append(("term-end", line))
                    if success:
                        solved = _solution_from_task_name(active_task_name)
                        if solved:
                            code_view = solved
                        status_html = _metric_block(
                            "Mission Success",
                            f"score={score_value:.2f} | steps={final_steps}",
                        )
                    else:
                        status_html = _metric_block(
                            "Mission Failed",
                            f"score={score_value:.2f} | steps={final_steps}",
                        )
                else:
                    terminal_lines.append(("term-muted", line))

            if len(terminal_lines) > 500:
                terminal_lines = terminal_lines[-500:]

            yield code_view, _terminal_html(terminal_lines), status_html, score_value, rewards_md

        # The loop only exits once poll() reported an exit code, so this
        # returns immediately.
        return_code = process.wait(timeout=2)
        if return_code != 0:
            terminal_lines.append(("term-error", f"Process exited with code {return_code}."))
            status_html = _metric_block(
                "Mission Error",
                f"inference.py exited non-zero (code={return_code})",
            )

        if len(terminal_lines) > 500:
            terminal_lines = terminal_lines[-500:]

        yield code_view, _terminal_html(terminal_lines), status_html, score_value, rewards_md
    finally:
        # Reached with the child still alive only when the generator was
        # closed or raised mid-stream; do not leave the subprocess behind.
        if process.poll() is None:
            process.terminate()
            try:
                process.wait(timeout=2)
            except subprocess.TimeoutExpired:
                process.kill()
                process.wait()
487
+
488
+
489
# --- UI definition -----------------------------------------------------------
# Components are created in display order; keep statements in this order.
with gr.Blocks(title="TraceFix-RL Mission Control", css=CSS, theme=gr.themes.Monochrome()) as demo:
    gr.HTML(
        """
<div id='header-wrap'>
<h1>TraceFix-RL: Autonomous Debugging Agent</h1>
<p>Mission Control UI for real-time agent orchestration on Hugging Face Spaces.</p>
</div>
"""
    )

    # Use gr.Sidebar when this Gradio build has it, otherwise a plain column.
    sidebar_context = gr.Sidebar() if hasattr(gr, "Sidebar") else gr.Column()

    with sidebar_context:
        gr.Markdown("### Runtime Inputs")
        hf_token = gr.Textbox(label="HF Token", type="password", placeholder="hf_xxx")
        task_choices = sorted(TASK_MAP.keys())
        selected_task = os.getenv("TASK_NAME", "")
        with gr.Row():
            task_name = gr.Dropdown(
                label="Task / Bug Selection",
                choices=task_choices,
                value=selected_task if selected_task else None,
                # Free text is allowed so tasks unknown locally can be used.
                allow_custom_value=True,
                interactive=True,
            )
            load_code_button = gr.Button("Load Code")
        # Every field below is pre-filled from its environment variable.
        model_name = gr.Textbox(
            label="Model Name",
            value=os.getenv("MODEL_NAME", "openai/gpt-oss-20b"),
        )
        api_base_url = gr.Textbox(
            label="API Base URL",
            value=os.getenv("API_BASE_URL", "https://router.huggingface.co/v1"),
        )
        env_base_url = gr.Textbox(
            label="Env Base URL",
            value=os.getenv("ENV_BASE_URL", f"http://{BACKEND_HOST}:{BACKEND_PORT}"),
        )
        benchmark = gr.Textbox(
            label="Benchmark",
            value=os.getenv("BENCHMARK", "tracefix_rl"),
        )
        local_image_name = gr.Textbox(
            label="Local Image Name",
            value=os.getenv("LOCAL_IMAGE_NAME", ""),
            placeholder="optional",
        )
        max_steps = gr.Number(
            label="Max Steps",
            value=int(os.getenv("MAX_STEPS", "50")),
            precision=0,
        )
        success_score_threshold = gr.Number(
            label="Success Score Threshold",
            value=float(os.getenv("SUCCESS_SCORE_THRESHOLD", "0.99")),
            precision=2,
        )
        difficulty = gr.Dropdown(
            label="Difficulty",
            choices=["auto", "easy", "medium", "hard"],
            value="auto",
        )
        show_thought = gr.Checkbox(label="Stream Thought Trace", value=True)
        run_button = gr.Button("Run Agent", variant="primary")

    # Main area: code preview on the left, live terminal on the right.
    with gr.Row(equal_height=True):
        with gr.Column(scale=1, elem_classes=["panel", "code-panel"]):
            gr.HTML("<div class='panel-title'>The Sandbox</div>")
            code_view = gr.Code(
                value=_code_from_task_name(selected_task),
                language="python",
                lines=30,
                interactive=False,
            )

        with gr.Column(scale=1, elem_classes=["panel"]):
            gr.HTML("<div class='panel-title'>The Terminal</div>")
            terminal = gr.HTML(_terminal_html([]))

    # Footer row: status card, final score, reward list.
    with gr.Row():
        metric = gr.HTML(_metric_block("Idle", "Waiting for launch."))
        score = gr.Number(label="Final Score", value=0.0, precision=3)
        rewards = gr.Markdown("`Rewards:` pending")

    # --- Event wiring --------------------------------------------------------
    load_code_button.click(
        load_code,
        inputs=[task_name, env_base_url],
        outputs=[code_view],
    )

    # "Run Agent" first resets the widgets (unqueued), then streams the run.
    run_event = run_button.click(
        _reset_run_state,
        inputs=[task_name],
        outputs=[code_view, terminal, metric, score, rewards],
        queue=False,
    )

    run_event.then(
        run_agent,
        inputs=[
            hf_token,
            api_base_url,
            model_name,
            env_base_url,
            task_name,
            benchmark,
            max_steps,
            success_score_threshold,
            local_image_name,
            difficulty,
            show_thought,
        ],
        outputs=[code_view, terminal, metric, score, rewards],
    )
578
+
579
+
580
# Script entry point: make sure the backend API is running, then serve the UI
# with request queuing enabled.
if __name__ == "__main__":
    _start_backend_server()
    queued_app = demo.queue()
    queued_app.launch(server_port=GRADIO_PORT, server_name=GRADIO_HOST)
inference.py CHANGED
@@ -42,7 +42,7 @@ MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-20b")
42
  HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("API_KEY") or "lm-studio"
43
  LOCAL_IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME")
44
 
45
- ENV_BASE_URL = os.getenv("ENV_BASE_URL", "127.0.0.1:7860")
46
  TASK_NAME = os.getenv("TASK_NAME", "tracefix_rl")
47
  BENCHMARK = os.getenv("BENCHMARK", "tracefix_rl")
48
  MAX_STEPS = int(os.getenv("MAX_STEPS", "50"))
 
42
  HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("API_KEY") or "lm-studio"
43
  LOCAL_IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME")
44
 
45
+ ENV_BASE_URL = os.getenv("ENV_BASE_URL", "http://127.0.0.1:8000")
46
  TASK_NAME = os.getenv("TASK_NAME", "tracefix_rl")
47
  BENCHMARK = os.getenv("BENCHMARK", "tracefix_rl")
48
  MAX_STEPS = int(os.getenv("MAX_STEPS", "50"))