databoysu committed on
Commit
547b9d6
·
1 Parent(s): c8cdf7a

improving ui, persona theme

Browse files
Files changed (1) hide show
  1. vision_ui.py +184 -108
vision_ui.py CHANGED
@@ -18,10 +18,15 @@ import gradio as gr
18
  try:
19
  from tasks import tasks
20
  ALL_TASKS = tasks.ALL_TASKS
 
21
  except Exception:
22
  tasks = None
23
  ALL_TASKS = []
 
24
 
 
 
 
25
 
26
  ROOT_DIR = Path(__file__).resolve().parent
27
  INFERENCE_PATH = ROOT_DIR / "inference.py"
@@ -46,54 +51,53 @@ TASK_MAP: dict[str, dict[str, Any]] = {
46
  if isinstance(task, dict) and task.get("name")
47
  }
48
 
49
-
50
  CSS = """
51
- @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;600&display=swap');
52
 
53
  :root {
54
- --bg-top: #0f1115;
55
- --bg-bottom: #1a1e27;
56
- --panel: rgba(255, 255, 255, 0.04);
57
- --panel-border: rgba(255, 255, 255, 0.12);
58
- --text-main: #e7e9ef;
59
- --text-dim: #aab1c2;
60
- --accent: #91c6ff;
61
- --ok: #6ce7b5;
62
- --warn: #f9d78b;
63
- --err: #ff9b9b;
64
  }
65
 
66
  .gradio-container {
67
  font-family: 'Inter', sans-serif !important;
68
- background: radial-gradient(circle at 20% 0%, #202636 0%, transparent 40%),
69
- linear-gradient(180deg, var(--bg-top) 0%, var(--bg-bottom) 100%);
70
  color: var(--text-main);
71
  }
72
 
73
  #header-wrap {
74
- margin-bottom: 10px;
75
- border: 1px solid var(--panel-border);
76
- background: var(--panel);
77
- border-radius: 16px;
78
  padding: 16px 20px;
 
79
  }
80
 
81
  #header-wrap h1 {
82
  margin: 0;
83
- letter-spacing: 0.2px;
84
- font-weight: 600;
85
- color: #f5f7fb;
 
 
86
  }
87
 
88
  #header-wrap p {
89
  margin: 6px 0 0;
90
- color: var(--text-dim);
 
91
  }
92
 
93
  .panel {
94
  border: 1px solid var(--panel-border);
95
- border-radius: 16px;
96
- background: var(--panel);
97
  overflow: hidden;
98
  }
99
 
@@ -101,9 +105,28 @@ CSS = """
101
  padding: 10px 14px;
102
  border-bottom: 1px solid var(--panel-border);
103
  color: var(--text-dim);
104
- font-size: 12px;
105
- letter-spacing: 0.09em;
106
  text-transform: uppercase;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  }
108
 
109
  .code-panel * {
@@ -111,13 +134,14 @@ CSS = """
111
  }
112
 
113
  .terminal-wrap {
114
- height: 620px;
115
  overflow-y: auto;
116
  padding: 12px;
117
  font-family: 'JetBrains Mono', monospace;
118
- font-size: 12px;
119
- line-height: 1.55;
120
- background: #0c0f16;
 
121
  }
122
 
123
  .term-line {
@@ -125,33 +149,33 @@ CSS = """
125
  word-break: break-word;
126
  }
127
 
128
- .term-step { color: var(--accent); }
129
- .term-start { color: #c8d7ff; }
130
- .term-end { color: var(--ok); font-weight: 600; }
131
- .term-thought { color: #b9c7ff; }
132
- .term-error { color: var(--err); }
133
- .term-muted { color: var(--text-dim); }
 
134
 
135
  .metric {
136
  border: 1px solid var(--panel-border);
137
- background: var(--panel);
138
- border-radius: 14px;
139
  padding: 12px;
 
140
  }
141
  """
142
 
143
-
144
  def _code_from_task_name(task_name: str) -> str:
145
  task = TASK_MAP.get((task_name or "").strip())
146
  if not task:
147
  return (
148
  "# Waiting for mission start...\n"
149
- "# Tip: Set TASK_NAME to one of the known tasks from tasks.py\n"
150
  "# so the buggy sandbox code can be previewed before launch."
151
  )
152
  return "\n".join(task.get("code", []))
153
 
154
-
155
  def _normalize_base_url(base_url: str) -> str:
156
  candidate = (base_url or "").strip()
157
  if not candidate:
@@ -160,7 +184,6 @@ def _normalize_base_url(base_url: str) -> str:
160
  candidate = f"http://{candidate}"
161
  return candidate.rstrip("/")
162
 
163
-
164
  def _code_from_openenv(task_name: str, env_base_url: str) -> str | None:
165
  normalized_url = _normalize_base_url(env_base_url)
166
  task_key = (task_name or "").strip()
@@ -200,7 +223,6 @@ def _code_from_openenv(task_name: str, env_base_url: str) -> str | None:
200
  return task_code
201
  return None
202
 
203
-
204
  def load_code(task_name: str, env_base_url: str) -> str:
205
  local_code = _code_from_task_name(task_name)
206
  if "Waiting for mission start" not in local_code:
@@ -215,20 +237,18 @@ def load_code(task_name: str, env_base_url: str) -> str:
215
  "# Verify Task / Bug Selection and confirm OpenEnv API is reachable."
216
  )
217
 
218
-
219
  def _solution_from_task_name(task_name: str) -> str | None:
220
  task = TASK_MAP.get((task_name or "").strip())
221
  if not task:
222
  return None
223
  return "\n".join(task.get("solution", []))
224
 
225
-
226
  def _terminal_html(lines: list[tuple[str, str]]) -> str:
227
  rendered: list[str] = []
228
  for css_class, text in lines:
229
  safe = html.escape(text)
230
  rendered.append(f"<div class='term-line {css_class}'>{safe}</div>")
231
- content = "\n".join(rendered) if rendered else "<div class='term-line term-muted'>Idle. Configure mission variables and press Run Agent.</div>"
232
  return (
233
  "<div id='terminal' class='terminal-wrap'>"
234
  f"{content}"
@@ -238,7 +258,6 @@ def _terminal_html(lines: list[tuple[str, str]]) -> str:
238
  "</script>"
239
  )
240
 
241
-
242
  def _metric_block(state: str, details: str) -> str:
243
  return (
244
  "<div class='metric'>"
@@ -247,6 +266,13 @@ def _metric_block(state: str, details: str) -> str:
247
  "</div>"
248
  )
249
 
 
 
 
 
 
 
 
250
 
251
  def _reader_thread(stream: Any, source: str, out_q: queue.Queue[tuple[str, str | None]]) -> None:
252
  try:
@@ -259,7 +285,6 @@ def _reader_thread(stream: Any, source: str, out_q: queue.Queue[tuple[str, str |
259
  pass
260
  out_q.put((source, None))
261
 
262
-
263
  def _build_env(
264
  hf_token: str,
265
  api_base_url: str,
@@ -291,38 +316,44 @@ def _build_env(
291
  env.pop(key, None)
292
  return env
293
 
 
 
294
 
295
- def _reset_run_state(task_name: str) -> tuple[str, str, str, float, str]:
 
296
  return (
297
  _code_from_task_name(task_name),
298
  _terminal_html([]),
299
  _metric_block("Mission Ready", "Awaiting [START] from inference subprocess..."),
300
  0.0,
301
- "`Rewards:` pending",
302
  )
303
 
304
-
305
  def run_agent(
 
 
 
306
  hf_token: str,
307
  api_base_url: str,
308
  model_name: str,
309
  env_base_url: str,
310
- task_name: str,
311
  benchmark: str,
312
  max_steps: int,
313
  success_score_threshold: float,
314
  local_image_name: str,
315
  difficulty: str,
316
  show_thought: bool,
317
- ) -> Generator[tuple[str, str, str, float, str], None, None]:
 
 
318
  code_view = _code_from_task_name(task_name)
319
  terminal_lines: list[tuple[str, str]] = []
320
- terminal_lines.append(("term-muted", "Boot sequence initialized."))
321
 
322
- status_html = _metric_block("Mission Ready", "Launching inference subprocess...")
323
  score_value = 0.0
324
  rewards_md = "`Rewards:` pending"
325
- yield code_view, _terminal_html(terminal_lines), status_html, score_value, rewards_md
326
 
327
  cmd = [sys.executable, str(INFERENCE_PATH)]
328
  if difficulty in {"easy", "medium", "hard"}:
@@ -360,7 +391,7 @@ def run_agent(
360
 
361
  ended_streams: set[str] = set()
362
  thought_mode = False
363
- active_task_name = (task_name or "").strip()
364
  final_steps = 0
365
 
366
  while True:
@@ -380,15 +411,25 @@ def run_agent(
380
  if source == "stderr":
381
  if line.strip() == "[THOUGHT]":
382
  thought_mode = True
383
- terminal_lines.append(("term-thought", "[THOUGHT]"))
 
384
  elif line.startswith("[") and line.endswith("]"):
385
  thought_mode = False
386
- terminal_lines.append(("term-muted", line))
387
  elif thought_mode:
388
- terminal_lines.append(("term-thought", line))
 
389
  else:
390
- terminal_lines.append(("term-error", line))
 
 
 
 
391
  else:
 
 
 
 
392
  start_match = START_RE.match(line)
393
  step_match = STEP_RE.match(line)
394
  end_match = END_RE.match(line)
@@ -398,7 +439,7 @@ def run_agent(
398
  task_preview = _code_from_task_name(active_task_name)
399
  if "Waiting for mission start" not in task_preview:
400
  code_view = task_preview
401
- terminal_lines.append(("term-start", line))
402
  status_html = _metric_block(
403
  "Mission Running",
404
  f"task={active_task_name} | env={start_match.group('env')} | model={start_match.group('model')}",
@@ -409,7 +450,7 @@ def run_agent(
409
  reward = float(step_match.group("reward"))
410
  done_flag = step_match.group("done") == "true"
411
  err = step_match.group("error")
412
- css = "term-step" if err == "null" else "term-error"
413
  terminal_lines.append((css, line))
414
  status_html = _metric_block(
415
  "Mission Running",
@@ -421,7 +462,7 @@ def run_agent(
421
  score_value = float(end_match.group("score"))
422
  rewards_raw = end_match.group("rewards").strip()
423
  rewards_md = f"`Rewards:` {rewards_raw or 'none'}"
424
- terminal_lines.append(("term-end", line))
425
  if success:
426
  solved = _solution_from_task_name(active_task_name)
427
  if solved:
@@ -436,16 +477,16 @@ def run_agent(
436
  f"score={score_value:.2f} | steps={final_steps}",
437
  )
438
  else:
439
- terminal_lines.append(("term-muted", line))
440
 
441
  if len(terminal_lines) > 500:
442
  terminal_lines = terminal_lines[-500:]
443
 
444
- yield code_view, _terminal_html(terminal_lines), status_html, score_value, rewards_md
445
 
446
  return_code = process.wait(timeout=2)
447
  if return_code != 0:
448
- terminal_lines.append(("term-error", f"Process exited with code {return_code}."))
449
  status_html = _metric_block(
450
  "Mission Error",
451
  f"inference.py exited non-zero (code={return_code})",
@@ -454,15 +495,16 @@ def run_agent(
454
  if len(terminal_lines) > 500:
455
  terminal_lines = terminal_lines[-500:]
456
 
457
- yield code_view, _terminal_html(terminal_lines), status_html, score_value, rewards_md
458
 
459
 
460
- with gr.Blocks(title="TraceFix-RL Mission Control", css=CSS) as demo:
461
  gr.HTML(
462
- """
 
463
  <div id='header-wrap'>
464
- <h1>TraceFix-RL: Autonomous Debugging Agent</h1>
465
- <p>Mission Control UI for real-time agent orchestration on Hugging Face Spaces.</p>
466
  </div>
467
  """
468
  )
@@ -473,70 +515,104 @@ with gr.Blocks(title="TraceFix-RL Mission Control", css=CSS) as demo:
473
  sidebar_context = gr.Column()
474
 
475
  with sidebar_context:
476
- gr.Markdown("### Runtime Inputs")
 
477
  hf_token = gr.Textbox(label="HF Token", type="password", placeholder="hf_xxx")
478
- task_choices = sorted(TASK_MAP.keys())
479
- selected_task = os.getenv("TASK_NAME", "")
480
- with gr.Row():
481
- task_name = gr.Dropdown(
482
- label="Task / Bug Selection",
483
- choices=task_choices,
484
- value=selected_task if selected_task else None,
485
- allow_custom_value=True,
486
- interactive=True,
487
- )
488
- load_code_button = gr.Button("Load Code")
489
- model_name = gr.Textbox(label="Model Name", value=os.getenv("MODEL_NAME", "openai/gpt-oss-20b"))
490
- api_base_url = gr.Textbox(label="API Base URL", value=os.getenv("API_BASE_URL", "https://router.huggingface.co/v1"))
491
- env_base_url = gr.Textbox(label="Env Base URL", value=os.getenv("ENV_BASE_URL", f"http://{BACKEND_HOST}:{BACKEND_PORT}"))
492
- benchmark = gr.Textbox(label="Benchmark", value=os.getenv("BENCHMARK", "tracefix_rl"))
493
- local_image_name = gr.Textbox(label="Local Image Name", value=os.getenv("LOCAL_IMAGE_NAME", ""), placeholder="optional")
494
- max_steps = gr.Number(label="Max Steps", value=int(os.getenv("MAX_STEPS", "50")), precision=0)
495
- success_score_threshold = gr.Number(
496
- label="Success Score Threshold",
497
- value=float(os.getenv("SUCCESS_SCORE_THRESHOLD", "0.99")),
498
- precision=2,
499
- )
500
- difficulty = gr.Dropdown(label="Difficulty", choices=["auto", "easy", "medium", "hard"], value="auto")
501
- show_thought = gr.Checkbox(label="Stream Thought Trace", value=True)
502
- run_button = gr.Button("Run Agent", variant="primary")
503
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
504
  with gr.Row(equal_height=True):
505
  with gr.Column(scale=1, elem_classes=["panel", "code-panel"]):
506
- gr.HTML("<div class='panel-title'>The Sandbox</div>")
507
  code_view = gr.Code(
508
  language="python",
509
  interactive=False,
510
- value=_code_from_task_name(selected_task),
511
  lines=30,
512
  )
513
 
514
  with gr.Column(scale=1, elem_classes=["panel"]):
515
- gr.HTML("<div class='panel-title'>The Terminal</div>")
516
  terminal = gr.HTML(_terminal_html([]))
517
 
518
  with gr.Row():
519
- metric = gr.HTML(_metric_block("Idle", "Waiting for launch."))
520
  score = gr.Number(label="Final Score", value=0.0, precision=3)
521
  rewards = gr.Markdown("`Rewards:` pending")
522
 
523
- load_code_button.click(load_code, inputs=[task_name, env_base_url], outputs=[code_view])
524
-
525
- run_event = run_button.click(
 
 
 
 
 
 
 
 
 
 
 
 
526
  _reset_run_state,
527
- inputs=[task_name],
528
  outputs=[code_view, terminal, metric, score, rewards],
529
  queue=False,
530
  )
531
 
 
532
  run_event.then(
533
  run_agent,
534
  inputs=[
 
 
 
535
  hf_token,
536
  api_base_url,
537
  model_name,
538
  env_base_url,
539
- task_name,
540
  benchmark,
541
  max_steps,
542
  success_score_threshold,
@@ -544,5 +620,5 @@ with gr.Blocks(title="TraceFix-RL Mission Control", css=CSS) as demo:
544
  difficulty,
545
  show_thought,
546
  ],
547
- outputs=[code_view, terminal, metric, score, rewards],
548
  )
 
18
  try:
19
  from tasks import tasks
20
  ALL_TASKS = tasks.ALL_TASKS
21
+ TASKS_BY_DIFFICULTY = tasks.TASKS_BY_DIFFICULTY
22
  except Exception:
23
  tasks = None
24
  ALL_TASKS = []
25
+ TASKS_BY_DIFFICULTY = {"easy": [], "medium": [], "hard": []}
26
 
27
+ EASY_CHOICES = [t.get("name") for t in TASKS_BY_DIFFICULTY.get("easy", []) if t.get("name")]
28
+ MEDIUM_CHOICES = [t.get("name") for t in TASKS_BY_DIFFICULTY.get("medium", []) if t.get("name")]
29
+ HARD_CHOICES = [t.get("name") for t in TASKS_BY_DIFFICULTY.get("hard", []) if t.get("name")]
30
 
31
  ROOT_DIR = Path(__file__).resolve().parent
32
  INFERENCE_PATH = ROOT_DIR / "inference.py"
 
51
  if isinstance(task, dict) and task.get("name")
52
  }
53
 
 
54
  CSS = """
55
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;900&family=JetBrains+Mono:wght@400;600;800&display=swap');
56
 
57
  :root {
58
+ --bg-top: #0a0a0a;
59
+ --bg-bottom: #111111;
60
+ --panel: #0a0a0a;
61
+ --panel-border: rgba(255, 255, 255, 0.15);
62
+ --text-main: #f5f5f5;
63
+ --text-dim: #999;
64
+ --accent: #E60012;
 
 
 
65
  }
66
 
67
  .gradio-container {
68
  font-family: 'Inter', sans-serif !important;
69
+ background: var(--bg-top);
 
70
  color: var(--text-main);
71
  }
72
 
73
  #header-wrap {
74
+ margin-bottom: 5px;
75
+ border: 1px solid var(--accent);
76
+ background: #000;
77
+ border-radius: 0px;
78
  padding: 16px 20px;
79
+ text-transform: uppercase;
80
  }
81
 
82
  #header-wrap h1 {
83
  margin: 0;
84
+ letter-spacing: 2px;
85
+ font-weight: 900;
86
+ color: #fff;
87
+ font-style: italic;
88
+ text-shadow: 2px 2px #E60012;
89
  }
90
 
91
  #header-wrap p {
92
  margin: 6px 0 0;
93
+ color: #fff;
94
+ font-weight: 500;
95
  }
96
 
97
  .panel {
98
  border: 1px solid var(--panel-border);
99
+ border-radius: 0px !important;
100
+ background: var(--panel) !important;
101
  overflow: hidden;
102
  }
103
 
 
105
  padding: 10px 14px;
106
  border-bottom: 1px solid var(--panel-border);
107
  color: var(--text-dim);
108
+ font-size: 14px;
109
+ letter-spacing: 0.1em;
110
  text-transform: uppercase;
111
+ font-weight: bold;
112
+ }
113
+
114
+ #execute-btn {
115
+ background: var(--accent) !important;
116
+ color: #fff !important;
117
+ border-radius: 0px !important;
118
+ font-weight: 900 !important;
119
+ font-size: 18px !important;
120
+ text-transform: uppercase !important;
121
+ border: none !important;
122
+ transition: all 0.2s ease !important;
123
+ height: 60px !important;
124
+ }
125
+
126
+ #execute-btn:hover {
127
+ background: #fff !important;
128
+ color: var(--accent) !important;
129
+ box-shadow: 0 0 15px var(--accent) !important;
130
  }
131
 
132
  .code-panel * {
 
134
  }
135
 
136
  .terminal-wrap {
137
+ height: 600px;
138
  overflow-y: auto;
139
  padding: 12px;
140
  font-family: 'JetBrains Mono', monospace;
141
+ font-size: 13px;
142
+ line-height: 1.6;
143
+ background: #050505;
144
+ border: 2px solid var(--accent);
145
  }
146
 
147
  .term-line {
 
149
  word-break: break-word;
150
  }
151
 
152
+ /* Cyberpunk Log Colors */
153
+ .c-start { color: #E60012; font-weight: bold; }
154
+ .c-end { color: #E60012; font-weight: bold; }
155
+ .c-step { color: #39ff14; font-weight: bold; }
156
+ .c-thought { color: #5b7a96; font-style: italic; }
157
+ .c-error { color: #E60012; }
158
+ .c-muted { color: var(--text-dim); }
159
 
160
  .metric {
161
  border: 1px solid var(--panel-border);
162
+ background: #000;
163
+ border-radius: 0px;
164
  padding: 12px;
165
+ border-left: 4px solid var(--accent);
166
  }
167
  """
168
 
 
169
  def _code_from_task_name(task_name: str) -> str:
170
  task = TASK_MAP.get((task_name or "").strip())
171
  if not task:
172
  return (
173
  "# Waiting for mission start...\n"
174
+ "# Tip: Select a target from the Mission Board\n"
175
  "# so the buggy sandbox code can be previewed before launch."
176
  )
177
  return "\n".join(task.get("code", []))
178
 
 
179
  def _normalize_base_url(base_url: str) -> str:
180
  candidate = (base_url or "").strip()
181
  if not candidate:
 
184
  candidate = f"http://{candidate}"
185
  return candidate.rstrip("/")
186
 
 
187
  def _code_from_openenv(task_name: str, env_base_url: str) -> str | None:
188
  normalized_url = _normalize_base_url(env_base_url)
189
  task_key = (task_name or "").strip()
 
223
  return task_code
224
  return None
225
 
 
226
  def load_code(task_name: str, env_base_url: str) -> str:
227
  local_code = _code_from_task_name(task_name)
228
  if "Waiting for mission start" not in local_code:
 
237
  "# Verify Task / Bug Selection and confirm OpenEnv API is reachable."
238
  )
239
 
 
240
  def _solution_from_task_name(task_name: str) -> str | None:
241
  task = TASK_MAP.get((task_name or "").strip())
242
  if not task:
243
  return None
244
  return "\n".join(task.get("solution", []))
245
 
 
246
  def _terminal_html(lines: list[tuple[str, str]]) -> str:
247
  rendered: list[str] = []
248
  for css_class, text in lines:
249
  safe = html.escape(text)
250
  rendered.append(f"<div class='term-line {css_class}'>{safe}</div>")
251
+ content = "\n".join(rendered) if rendered else "<div class='term-line c-muted'>Idle. Configure mission variables and press EXECUTE TRACEFIX.</div>"
252
  return (
253
  "<div id='terminal' class='terminal-wrap'>"
254
  f"{content}"
 
258
  "</script>"
259
  )
260
 
 
261
  def _metric_block(state: str, details: str) -> str:
262
  return (
263
  "<div class='metric'>"
 
266
  "</div>"
267
  )
268
 
269
+ def _update_hud_badge(task_name: str, difficulty: str) -> str:
270
+ if not task_name:
271
+ return "<div style='padding: 10px; color: var(--text-dim); border: 1px dashed var(--panel-border); text-align: center;'>WAITING FOR TARGET SELECTION...</div>"
272
+ color = "#39ff14" if difficulty == "Easy" else ("#f9d78b" if difficulty == "Medium" else "#E60012")
273
+ return f"""<div style='border: 2px solid {color}; padding: 12px; background: rgba(0,0,0,0.5); color: {color}; font-weight: 900; font-size: 16px; text-transform: uppercase; text-align: center; letter-spacing: 1.5px;'>
274
+ >> TARGET ACQUIRED: {html.escape(task_name)} | THREAT LEVEL: {difficulty} <<
275
+ </div>"""
276
 
277
  def _reader_thread(stream: Any, source: str, out_q: queue.Queue[tuple[str, str | None]]) -> None:
278
  try:
 
285
  pass
286
  out_q.put((source, None))
287
 
 
288
  def _build_env(
289
  hf_token: str,
290
  api_base_url: str,
 
316
  env.pop(key, None)
317
  return env
318
 
319
+ def get_active_task(easy, medium, hard):
320
+ return (easy or medium or hard or "").strip()
321
 
322
def _reset_run_state(
    easy: str | None,
    medium: str | None,
    hard: str | None,
) -> tuple[str, str, str, float, str]:
    """Return the pre-run values for the code, terminal, metric, score and rewards widgets.

    The active task is resolved from the three radio values; its code preview
    is shown next to an empty terminal, a "Mission Ready" metric block, a zero
    score and the pending-rewards marker.
    """
    task_name = get_active_task(easy, medium, hard)
    return (
        _code_from_task_name(task_name),
        _terminal_html([]),
        _metric_block("Mission Ready", "Awaiting [START] from inference subprocess..."),
        0.0,
        "`Rewards:` pending",
    )
331
 
 
332
  def run_agent(
333
+ easy_radio: str,
334
+ medium_radio: str,
335
+ hard_radio: str,
336
  hf_token: str,
337
  api_base_url: str,
338
  model_name: str,
339
  env_base_url: str,
 
340
  benchmark: str,
341
  max_steps: int,
342
  success_score_threshold: float,
343
  local_image_name: str,
344
  difficulty: str,
345
  show_thought: bool,
346
+ ) -> Generator[tuple[str, str, str, float, str, dict], None, None]:
347
+
348
+ task_name = get_active_task(easy_radio, medium_radio, hard_radio)
349
  code_view = _code_from_task_name(task_name)
350
  terminal_lines: list[tuple[str, str]] = []
351
+ terminal_lines.append(("c-muted", "Boot sequence initialized... infiltrating target."))
352
 
353
+ status_html = _metric_block("Mission Infiltration", "Launching inference subprocess...")
354
  score_value = 0.0
355
  rewards_md = "`Rewards:` pending"
356
+ yield code_view, _terminal_html(terminal_lines), status_html, score_value, rewards_md, gr.update(value="INFILTRATING...", interactive=False)
357
 
358
  cmd = [sys.executable, str(INFERENCE_PATH)]
359
  if difficulty in {"easy", "medium", "hard"}:
 
391
 
392
  ended_streams: set[str] = set()
393
  thought_mode = False
394
+ active_task_name = task_name
395
  final_steps = 0
396
 
397
  while True:
 
411
  if source == "stderr":
412
  if line.strip() == "[THOUGHT]":
413
  thought_mode = True
414
+ if show_thought:
415
+ terminal_lines.append(("c-thought", "[THOUGHT]"))
416
  elif line.startswith("[") and line.endswith("]"):
417
  thought_mode = False
418
+ terminal_lines.append(("c-muted", line))
419
  elif thought_mode:
420
+ if show_thought:
421
+ terminal_lines.append(("c-thought", line))
422
  else:
423
+ if not show_thought:
424
+ # Strict gatekeeper rules over stderr leakage too
425
+ if not any(tag in line for tag in ["[START]", "[STEP]", "[END]"]):
426
+ continue
427
+ terminal_lines.append(("c-error", line))
428
  else:
429
+ if not show_thought:
430
+ if not any(tag in line for tag in ["[START]", "[STEP]", "[END]"]):
431
+ continue # Strict Gatekeeper skipping log
432
+
433
  start_match = START_RE.match(line)
434
  step_match = STEP_RE.match(line)
435
  end_match = END_RE.match(line)
 
439
  task_preview = _code_from_task_name(active_task_name)
440
  if "Waiting for mission start" not in task_preview:
441
  code_view = task_preview
442
+ terminal_lines.append(("c-start", line))
443
  status_html = _metric_block(
444
  "Mission Running",
445
  f"task={active_task_name} | env={start_match.group('env')} | model={start_match.group('model')}",
 
450
  reward = float(step_match.group("reward"))
451
  done_flag = step_match.group("done") == "true"
452
  err = step_match.group("error")
453
+ css = "c-step" if err == "null" else "c-error"
454
  terminal_lines.append((css, line))
455
  status_html = _metric_block(
456
  "Mission Running",
 
462
  score_value = float(end_match.group("score"))
463
  rewards_raw = end_match.group("rewards").strip()
464
  rewards_md = f"`Rewards:` {rewards_raw or 'none'}"
465
+ terminal_lines.append(("c-end", line))
466
  if success:
467
  solved = _solution_from_task_name(active_task_name)
468
  if solved:
 
477
  f"score={score_value:.2f} | steps={final_steps}",
478
  )
479
  else:
480
+ terminal_lines.append(("c-muted", line))
481
 
482
  if len(terminal_lines) > 500:
483
  terminal_lines = terminal_lines[-500:]
484
 
485
+ yield code_view, _terminal_html(terminal_lines), status_html, score_value, rewards_md, gr.update(value="INFILTRATING...", interactive=False)
486
 
487
  return_code = process.wait(timeout=2)
488
  if return_code != 0:
489
+ terminal_lines.append(("c-error", f"Process exited with code {return_code}."))
490
  status_html = _metric_block(
491
  "Mission Error",
492
  f"inference.py exited non-zero (code={return_code})",
 
495
  if len(terminal_lines) > 500:
496
  terminal_lines = terminal_lines[-500:]
497
 
498
+ yield code_view, _terminal_html(terminal_lines), status_html, score_value, rewards_md, gr.update(value="EXECUTE TRACEFIX", interactive=True)
499
 
500
 
501
+ with gr.Blocks(title="TraceFix-RL Mission Control") as demo:
502
  gr.HTML(
503
+ f"""
504
+ <style>{CSS}</style>
505
  <div id='header-wrap'>
506
+ <h1>TraceFix-RL /// PHANTOM PROTOCOL</h1>
507
+ <p>Real-time autonomous agent infiltration orchestration.</p>
508
  </div>
509
  """
510
  )
 
515
  sidebar_context = gr.Column()
516
 
517
  with sidebar_context:
518
+ # Zone 1: The Config Sidebar
519
+ gr.Markdown("### CORE AUTHENTICATION")
520
  hf_token = gr.Textbox(label="HF Token", type="password", placeholder="hf_xxx")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
521
 
522
+ with gr.Accordion("Advanced Engine Parameters", open=False):
523
+ model_name = gr.Textbox(label="Model Name", value=os.getenv("MODEL_NAME", "openai/gpt-oss-20b"))
524
+ api_base_url = gr.Textbox(label="API Base URL", value=os.getenv("API_BASE_URL", "https://router.huggingface.co/v1"))
525
+ env_base_url = gr.Textbox(label="Env Base URL", value=os.getenv("ENV_BASE_URL", f"http://{BACKEND_HOST}:{BACKEND_PORT}"))
526
+ benchmark = gr.Textbox(label="Benchmark", value=os.getenv("BENCHMARK", "tracefix_rl"))
527
+ local_image_name = gr.Textbox(label="Local Image Name", value=os.getenv("LOCAL_IMAGE_NAME", ""), placeholder="optional")
528
+ max_steps = gr.Number(label="Max Steps", value=int(os.getenv("MAX_STEPS", "50")), precision=0)
529
+ success_score_threshold = gr.Number(
530
+ label="Success Score Threshold",
531
+ value=float(os.getenv("SUCCESS_SCORE_THRESHOLD", "0.99")),
532
+ precision=2,
533
+ )
534
+ difficulty = gr.Dropdown(label="Difficulty", choices=["auto", "easy", "medium", "hard"], value="auto")
535
+ show_thought = gr.Checkbox(label="Stream Thought Trace", value=True)
536
+
537
+ # Zone 2: The Mission Board
538
+ gr.HTML("<div class='panel-title' style='margin-top: 10px;'>MISSION BOARD /// TARGET SELECTION</div>")
539
+ with gr.Row(elem_classes=["panel"]):
540
+ easy_radio = gr.Radio(choices=EASY_CHOICES, label="Easy Targets", elem_id="easy-radio")
541
+ medium_radio = gr.Radio(choices=MEDIUM_CHOICES, label="Medium Targets", elem_id="medium-radio")
542
+ hard_radio = gr.Radio(choices=HARD_CHOICES, label="Hard Targets", elem_id="hard-radio")
543
+
544
+ # Zone 3: The HUD
545
+ hud_badge = gr.HTML(_update_hud_badge("", ""))
546
+ run_button = gr.Button("EXECUTE TRACEFIX", elem_id="execute-btn", variant="primary")
547
+
548
+ # Radio change handlers for mutual exclusivity logic & HUD updates
549
+ def select_easy(val):
550
+ if not val:
551
+ return gr.skip(), gr.skip(), gr.skip(), gr.skip()
552
+ return None, None, _update_hud_badge(val, "Easy"), _code_from_task_name(val)
553
+
554
+ def select_medium(val):
555
+ if not val:
556
+ return gr.skip(), gr.skip(), gr.skip(), gr.skip()
557
+ return None, None, _update_hud_badge(val, "Medium"), _code_from_task_name(val)
558
+
559
+ def select_hard(val):
560
+ if not val:
561
+ return gr.skip(), gr.skip(), gr.skip(), gr.skip()
562
+ return None, None, _update_hud_badge(val, "Hard"), _code_from_task_name(val)
563
+
564
+ # Zone 4: The Arena
565
  with gr.Row(equal_height=True):
566
  with gr.Column(scale=1, elem_classes=["panel", "code-panel"]):
567
+ gr.HTML("<div class='panel-title'>SANDBOX CODE</div>")
568
  code_view = gr.Code(
569
  language="python",
570
  interactive=False,
571
+ value=_code_from_task_name(""),
572
  lines=30,
573
  )
574
 
575
  with gr.Column(scale=1, elem_classes=["panel"]):
576
+ gr.HTML("<div class='panel-title'>TERMINAL TRACE</div>")
577
  terminal = gr.HTML(_terminal_html([]))
578
 
579
  with gr.Row():
580
+ metric = gr.HTML(_metric_block("Idle", "Awaiting target selection."))
581
  score = gr.Number(label="Final Score", value=0.0, precision=3)
582
  rewards = gr.Markdown("`Rewards:` pending")
583
 
584
+ easy_radio.change(select_easy, inputs=[easy_radio], outputs=[medium_radio, hard_radio, hud_badge, code_view])
585
+ medium_radio.change(select_medium, inputs=[medium_radio], outputs=[easy_radio, hard_radio, hud_badge, code_view])
586
+ hard_radio.change(select_hard, inputs=[hard_radio], outputs=[easy_radio, medium_radio, hud_badge, code_view])
587
+
588
+ # Run Sequence
589
+ # First disable button to show immediate feedback
590
+ run_immediate = run_button.click(
591
+ lambda: gr.update(value="INFILTRATING...", interactive=False),
592
+ inputs=[],
593
+ outputs=[run_button],
594
+ queue=False
595
+ )
596
+
597
+ # Then reset state
598
+ run_event = run_immediate.then(
599
  _reset_run_state,
600
+ inputs=[easy_radio, medium_radio, hard_radio],
601
  outputs=[code_view, terminal, metric, score, rewards],
602
  queue=False,
603
  )
604
 
605
+ # Finally run generator (loads environment, streams stdout, then re-enables button upon END)
606
  run_event.then(
607
  run_agent,
608
  inputs=[
609
+ easy_radio,
610
+ medium_radio,
611
+ hard_radio,
612
  hf_token,
613
  api_base_url,
614
  model_name,
615
  env_base_url,
 
616
  benchmark,
617
  max_steps,
618
  success_score_threshold,
 
620
  difficulty,
621
  show_thought,
622
  ],
623
+ outputs=[code_view, terminal, metric, score, rewards, run_button],
624
  )