AdithyaSK HF Staff commited on
Commit
6c15447
·
verified ·
1 Parent(s): 5cc6087

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. server/gradio_ui.py +128 -31
  2. server/opencode_environment.py +42 -2
server/gradio_ui.py CHANGED
@@ -194,6 +194,29 @@ def _logprobs_md(turns: list[dict[str, Any]]) -> str:
194
  return "\n\n".join(lines)
195
 
196
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  def _files_md(files: dict[str, str]) -> str:
198
  if not files:
199
  return "_No files in the workdir._"
@@ -256,16 +279,28 @@ def opencode_gradio_builder(
256
  max_tokens_cap: int,
257
  top_logprobs: int,
258
  agent_timeout_s: float,
259
- progress: gr.Progress = gr.Progress(),
260
  ):
261
- progress(0.05, desc="resolving endpoint…")
 
 
 
 
 
 
 
 
 
 
 
 
262
  try:
263
  resolved = resolve_endpoint(
264
  endpoint, base_url=base_url, api_key=api_key, model=model
265
  )
266
  except ValueError as exc:
267
  err = f"endpoint resolution failed: {exc}"
268
- return (err, [], [], "", "", "", {"error": err})
 
269
 
270
  # Translate "auto" / "on" / "off" into bool / None.
271
  if disable_thinking == "on":
@@ -273,45 +308,107 @@ def opencode_gradio_builder(
273
  elif disable_thinking == "off":
274
  dt = False
275
  else:
276
- dt = None # let the catalog default win
277
 
278
- progress(0.10, desc=f"{resolved.kind}: {resolved.model}")
279
  env = OpenCodeEnvironment()
280
 
281
- progress(0.15, desc="creating sandbox + running agent…")
282
- try:
283
- payload = env._run_rollout_impl(
284
- base_url=resolved.base_url,
285
- api_key=resolved.api_key,
286
- model=resolved.model,
287
- instruction=instruction,
288
- setup=_split_commands(setup_text),
289
- verify=_split_commands(verify_text),
290
- task_id="ui",
291
- mode=mode,
292
- disable_thinking=(
293
- dt if dt is not None else resolved.disable_thinking_default
294
- ),
295
- max_tokens_cap=int(max_tokens_cap),
296
- top_logprobs=int(top_logprobs),
297
- agent_timeout_s=float(agent_timeout_s),
298
- template=template,
299
- )
300
- except Exception as exc: # noqa: BLE001
301
- err = f"{type(exc).__name__}: {exc}"
302
- return (err, [], [], "", "", "", {"error": err})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
 
304
- progress(0.95, desc="rendering result…")
305
- result = json.loads(payload)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
 
307
- return (
 
308
  _summary_md(result),
309
  _command_rows(result.get("setup_results") or []),
310
  _command_rows(result.get("verify_results") or []),
311
  _files_md(result.get("files") or {}),
312
  _logprobs_md(result.get("proxy_turns") or []),
313
  (
314
- f"### agent log (tail)\n```\n{result.get('agent_log_tail', '')[:4000]}\n```\n\n"
 
 
 
315
  f"### proxy log (tail)\n```\n{result.get('proxy_log_tail', '')[:4000]}\n```"
316
  ),
317
  result,
 
194
  return "\n\n".join(lines)
195
 
196
 
197
+ def _live_status_md(
198
+ endpoint_kind: str,
199
+ model: str,
200
+ mode: str,
201
+ elapsed_s: float,
202
+ lines: list[tuple[float, str]],
203
+ ) -> str:
204
+ """Render a live phase log (latest at the bottom) with elapsed timestamps."""
205
+ head = (
206
+ f"### running… `elapsed={elapsed_s:.1f}s`\n\n"
207
+ f"_endpoint=`{endpoint_kind}` model=`{model}` mode=`{mode}`_\n\n"
208
+ )
209
+ if not lines:
210
+ body = "_(waiting for first phase update…)_"
211
+ else:
212
+ # Show the most recent ~12 lines so the panel doesn't grow unbounded.
213
+ rows = ["| t (s) | phase |", "|---|---|"]
214
+ for ts, msg in lines[-12:]:
215
+ rows.append(f"| `{ts:>6.1f}` | {msg.replace(chr(10), ' ')[:200]} |")
216
+ body = "\n".join(rows)
217
+ return head + body
218
+
219
+
220
  def _files_md(files: dict[str, str]) -> str:
221
  if not files:
222
  return "_No files in the workdir._"
 
279
  max_tokens_cap: int,
280
  top_logprobs: int,
281
  agent_timeout_s: float,
 
282
  ):
283
+ """Generator handler — yields incremental UI updates.
284
+
285
+ Each ``yield`` is a tuple matching ``outputs=[...]``:
286
+ (summary_md, setup_table, verify_table, files_md, logprobs_md,
287
+ logs_md, raw_json). Early yields keep summary_md as a live phase
288
+ log while the rollout runs; the final yield populates everything.
289
+ """
290
+ import queue
291
+ import threading
292
+ import time
293
+
294
+ # Resolve endpoint up front — if this fails, we can return one
295
+ # immediate result with no streaming needed.
296
  try:
297
  resolved = resolve_endpoint(
298
  endpoint, base_url=base_url, api_key=api_key, model=model
299
  )
300
  except ValueError as exc:
301
  err = f"endpoint resolution failed: {exc}"
302
+ yield (f"### error\n\n```\n{err}\n```", [], [], "", "", "", {"error": err})
303
+ return
304
 
305
  # Translate "auto" / "on" / "off" into bool / None.
306
  if disable_thinking == "on":
 
308
  elif disable_thinking == "off":
309
  dt = False
310
  else:
311
+ dt = None
312
 
 
313
  env = OpenCodeEnvironment()
314
 
315
+ # The worker fires _run_rollout_impl in a background thread and
316
+ # streams progress messages into a queue; this generator polls the
317
+ # queue every 0.5s and yields a refreshed status_md to the UI.
318
+ status_q: queue.Queue = queue.Queue()
319
+ result_holder: dict = {}
320
+
321
+ def _cb(msg: str) -> None:
322
+ status_q.put(("status", msg, time.time()))
323
+
324
+ def _worker():
325
+ try:
326
+ payload = env._run_rollout_impl(
327
+ base_url=resolved.base_url,
328
+ api_key=resolved.api_key,
329
+ model=resolved.model,
330
+ instruction=instruction,
331
+ setup=_split_commands(setup_text),
332
+ verify=_split_commands(verify_text),
333
+ task_id="ui",
334
+ mode=mode,
335
+ disable_thinking=(
336
+ dt if dt is not None else resolved.disable_thinking_default
337
+ ),
338
+ max_tokens_cap=int(max_tokens_cap),
339
+ top_logprobs=int(top_logprobs),
340
+ agent_timeout_s=float(agent_timeout_s),
341
+ template=template,
342
+ progress_cb=_cb,
343
+ )
344
+ result_holder["payload"] = payload
345
+ except Exception as exc: # noqa: BLE001
346
+ result_holder["error"] = f"{type(exc).__name__}: {exc}"
347
+ status_q.put(("error", result_holder["error"], time.time()))
348
+ finally:
349
+ status_q.put(("done", None, time.time()))
350
+
351
+ worker = threading.Thread(target=_worker, daemon=True)
352
+ t_start = time.time()
353
+ worker.start()
354
+
355
+ # First yield: announce we've started. Empty result panels.
356
+ yield (
357
+ f"### running…\n\n_endpoint=`{resolved.kind}` model=`{resolved.model}` mode=`{mode}`_",
358
+ [], [], "", "", "", {},
359
+ )
360
 
361
+ status_lines: list[tuple[float, str]] = []
362
+ finished = False
363
+ while not finished:
364
+ try:
365
+ kind, msg, ts = status_q.get(timeout=0.5)
366
+ if kind == "status":
367
+ status_lines.append((ts - t_start, msg))
368
+ elif kind == "error":
369
+ status_lines.append((ts - t_start, f"ERROR: {msg}"))
370
+ elif kind == "done":
371
+ finished = True
372
+ except queue.Empty:
373
+ pass
374
+
375
+ # Render the live status pane.
376
+ elapsed = time.time() - t_start
377
+ md = _live_status_md(resolved.kind, resolved.model, mode, elapsed, status_lines)
378
+ yield (md, [], [], "", "", "", {})
379
+
380
+ # Drain any final messages still in the queue.
381
+ while not status_q.empty():
382
+ try:
383
+ kind, msg, ts = status_q.get_nowait()
384
+ if kind == "status":
385
+ status_lines.append((ts - t_start, msg))
386
+ except queue.Empty:
387
+ break
388
+
389
+ if "payload" not in result_holder:
390
+ err = result_holder.get("error", "unknown error")
391
+ yield (
392
+ f"### error\n\n```\n{err}\n```",
393
+ [], [], "", "",
394
+ _live_status_md(resolved.kind, resolved.model, mode,
395
+ time.time() - t_start, status_lines),
396
+ {"error": err},
397
+ )
398
+ return
399
 
400
+ result = json.loads(result_holder["payload"])
401
+ yield (
402
  _summary_md(result),
403
  _command_rows(result.get("setup_results") or []),
404
  _command_rows(result.get("verify_results") or []),
405
  _files_md(result.get("files") or {}),
406
  _logprobs_md(result.get("proxy_turns") or []),
407
  (
408
+ f"### live phase log\n\n"
409
+ + _live_status_md(resolved.kind, resolved.model, mode,
410
+ time.time() - t_start, status_lines)
411
+ + f"\n\n### agent log (tail)\n```\n{result.get('agent_log_tail', '')[:4000]}\n```\n\n"
412
  f"### proxy log (tail)\n```\n{result.get('proxy_log_tail', '')[:4000]}\n```"
413
  ),
414
  result,
server/opencode_environment.py CHANGED
@@ -254,7 +254,18 @@ class OpenCodeEnvironment(MCPEnvironment):
254
  top_logprobs: int,
255
  agent_timeout_s: float,
256
  template: str,
 
257
  ) -> str:
 
 
 
 
 
 
 
 
 
 
258
  result = self._RolloutResult(task_id=task_id, mode=mode)
259
  t0 = time.time()
260
 
@@ -267,8 +278,11 @@ class OpenCodeEnvironment(MCPEnvironment):
267
  "run_rollout."
268
  )
269
  result.wall_s = round(time.time() - t0, 3)
 
270
  return result.model_dump_json()
271
 
 
 
272
  # Build OpenCodeConfig + factory. We keep the proxy in charge of
273
  # ``model_override`` / ``logprobs`` / ``max_tokens``-cap injection.
274
  config = self._OpenCodeConfig(
@@ -307,8 +321,16 @@ class OpenCodeEnvironment(MCPEnvironment):
307
 
308
  session = None
309
  try:
 
 
 
 
310
  session = factory.create(task=opencode_task)
311
  result.sandbox_id = session.sandbox.sandbox_id
 
 
 
 
312
 
313
  # Run setup commands one at a time, *before* the agent starts.
314
  # The factory has already started the agent in start_agent()
@@ -318,27 +340,36 @@ class OpenCodeEnvironment(MCPEnvironment):
318
  # for ~1-2s but is fine for typical pip/git/download work
319
  # because opencode itself takes >=20s to make its first model
320
  # call.
321
- for cmd in setup:
 
322
  cr = self._exec_command(session.sandbox, cmd)
323
  result.setup_results.append(cr)
324
  if cr.exit_code != 0:
325
  result.error = (
326
  f"setup command failed (exit {cr.exit_code}): {cmd[:120]}"
327
  )
 
328
  break
329
 
330
  # Block until the agent is done (or setup already failed).
331
  if result.error is None:
 
 
 
 
332
  try:
333
  result.agent_exit_code = session.wait_for_completion(
334
  timeout_s=agent_timeout_s
335
  )
 
336
  except TimeoutError as exc:
337
  result.error = f"agent timeout: {exc}"
 
338
 
339
  # Run verify commands one at a time, capture each.
340
  verify_passed = 0
341
- for cmd in verify:
 
342
  cr = self._exec_command(session.sandbox, cmd)
343
  result.verify_results.append(cr)
344
  if cr.exit_code == 0:
@@ -354,23 +385,32 @@ class OpenCodeEnvironment(MCPEnvironment):
354
  result.reward = None
355
 
356
  # Collect filesystem + proxy trace.
 
357
  result.files, result.files_extra = self._collect_files(session.sandbox)
358
  result.proxy_turns = self._collect_proxy_turns(session)
359
  result.proxy_log_tail = self._safe_read(session.sandbox, PROXY_LOG)[-2000:]
360
  result.agent_log_tail = self._safe_read(session.sandbox, AGENT_LOG)[-2000:]
 
 
 
 
 
361
  except Exception as exc: # noqa: BLE001
362
  result.error = f"{type(exc).__name__}: {exc}"
 
363
  if session is not None:
364
  result.proxy_log_tail = self._safe_read(session.sandbox, PROXY_LOG)[-2000:]
365
  result.agent_log_tail = self._safe_read(session.sandbox, AGENT_LOG)[-2000:]
366
  finally:
367
  if session is not None:
368
  try:
 
369
  session.close()
370
  except Exception:
371
  pass
372
 
373
  result.wall_s = round(time.time() - t0, 3)
 
374
 
375
  # Bookkeeping on the per-session state.
376
  self._state.rollouts_completed += 1
 
254
  top_logprobs: int,
255
  agent_timeout_s: float,
256
  template: str,
257
+ progress_cb=None,
258
  ) -> str:
259
+ # Optional progress callback: receives short status strings at each
260
+ # phase boundary so the Gradio UI can stream live updates. Safe to
261
+ # be None (silently no-op).
262
+ def _emit(msg: str) -> None:
263
+ if progress_cb is not None:
264
+ try:
265
+ progress_cb(msg)
266
+ except Exception:
267
+ pass
268
+
269
  result = self._RolloutResult(task_id=task_id, mode=mode)
270
  t0 = time.time()
271
 
 
278
  "run_rollout."
279
  )
280
  result.wall_s = round(time.time() - t0, 3)
281
+ _emit("error: E2B_API_KEY missing on server")
282
  return result.model_dump_json()
283
 
284
+ _emit(f"resolving config (model={model}, mode={mode})")
285
+
286
  # Build OpenCodeConfig + factory. We keep the proxy in charge of
287
  # ``model_override`` / ``logprobs`` / ``max_tokens``-cap injection.
288
  config = self._OpenCodeConfig(
 
321
 
322
  session = None
323
  try:
324
+ _emit(
325
+ f"creating E2B sandbox (template={template or 'default'}) — "
326
+ "this is the slow phase (~5–60s cold, ~5s with template)"
327
+ )
328
  session = factory.create(task=opencode_task)
329
  result.sandbox_id = session.sandbox.sandbox_id
330
+ _emit(
331
+ f"sandbox ready: {result.sandbox_id} — agent started "
332
+ f"({'proxy on :7000, logprobs capturing' if mode == 'transparent_proxy' else 'direct LLM, no logprobs'})"
333
+ )
334
 
335
  # Run setup commands one at a time, *before* the agent starts.
336
  # The factory has already started the agent in start_agent()
 
340
  # for ~1-2s but is fine for typical pip/git/download work
341
  # because opencode itself takes >=20s to make its first model
342
  # call.
343
+ for i, cmd in enumerate(setup, 1):
344
+ _emit(f"setup [{i}/{len(setup)}]: {cmd[:80]}")
345
  cr = self._exec_command(session.sandbox, cmd)
346
  result.setup_results.append(cr)
347
  if cr.exit_code != 0:
348
  result.error = (
349
  f"setup command failed (exit {cr.exit_code}): {cmd[:120]}"
350
  )
351
+ _emit(f"setup FAILED at [{i}]: exit={cr.exit_code}")
352
  break
353
 
354
  # Block until the agent is done (or setup already failed).
355
  if result.error is None:
356
+ _emit(
357
+ f"agent running — opencode CLI in sandbox "
358
+ f"(timeout {int(agent_timeout_s)}s)"
359
+ )
360
  try:
361
  result.agent_exit_code = session.wait_for_completion(
362
  timeout_s=agent_timeout_s
363
  )
364
+ _emit(f"agent finished: exit_code={result.agent_exit_code}")
365
  except TimeoutError as exc:
366
  result.error = f"agent timeout: {exc}"
367
+ _emit(f"agent TIMEOUT: {exc}")
368
 
369
  # Run verify commands one at a time, capture each.
370
  verify_passed = 0
371
+ for i, cmd in enumerate(verify, 1):
372
+ _emit(f"verify [{i}/{len(verify)}]: {cmd[:80]}")
373
  cr = self._exec_command(session.sandbox, cmd)
374
  result.verify_results.append(cr)
375
  if cr.exit_code == 0:
 
385
  result.reward = None
386
 
387
  # Collect filesystem + proxy trace.
388
+ _emit("collecting workdir files + proxy trace + logs")
389
  result.files, result.files_extra = self._collect_files(session.sandbox)
390
  result.proxy_turns = self._collect_proxy_turns(session)
391
  result.proxy_log_tail = self._safe_read(session.sandbox, PROXY_LOG)[-2000:]
392
  result.agent_log_tail = self._safe_read(session.sandbox, AGENT_LOG)[-2000:]
393
+ _emit(
394
+ f"collected: {len(result.files)} file(s), "
395
+ f"{len(result.proxy_turns)} proxy turn(s), "
396
+ f"reward={'%.2f' % result.reward if result.reward is not None else 'n/a'}"
397
+ )
398
  except Exception as exc: # noqa: BLE001
399
  result.error = f"{type(exc).__name__}: {exc}"
400
+ _emit(f"ERROR: {result.error}")
401
  if session is not None:
402
  result.proxy_log_tail = self._safe_read(session.sandbox, PROXY_LOG)[-2000:]
403
  result.agent_log_tail = self._safe_read(session.sandbox, AGENT_LOG)[-2000:]
404
  finally:
405
  if session is not None:
406
  try:
407
+ _emit("tearing down sandbox")
408
  session.close()
409
  except Exception:
410
  pass
411
 
412
  result.wall_s = round(time.time() - t0, 3)
413
+ _emit(f"done in {result.wall_s:.1f}s")
414
 
415
  # Bookkeeping on the per-session state.
416
  self._state.rollouts_completed += 1