HongzeFu committed on
Commit
4ccc0e4
·
1 Parent(s): 8e0fa10
gradio-web/config.py CHANGED
@@ -2,10 +2,8 @@
2
  配置常量模块
3
  """
4
  # --- Configuration ---
5
- VIDEO_PLAYBACK_FPS = 20.0 # Frame rate for demonstration video playback
6
  USE_SEGMENTED_VIEW = False # Set to True to use segmented view, False to use original image
7
- LIVE_OBS_REFRESH_HZ = 30.0 # Live observation refresh frequency in Hz
8
- KEYFRAME_DOWNSAMPLE_FACTOR = 1 # Keep 1 frame out of every N streamed frames
9
 
10
  # 主界面两列宽度比例 (Point Selection : Right Panel)
11
  POINT_SELECTION_SCALE = 1
 
2
  配置常量模块
3
  """
4
  # --- Configuration ---
5
+ VIDEO_PLAYBACK_FPS = 20.0 # Frame rate for demonstration and execute video playback
6
  USE_SEGMENTED_VIEW = False # Set to True to use segmented view, False to use original image
 
 
7
 
8
  # 主界面两列宽度比例 (Point Selection : Right Panel)
9
  POINT_SELECTION_SCALE = 1
gradio-web/gradio_callbacks.py CHANGED
@@ -4,15 +4,14 @@ Gradio回调函数模块
4
  """
5
  import logging
6
  import os
7
- import queue
8
  import re
9
  import threading
10
  import time
11
  from datetime import datetime
 
12
 
13
  import gradio as gr
14
  import numpy as np
15
- from PIL import Image
16
 
17
  from state_manager import (
18
  cleanup_session,
@@ -31,8 +30,6 @@ from image_utils import draw_marker, save_video, concatenate_frames_horizontally
31
  from user_manager import user_manager
32
  from config import (
33
  EXECUTE_LIMIT_OFFSET,
34
- KEYFRAME_DOWNSAMPLE_FACTOR,
35
- LIVE_OBS_REFRESH_HZ,
36
  UI_TEXT,
37
  USE_SEGMENTED_VIEW,
38
  get_live_obs_elem_classes,
@@ -43,10 +40,9 @@ from process_session import ScrewPlanFailureError
43
  from note_content import get_task_hint
44
 
45
 
46
- # --- live_obs refresh queue state ---
47
- # Each uid keeps its own FIFO queue and sampling cursor.
48
- _LIVE_OBS_REFRESH = {}
49
- _LIVE_OBS_REFRESH_LOCK = threading.Lock()
50
  LOGGER = logging.getLogger("robomme.callbacks")
51
 
52
 
@@ -86,8 +82,7 @@ def cleanup_user_session(uid):
86
  """Unified cleanup entry for gr.State TTL deletion and unload hooks."""
87
  if not uid:
88
  return
89
- with _LIVE_OBS_REFRESH_LOCK:
90
- _LIVE_OBS_REFRESH.pop(uid, None)
91
  cleanup_session(uid)
92
 
93
 
@@ -134,15 +129,6 @@ def _option_requires_coords(session, option_value) -> bool:
134
  return bool(raw_solve_options[option_idx].get("available"))
135
 
136
 
137
- def _should_enqueue_sample(sample_index: int) -> bool:
138
- factor = max(1, int(KEYFRAME_DOWNSAMPLE_FACTOR))
139
- return sample_index % factor == 0
140
-
141
-
142
- def _live_obs_refresh_interval_sec() -> float:
143
- return 1.0 / max(float(LIVE_OBS_REFRESH_HZ), 1.0)
144
-
145
-
146
  def _uid_for_log(uid):
147
  if not uid:
148
  return "<none>"
@@ -150,6 +136,109 @@ def _uid_for_log(uid):
150
  return text if len(text) <= 12 else f"{text[:8]}..."
151
 
152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  def capitalize_first_letter(text: str) -> str:
154
  """确保字符串的第一个字母大写,其余字符保持不变"""
155
  if not text:
@@ -284,21 +373,9 @@ def on_demo_video_play(uid):
284
 
285
 
286
  def switch_to_execute_phase(uid):
287
- """Disable controls and point clicking during execute playback."""
288
  if uid:
289
- session = get_session(uid)
290
- base_count = len(getattr(session, "base_frames", []) or []) if session else 0
291
- LOGGER.debug(
292
- "switch_to_execute_phase uid=%s base_frames=%s",
293
- _uid_for_log(uid),
294
- base_count,
295
- )
296
- with _LIVE_OBS_REFRESH_LOCK:
297
- _LIVE_OBS_REFRESH[uid] = {
298
- "frame_queue": queue.Queue(),
299
- "last_base_count": base_count,
300
- "sample_index": 0,
301
- }
302
  return (
303
  gr.update(interactive=False), # options_radio
304
  gr.update(interactive=False), # exec_btn
@@ -313,8 +390,6 @@ def switch_to_action_phase(uid=None):
313
  """Switch display to action phase and restore control panel interactions."""
314
  if uid:
315
  LOGGER.debug("switch_to_action_phase uid=%s", _uid_for_log(uid))
316
- with _LIVE_OBS_REFRESH_LOCK:
317
- _LIVE_OBS_REFRESH.pop(uid, None)
318
  return (
319
  gr.update(interactive=True), # options_radio
320
  gr.update(), # exec_btn (keep execute_step result)
@@ -325,152 +400,29 @@ def switch_to_action_phase(uid=None):
325
  )
326
 
327
 
328
- def _get_live_obs_refresh_state(uid, base_count=0):
329
- with _LIVE_OBS_REFRESH_LOCK:
330
- if uid not in _LIVE_OBS_REFRESH:
331
- _LIVE_OBS_REFRESH[uid] = {
332
- "frame_queue": queue.Queue(),
333
- "last_base_count": int(base_count),
334
- "sample_index": 0,
335
- }
336
- return _LIVE_OBS_REFRESH[uid]
337
-
338
-
339
- def _enqueue_live_obs_frames(uid, base_frames):
340
- """
341
- Push newly appended base_frames into per-uid FIFO queue with configurable downsampling.
342
- """
343
- if not uid:
344
- return 0
345
- frames = base_frames or []
346
- state = _get_live_obs_refresh_state(uid, base_count=len(frames))
347
- frame_queue = state["frame_queue"]
348
- current_count = len(frames)
349
- last_count = int(state.get("last_base_count", 0))
350
-
351
- # Session/task reset: history shrank.
352
- if current_count < last_count:
353
- with _LIVE_OBS_REFRESH_LOCK:
354
- state["frame_queue"] = queue.Queue()
355
- state["last_base_count"] = current_count
356
- state["sample_index"] = 0
357
- return 0
358
-
359
- if current_count <= last_count:
360
- return frame_queue.qsize()
361
-
362
- new_frames = frames[last_count:current_count]
363
- sample_index = int(state.get("sample_index", 0))
364
- for frame in new_frames:
365
- if _should_enqueue_sample(sample_index) and frame is not None:
366
- frame_queue.put(frame)
367
- sample_index += 1
368
-
369
- with _LIVE_OBS_REFRESH_LOCK:
370
- state["last_base_count"] = current_count
371
- state["sample_index"] = sample_index
372
- return frame_queue.qsize()
373
-
374
-
375
- def _wait_for_live_obs_queue_drain(uid, max_wait_sec=None, empty_grace_sec=0.2, poll_sec=0.05):
376
- """
377
- Wait for timer-driven live_obs refresh to consume queued frames before phase switch.
378
- """
379
- if not uid:
380
- return
381
- with _LIVE_OBS_REFRESH_LOCK:
382
- state0 = _LIVE_OBS_REFRESH.get(uid)
383
- queue0 = state0.get("frame_queue") if state0 else None
384
- initial_qsize = int(queue0.qsize()) if queue0 is not None else 0
385
- if max_wait_sec is None:
386
- # Timer-driven playback + small buffer, capped to keep UI responsive.
387
- max_wait_sec = min(30.0, max(2.0, initial_qsize * (_live_obs_refresh_interval_sec() + 0.02) + 1.0))
388
-
389
- start = time.time()
390
- empty_since = None
391
- while True:
392
- if (time.time() - start) >= max_wait_sec:
393
- break
394
- with _LIVE_OBS_REFRESH_LOCK:
395
- state = _LIVE_OBS_REFRESH.get(uid)
396
- frame_queue = state.get("frame_queue") if state else None
397
- if frame_queue is None:
398
- break
399
- if frame_queue.qsize() > 0:
400
- empty_since = None
401
- else:
402
- if empty_since is None:
403
- empty_since = time.time()
404
- elif (time.time() - empty_since) >= empty_grace_sec:
405
- break
406
- time.sleep(poll_sec)
407
-
408
-
409
- def _prepare_refresh_frame(frame):
410
- """Normalize cached frame to an RGB uint8 PIL image for gr.Image."""
411
- if frame is None:
412
- return None
413
- frame_arr = np.asarray(frame)
414
- if frame_arr.dtype != np.uint8:
415
- max_val = float(np.max(frame_arr)) if frame_arr.size else 0.0
416
- if max_val <= 1.0:
417
- frame_arr = (frame_arr * 255.0).clip(0, 255).astype(np.uint8)
418
- else:
419
- frame_arr = frame_arr.clip(0, 255).astype(np.uint8)
420
- if frame_arr.ndim == 2:
421
- frame_arr = np.stack([frame_arr] * 3, axis=-1)
422
- elif frame_arr.ndim == 3 and frame_arr.shape[2] == 4:
423
- frame_arr = frame_arr[:, :, :3]
424
- return Image.fromarray(frame_arr)
425
-
426
-
427
- def refresh_live_obs(uid, ui_phase):
428
- """
429
- Poll latest cached frame during execute phase.
430
- Updates live_obs using the configured gr.Timer interval.
431
- """
432
- if ui_phase != "execution_playback":
433
- return gr.update()
434
- session = get_session(uid)
435
- if not session:
436
- return gr.update()
437
-
438
- base_frames = getattr(session, "base_frames", None) or []
439
- if not base_frames:
440
- return gr.update()
441
-
442
- _enqueue_live_obs_frames(uid, base_frames)
443
- state = _get_live_obs_refresh_state(uid, base_count=len(base_frames))
444
- frame_queue = state["frame_queue"]
445
-
446
- if frame_queue.empty():
447
- return gr.update()
448
-
449
- latest = frame_queue.get()
450
- env_id = getattr(session, "env_id", None)
451
- stitched = concatenate_frames_horizontally([latest], env_id=env_id)
452
- if stitched:
453
- latest = stitched[-1]
454
-
455
- img = _prepare_refresh_frame(latest)
456
- if img is None:
457
- return gr.update()
458
- return _live_obs_update(value=img, interactive=False)
459
-
460
-
461
- def on_video_end_transition(uid):
462
- """Called when demo video finishes. Transition from video to action phase."""
463
  return (
464
  gr.update(visible=False), # video_phase_group
465
  gr.update(visible=True), # action_phase_group
466
  gr.update(visible=True), # control_panel_group
467
- _action_selection_log(),
468
  gr.update(visible=False, interactive=False), # watch_demo_video_btn
 
469
  )
470
 
471
 
472
  def _task_load_failed_response(uid, message):
473
  LOGGER.warning("task_load_failed uid=%s message=%s", _uid_for_log(uid), message)
 
474
  return (
475
  uid,
476
  gr.update(visible=True), # main_interface
@@ -479,7 +431,7 @@ def _task_load_failed_response(uid, message):
479
  gr.update(choices=[], value=None), # options_radio
480
  "", # goal_box
481
  _ui_text("coords", "not_needed"), # coords_box
482
- gr.update(value=None, visible=False), # video_display
483
  gr.update(visible=False, interactive=False), # watch_demo_video_btn
484
  "", # task_info_box
485
  "", # progress_info_box
@@ -525,8 +477,7 @@ def _load_status_task(uid, status):
525
 
526
  LOGGER.debug("loading episode env=%s episode=%s uid=%s", env_id, ep_num, _uid_for_log(uid))
527
 
528
- with _LIVE_OBS_REFRESH_LOCK:
529
- _LIVE_OBS_REFRESH.pop(uid, None)
530
  reset_play_button_clicked(uid)
531
  reset_execute_count(uid, env_id, int(ep_num))
532
 
@@ -555,7 +506,7 @@ def _load_status_task(uid, status):
555
  gr.update(choices=[], value=None), # options_radio
556
  "", # goal_box
557
  _ui_text("coords", "not_needed"), # coords_box
558
- gr.update(value=None, visible=False), # video_display
559
  gr.update(visible=False, interactive=False), # watch_demo_video_btn
560
  f"{actual_env_id} (Episode {ep_num})", # task_info_box
561
  progress_text, # progress_info_box
@@ -574,19 +525,7 @@ def _load_status_task(uid, status):
574
  else:
575
  goal_text = capitalize_first_letter(session.language_goal) if session.language_goal else ""
576
 
577
- options = session.available_options
578
- radio_choices = []
579
- for opt_label, opt_idx in options:
580
- opt_label = _ui_option_label(session, opt_label, opt_idx)
581
- if 0 <= opt_idx < len(session.raw_solve_options):
582
- opt = session.raw_solve_options[opt_idx]
583
- if opt.get("available"):
584
- opt_label_with_hint = f"{opt_label}{_ui_text('actions', 'point_required_suffix')}"
585
- else:
586
- opt_label_with_hint = opt_label
587
- else:
588
- opt_label_with_hint = opt_label
589
- radio_choices.append((opt_label_with_hint, opt_idx))
590
  LOGGER.debug(
591
  "options prepared uid=%s env=%s count=%s",
592
  _uid_for_log(uid),
@@ -634,7 +573,7 @@ def _load_status_task(uid, status):
634
  gr.update(choices=radio_choices, value=None), # options_radio
635
  goal_text, # goal_box
636
  _ui_text("coords", "not_needed"), # coords_box
637
- gr.update(value=demo_video_path, visible=True), # video_display
638
  gr.update(visible=True, interactive=True), # watch_demo_video_btn
639
  f"{actual_env_id} (Episode {ep_num})", # task_info_box
640
  progress_text, # progress_info_box
@@ -658,7 +597,7 @@ def _load_status_task(uid, status):
658
  gr.update(choices=radio_choices, value=None), # options_radio
659
  goal_text, # goal_box
660
  _ui_text("coords", "not_needed"), # coords_box
661
- gr.update(value=None, visible=False), # video_display (no video)
662
  gr.update(visible=False, interactive=False), # watch_demo_video_btn
663
  f"{actual_env_id} (Episode {ep_num})", # task_info_box
664
  progress_text, # progress_info_box
@@ -1061,18 +1000,60 @@ def execute_step(uid, option_idx, coords_str):
1061
  option_idx,
1062
  coords_str,
1063
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1064
  session = get_session(uid)
1065
  if not session:
1066
  LOGGER.error("execute_step missing session uid=%s", _uid_for_log(uid))
1067
- return (
1068
- _live_obs_update(value=None, interactive=False),
1069
- format_log_markdown(_session_error_text()),
1070
- gr.update(),
1071
- gr.update(),
1072
- gr.update(interactive=False),
1073
- gr.update(interactive=False),
 
 
1074
  )
1075
-
1076
  # 检查 execute 次数限制(在执行前检查,如果达到限制则模拟失败状态)
1077
  execute_limit_reached = False
1078
  if uid and session.env_id is not None and session.episode_idx is not None:
@@ -1094,22 +1075,24 @@ def execute_step(uid, option_idx, coords_str):
1094
  max_execute,
1095
  execute_limit_reached,
1096
  )
1097
-
1098
- # Ensure at least one cached frame exists for timer-based refresh.
1099
  if not session.base_frames:
1100
  LOGGER.debug("execute_step uid=%s base_frames empty; triggering update_observation", _uid_for_log(uid))
1101
  session.update_observation(use_segmentation=USE_SEGMENTED_VIEW)
1102
-
 
 
1103
  option_idx = _parse_option_idx(option_idx)
1104
  if option_idx is None:
1105
  LOGGER.debug("execute_step uid=%s aborted: option_idx is None", _uid_for_log(uid))
1106
- return (
1107
- _live_obs_update(value=session.get_pil_image(use_segmented=USE_SEGMENTED_VIEW), interactive=False),
1108
- format_log_markdown(_ui_text("log", "execute_missing_action")),
1109
- gr.update(),
1110
- gr.update(),
1111
- gr.update(interactive=False),
1112
- gr.update(interactive=True),
1113
  )
1114
 
1115
  needs_coords = _option_requires_coords(session, option_idx)
@@ -1125,7 +1108,15 @@ def execute_step(uid, option_idx, coords_str):
1125
  )
1126
  current_img = session.get_pil_image(use_segmented=USE_SEGMENTED_VIEW)
1127
  error_msg = _ui_text("coords", "select_point_before_execute")
1128
- return _live_obs_update(value=current_img, interactive=False), format_log_markdown(error_msg), gr.update(), gr.update(), gr.update(interactive=False), gr.update(interactive=True)
 
 
 
 
 
 
 
 
1129
 
1130
  # Parse coords
1131
  click_coords = None
@@ -1212,14 +1203,6 @@ def execute_step(uid, option_idx, coords_str):
1212
  new_count,
1213
  )
1214
 
1215
- # Execute frames are produced in batch when execute_action returns from worker process.
1216
- # Enqueue them now, then wait briefly for the configured timer to drain FIFO playback.
1217
- _enqueue_live_obs_frames(uid, getattr(session, "base_frames", None))
1218
- _wait_for_live_obs_queue_drain(uid)
1219
- LOGGER.debug("execute_step playback drain complete uid=%s", _uid_for_log(uid))
1220
-
1221
- # 注意:执行阶段画面由 live_obs 的配置化轮询间隔刷新。
1222
-
1223
  progress_update = gr.update() # 默认不更新 progress
1224
  task_update = gr.update()
1225
 
@@ -1263,28 +1246,40 @@ def execute_step(uid, option_idx, coords_str):
1263
 
1264
  # 根据视图模式重新获取图片
1265
  img = session.get_pil_image(use_segmented=USE_SEGMENTED_VIEW)
1266
-
 
 
1267
  restart_episode_update = gr.update(interactive=True)
1268
  next_task_update = gr.update(interactive=True)
1269
  exec_btn_update = gr.update(interactive=False) if done else gr.update(interactive=True)
 
 
 
1270
 
1271
  # 格式化日志消息为 HTML 格式(支持颜色显示)
1272
  formatted_status = format_log_markdown(status)
1273
  LOGGER.debug(
1274
- "execute_step done uid=%s env=%s ep=%s done=%s exec_btn_interactive=%s",
1275
  _uid_for_log(uid),
1276
  getattr(session, "env_id", None),
1277
  getattr(session, "episode_idx", None),
1278
  done,
1279
  not done,
 
1280
  )
1281
-
1282
- return (
1283
- _live_obs_update(value=img, interactive=False),
1284
- formatted_status,
1285
- task_update,
1286
- progress_update,
1287
- restart_episode_update,
1288
- next_task_update,
1289
- exec_btn_update,
 
 
 
 
 
 
1290
  )
 
4
  """
5
  import logging
6
  import os
 
7
  import re
8
  import threading
9
  import time
10
  from datetime import datetime
11
+ from pathlib import Path
12
 
13
  import gradio as gr
14
  import numpy as np
 
15
 
16
  from state_manager import (
17
  cleanup_session,
 
30
  from user_manager import user_manager
31
  from config import (
32
  EXECUTE_LIMIT_OFFSET,
 
 
33
  UI_TEXT,
34
  USE_SEGMENTED_VIEW,
35
  get_live_obs_elem_classes,
 
40
  from note_content import get_task_hint
41
 
42
 
43
+ # --- execute video temp files ---
44
+ _EXECUTION_VIDEO_PATHS = {}
45
+ _EXECUTION_VIDEO_LOCK = threading.Lock()
 
46
  LOGGER = logging.getLogger("robomme.callbacks")
47
 
48
 
 
82
  """Unified cleanup entry for gr.State TTL deletion and unload hooks."""
83
  if not uid:
84
  return
85
+ _clear_execution_video_path(uid)
 
86
  cleanup_session(uid)
87
 
88
 
 
129
  return bool(raw_solve_options[option_idx].get("available"))
130
 
131
 
 
 
 
 
 
 
 
 
 
132
  def _uid_for_log(uid):
133
  if not uid:
134
  return "<none>"
 
136
  return text if len(text) <= 12 else f"{text[:8]}..."
137
 
138
 
139
def _delete_temp_video(path):
    """Best-effort removal of a temporary video file.

    Args:
        path: Filesystem path of the video to remove. Falsy values
            (``None``, ``""``) are ignored.

    Returns:
        None. A file that is already missing is not treated as an error, and
        any exception raised while unlinking is logged as a warning (with
        traceback) instead of being propagated.
    """
    if path:
        try:
            target = Path(path)
            # missing_ok: the file may already have been removed elsewhere.
            target.unlink(missing_ok=True)
        except Exception:
            LOGGER.warning("failed to delete temp video: %s", path, exc_info=True)
146
+
147
+
148
def _clear_execution_video_path(uid):
    """Forget the execute-video path tracked for ``uid`` and delete its file.

    Args:
        uid: Session/user identifier used as the key in
            ``_EXECUTION_VIDEO_PATHS``. Falsy values make this a no-op.

    The registry entry is popped while holding ``_EXECUTION_VIDEO_LOCK``; the
    file itself is removed via ``_delete_temp_video`` (which ignores a
    ``None`` path when nothing was tracked).
    """
    if uid:
        with _EXECUTION_VIDEO_LOCK:
            stale_path = _EXECUTION_VIDEO_PATHS.pop(uid, None)
        _delete_temp_video(stale_path)
154
+
155
+
156
def _set_execution_video_path(uid, path):
    """Record ``path`` as the current execute video for ``uid``.

    Args:
        uid: Session/user identifier used as the key in
            ``_EXECUTION_VIDEO_PATHS``. Falsy values make this a no-op.
        path: Path of the newly generated video.

    The previously tracked path (if any) is swapped out under
    ``_EXECUTION_VIDEO_LOCK`` and its file is deleted afterwards, unless it is
    the same path that was just stored.
    """
    if uid:
        with _EXECUTION_VIDEO_LOCK:
            previous = _EXECUTION_VIDEO_PATHS.get(uid)
            _EXECUTION_VIDEO_PATHS[uid] = path
        superseded = bool(previous) and previous != path
        if superseded:
            _delete_temp_video(previous)
164
+
165
+
166
def _build_radio_choices(session):
    """Build the ``(label, index)`` choices for the options radio component.

    Args:
        session: Object that may expose ``available_options`` (iterable of
            ``(label, index)`` pairs) and ``raw_solve_options`` (indexable
            sequence of dict-like entries). Missing or falsy attributes are
            treated as empty.

    Returns:
        A list of ``(ui_label, option_index)`` tuples, in the order of
        ``available_options``. The label comes from ``_ui_option_label`` and
        gets the ``point_required_suffix`` UI text appended when the matching
        ``raw_solve_options`` entry has a truthy ``"available"`` value.
    """
    pairs = getattr(session, "available_options", None) or []
    solve_opts = getattr(session, "raw_solve_options", None) or []
    solve_count = len(solve_opts)

    def _decorated(label, idx):
        text = _ui_option_label(session, label, idx)
        # Only indexes that actually exist in raw_solve_options can be flagged.
        if 0 <= idx < solve_count and solve_opts[idx].get("available"):
            return f"{text}{_ui_text('actions', 'point_required_suffix')}"
        return text

    return [(_decorated(label, idx), idx) for label, idx in pairs]
176
+
177
+
178
def _coerce_video_source_frames(frames):
    """Filter a frame sequence down to arrays that look like images.

    Args:
        frames: Expected to be a ``list`` or ``tuple`` of frame-like objects.
            Any other type (including ``None``) yields an empty result.

    Returns:
        A list of ``numpy.ndarray`` objects, in input order, containing only
        the entries that:
          * are not ``None``,
          * can be converted by ``np.asarray`` without raising,
          * are 2-D or 3-D, and
          * do not have a string/bytes/object dtype.
        Entries that fail any check are skipped rather than raising, so a
        single bad frame cannot abort the whole conversion.
    """
    if not isinstance(frames, (list, tuple)):
        return []
    valid = []
    for frame in frames:
        if frame is None:
            continue
        try:
            frame_arr = np.asarray(frame)
        except (TypeError, ValueError):
            # e.g. ragged nested sequences, which recent NumPy refuses to convert
            continue
        if frame_arr.ndim not in {2, 3}:
            continue
        if frame_arr.dtype.kind in {"U", "S", "O"}:
            continue
        valid.append(frame_arr)
    return valid
192
+
193
+
194
def _as_video_frame(candidate):
    """Return ``candidate`` as an ndarray if it looks like an image, else ``None``.

    A usable frame is not ``None``, converts via ``np.asarray`` without
    raising, is 2-D or 3-D, and does not have a string/bytes/object dtype.
    """
    if candidate is None:
        return None
    try:
        frame_arr = np.asarray(candidate)
    except (TypeError, ValueError):
        return None
    if frame_arr.ndim not in {2, 3}:
        return None
    if frame_arr.dtype.kind in {"U", "S", "O"}:
        return None
    return frame_arr


def _fallback_execution_frames(session):
    """Pick a single frame to show when no execute frames were captured.

    Args:
        session: Object that may expose ``base_frames`` (a sequence of cached
            frames) and must provide ``get_pil_image(use_segmented=...)``
            when ``base_frames`` yields nothing usable.

    Returns:
        A one-element list holding the chosen frame as ``numpy.ndarray``, or
        an empty list when nothing usable is available. The most recent
        ``base_frames`` entry is preferred; if it is missing or not
        image-like, the session's rendered image is tried instead. Both
        candidates go through the same validation.
    """
    base_frames = getattr(session, "base_frames", None) or []
    if base_frames:
        latest = _as_video_frame(base_frames[-1])
        if latest is not None:
            return [latest]
    try:
        pil_image = session.get_pil_image(use_segmented=False)
    except Exception:
        # Best-effort: a fallback clip is optional, so never propagate.
        LOGGER.debug("fallback frame render failed", exc_info=True)
        return []
    rendered = _as_video_frame(pil_image)
    return [] if rendered is None else [rendered]
210
+
211
+
212
def _build_execution_video_update(uid, session):
    """Render the latest execute frames to a video and build the display update.

    Frames are taken from ``session.last_execution_frames`` (validated by
    ``_coerce_video_source_frames``); when that yields nothing,
    ``_fallback_execution_frames`` supplies a substitute. The frames are
    passed through ``concatenate_frames_horizontally`` and written with
    ``save_video``.

    Args:
        uid: Session/user identifier used to track the generated file so it
            can be deleted later.
        session: Session object providing ``last_execution_frames`` and
            ``env_id`` (both read with ``getattr`` defaults).

    Returns:
        A ``gr.update(...)`` for the video component: visible with
        ``autoplay=True`` and ``playback_position=0`` when a non-empty video
        file was produced, otherwise hidden with ``value=None``.

    On every failure path the previously tracked clip for ``uid`` is cleared,
    and any unusable output that ``save_video`` left behind (e.g. a
    zero-byte file) is deleted so it does not accumulate on disk.
    """
    source_frames = _coerce_video_source_frames(getattr(session, "last_execution_frames", None))
    if not source_frames:
        source_frames = _fallback_execution_frames(session)
    stitched_frames = concatenate_frames_horizontally(
        source_frames,
        env_id=getattr(session, "env_id", None),
    )

    video_path = None
    if stitched_frames:
        # NOTE(review): millisecond timestamp suffix; presumably save_video
        # adds its own uniqueness to the filename - confirm for concurrent users.
        suffix = f"execute_{int(time.time() * 1000)}"
        video_path = save_video(stitched_frames, suffix=suffix)

    usable = bool(video_path) and os.path.exists(video_path) and os.path.getsize(video_path) > 0
    if not usable:
        _clear_execution_video_path(uid)
        # No-op when video_path is falsy or the file does not exist.
        _delete_temp_video(video_path)
        return gr.update(value=None, visible=False)

    _set_execution_video_path(uid, video_path)
    return gr.update(
        value=video_path,
        visible=True,
        autoplay=True,
        playback_position=0,
    )
240
+
241
+
242
  def capitalize_first_letter(text: str) -> str:
243
  """确保字符串的第一个字母大写,其余字符保持不变"""
244
  if not text:
 
373
 
374
 
375
  def switch_to_execute_phase(uid):
376
+ """Disable controls and point clicking while execute work is running."""
377
  if uid:
378
+ LOGGER.debug("switch_to_execute_phase uid=%s", _uid_for_log(uid))
 
 
 
 
 
 
 
 
 
 
 
 
379
  return (
380
  gr.update(interactive=False), # options_radio
381
  gr.update(interactive=False), # exec_btn
 
390
  """Switch display to action phase and restore control panel interactions."""
391
  if uid:
392
  LOGGER.debug("switch_to_action_phase uid=%s", _uid_for_log(uid))
 
 
393
  return (
394
  gr.update(interactive=True), # options_radio
395
  gr.update(), # exec_btn (keep execute_step result)
 
400
  )
401
 
402
 
403
def on_video_end_transition(uid, ui_phase=None):
    """Leave the video phase and return the UI to the action phase.

    Args:
        uid: Session/user identifier; only used for the debug log line.
        ui_phase: Phase the UI was in when the video ended. When it is
            ``"demo_video"`` or ``None`` the log panel is replaced with the
            action-selection message; for any other value the log panel is
            left untouched.

    Returns:
        A 6-tuple of updates, in order: video_phase_group (hidden),
        action_phase_group (shown), control_panel_group (shown), log_output,
        watch_demo_video_btn (hidden and non-interactive), and the new
        ui_phase_state value ``"action_point"``.
    """
    LOGGER.debug(
        "on_video_end_transition uid=%s ui_phase=%s",
        _uid_for_log(uid),
        ui_phase,
    )
    after_demo = ui_phase is None or ui_phase == "demo_video"
    log_update = _action_selection_log() if after_demo else gr.update()

    hide_video_group = gr.update(visible=False)
    show_action_group = gr.update(visible=True)
    show_control_panel = gr.update(visible=True)
    hide_watch_button = gr.update(visible=False, interactive=False)
    return (
        hide_video_group,  # video_phase_group
        show_action_group,  # action_phase_group
        show_control_panel,  # control_panel_group
        log_update,  # log_output
        hide_watch_button,  # watch_demo_video_btn
        "action_point",  # ui_phase_state
    )
421
 
422
 
423
  def _task_load_failed_response(uid, message):
424
  LOGGER.warning("task_load_failed uid=%s message=%s", _uid_for_log(uid), message)
425
+ _clear_execution_video_path(uid)
426
  return (
427
  uid,
428
  gr.update(visible=True), # main_interface
 
431
  gr.update(choices=[], value=None), # options_radio
432
  "", # goal_box
433
  _ui_text("coords", "not_needed"), # coords_box
434
+ gr.update(value=None, visible=False, autoplay=False, playback_position=0), # video_display
435
  gr.update(visible=False, interactive=False), # watch_demo_video_btn
436
  "", # task_info_box
437
  "", # progress_info_box
 
477
 
478
  LOGGER.debug("loading episode env=%s episode=%s uid=%s", env_id, ep_num, _uid_for_log(uid))
479
 
480
+ _clear_execution_video_path(uid)
 
481
  reset_play_button_clicked(uid)
482
  reset_execute_count(uid, env_id, int(ep_num))
483
 
 
506
  gr.update(choices=[], value=None), # options_radio
507
  "", # goal_box
508
  _ui_text("coords", "not_needed"), # coords_box
509
+ gr.update(value=None, visible=False, autoplay=False, playback_position=0), # video_display
510
  gr.update(visible=False, interactive=False), # watch_demo_video_btn
511
  f"{actual_env_id} (Episode {ep_num})", # task_info_box
512
  progress_text, # progress_info_box
 
525
  else:
526
  goal_text = capitalize_first_letter(session.language_goal) if session.language_goal else ""
527
 
528
+ radio_choices = _build_radio_choices(session)
 
 
 
 
 
 
 
 
 
 
 
 
529
  LOGGER.debug(
530
  "options prepared uid=%s env=%s count=%s",
531
  _uid_for_log(uid),
 
573
  gr.update(choices=radio_choices, value=None), # options_radio
574
  goal_text, # goal_box
575
  _ui_text("coords", "not_needed"), # coords_box
576
+ gr.update(value=demo_video_path, visible=True, autoplay=False, playback_position=0), # video_display
577
  gr.update(visible=True, interactive=True), # watch_demo_video_btn
578
  f"{actual_env_id} (Episode {ep_num})", # task_info_box
579
  progress_text, # progress_info_box
 
597
  gr.update(choices=radio_choices, value=None), # options_radio
598
  goal_text, # goal_box
599
  _ui_text("coords", "not_needed"), # coords_box
600
+ gr.update(value=None, visible=False, autoplay=False, playback_position=0), # video_display (no video)
601
  gr.update(visible=False, interactive=False), # watch_demo_video_btn
602
  f"{actual_env_id} (Episode {ep_num})", # task_info_box
603
  progress_text, # progress_info_box
 
1000
  option_idx,
1001
  coords_str,
1002
  )
1003
+
1004
+ def _response(
1005
+ *,
1006
+ img_update,
1007
+ log_update,
1008
+ task_update=gr.update(),
1009
+ progress_update=gr.update(),
1010
+ restart_update=gr.update(interactive=True),
1011
+ next_update=gr.update(interactive=True),
1012
+ exec_update=gr.update(interactive=True),
1013
+ video_update=None,
1014
+ options_update=gr.update(interactive=True),
1015
+ coords_update=None,
1016
+ reference_update=gr.update(interactive=True),
1017
+ show_execution_video=False,
1018
+ ui_phase="action_point",
1019
+ ):
1020
+ if video_update is None:
1021
+ video_update = gr.update(value=None, visible=False, autoplay=False, playback_position=0)
1022
+ if coords_update is None:
1023
+ coords_update = _ui_text("coords", "not_needed")
1024
+ return (
1025
+ img_update,
1026
+ log_update,
1027
+ task_update,
1028
+ progress_update,
1029
+ restart_update,
1030
+ next_update,
1031
+ exec_update,
1032
+ video_update,
1033
+ gr.update(visible=False, interactive=False), # watch_demo_video_btn
1034
+ gr.update(visible=show_execution_video), # video_phase_group
1035
+ gr.update(visible=not show_execution_video), # action_phase_group
1036
+ gr.update(visible=not show_execution_video), # control_panel_group
1037
+ options_update,
1038
+ coords_update,
1039
+ reference_update,
1040
+ ui_phase,
1041
+ )
1042
+
1043
  session = get_session(uid)
1044
  if not session:
1045
  LOGGER.error("execute_step missing session uid=%s", _uid_for_log(uid))
1046
+ return _response(
1047
+ img_update=_live_obs_update(value=None, interactive=False),
1048
+ log_update=format_log_markdown(_session_error_text()),
1049
+ restart_update=gr.update(interactive=False),
1050
+ next_update=gr.update(interactive=False),
1051
+ exec_update=gr.update(interactive=False),
1052
+ options_update=gr.update(interactive=False),
1053
+ reference_update=gr.update(interactive=False),
1054
+ show_execution_video=False,
1055
  )
1056
+
1057
  # 检查 execute 次数限制(在执行前检查,如果达到限制则模拟失败状态)
1058
  execute_limit_reached = False
1059
  if uid and session.env_id is not None and session.episode_idx is not None:
 
1075
  max_execute,
1076
  execute_limit_reached,
1077
  )
1078
+
1079
+ # Ensure at least one cached frame exists for fallback clip generation.
1080
  if not session.base_frames:
1081
  LOGGER.debug("execute_step uid=%s base_frames empty; triggering update_observation", _uid_for_log(uid))
1082
  session.update_observation(use_segmentation=USE_SEGMENTED_VIEW)
1083
+ if hasattr(session, "last_execution_frames"):
1084
+ session.last_execution_frames = []
1085
+
1086
  option_idx = _parse_option_idx(option_idx)
1087
  if option_idx is None:
1088
  LOGGER.debug("execute_step uid=%s aborted: option_idx is None", _uid_for_log(uid))
1089
+ return _response(
1090
+ img_update=_live_obs_update(value=session.get_pil_image(use_segmented=USE_SEGMENTED_VIEW), interactive=False),
1091
+ log_update=format_log_markdown(_ui_text("log", "execute_missing_action")),
1092
+ exec_update=gr.update(interactive=True),
1093
+ options_update=gr.update(choices=_build_radio_choices(session), value=None, interactive=True),
1094
+ reference_update=gr.update(interactive=True),
1095
+ show_execution_video=False,
1096
  )
1097
 
1098
  needs_coords = _option_requires_coords(session, option_idx)
 
1108
  )
1109
  current_img = session.get_pil_image(use_segmented=USE_SEGMENTED_VIEW)
1110
  error_msg = _ui_text("coords", "select_point_before_execute")
1111
+ return _response(
1112
+ img_update=_live_obs_update(value=current_img, interactive=False),
1113
+ log_update=format_log_markdown(error_msg),
1114
+ exec_update=gr.update(interactive=True),
1115
+ options_update=gr.update(choices=_build_radio_choices(session), value=None, interactive=True),
1116
+ coords_update=coords_str,
1117
+ reference_update=gr.update(interactive=True),
1118
+ show_execution_video=False,
1119
+ )
1120
 
1121
  # Parse coords
1122
  click_coords = None
 
1203
  new_count,
1204
  )
1205
 
 
 
 
 
 
 
 
 
1206
  progress_update = gr.update() # 默认不更新 progress
1207
  task_update = gr.update()
1208
 
 
1246
 
1247
  # 根据视图模式重新获取图片
1248
  img = session.get_pil_image(use_segmented=USE_SEGMENTED_VIEW)
1249
+ video_update = _build_execution_video_update(uid, session)
1250
+ show_execution_video = video_update.get("visible") is True
1251
+ radio_choices = _build_radio_choices(session)
1252
  restart_episode_update = gr.update(interactive=True)
1253
  next_task_update = gr.update(interactive=True)
1254
  exec_btn_update = gr.update(interactive=False) if done else gr.update(interactive=True)
1255
+ options_update = gr.update(choices=radio_choices, value=None, interactive=True)
1256
+ coords_update = _ui_text("coords", "not_needed")
1257
+ reference_update = gr.update(interactive=True)
1258
 
1259
  # 格式化日志消息为 HTML 格式(支持颜色显示)
1260
  formatted_status = format_log_markdown(status)
1261
  LOGGER.debug(
1262
+ "execute_step done uid=%s env=%s ep=%s done=%s exec_btn_interactive=%s show_execution_video=%s",
1263
  _uid_for_log(uid),
1264
  getattr(session, "env_id", None),
1265
  getattr(session, "episode_idx", None),
1266
  done,
1267
  not done,
1268
+ show_execution_video,
1269
  )
1270
+
1271
+ return _response(
1272
+ img_update=_live_obs_update(value=img, interactive=False),
1273
+ log_update=formatted_status,
1274
+ task_update=task_update,
1275
+ progress_update=progress_update,
1276
+ restart_update=restart_episode_update,
1277
+ next_update=next_task_update,
1278
+ exec_update=exec_btn_update,
1279
+ video_update=video_update,
1280
+ options_update=options_update,
1281
+ coords_update=coords_update,
1282
+ reference_update=reference_update,
1283
+ show_execution_video=show_execution_video,
1284
+ ui_phase="execution_video" if show_execution_video else "action_point",
1285
  )
gradio-web/process_session.py CHANGED
@@ -180,6 +180,7 @@ def session_worker_loop(cmd_queue, result_queue, stream_queue, dataset_root, gui
180
  "difficulty": session.difficulty,
181
  "seed": session.seed,
182
  "demonstration_frames": session.demonstration_frames,
 
183
  "base_frames": session.base_frames, # 加载时完整同步
184
  "wrist_frames": session.wrist_frames, # 加载时完整同步
185
  "available_options": session.available_options,
@@ -192,6 +193,7 @@ def session_worker_loop(cmd_queue, result_queue, stream_queue, dataset_root, gui
192
 
193
  elif cmd == CMD_EXECUTE_ACTION:
194
  # 执行动作(重计算任务)
 
195
  try:
196
  res = session.execute_action(*args, **kwargs)
197
  LOGGER.info(
@@ -240,8 +242,11 @@ def session_worker_loop(cmd_queue, result_queue, stream_queue, dataset_root, gui
240
  if session.env:
241
  is_demonstration = getattr(session.env, 'current_task_demonstration', False)
242
 
 
 
243
  # 构建状态更新(只更新选项和分割视图,帧通过流队列同步)
244
  state_update = {
 
245
  "available_options": session.available_options,
246
  "raw_solve_options": _sanitize_options(session.raw_solve_options),
247
  "seg_vis": session.seg_vis,
@@ -353,6 +358,7 @@ class ProcessSessionProxy:
353
  self.difficulty = None
354
  self.seed = None
355
  self.demonstration_frames = []
 
356
  self.base_frames = [] # 由后台同步线程持续更新
357
  self.wrist_frames = [] # 由后台同步线程持续更新
358
  self.available_options = []
@@ -482,6 +488,7 @@ class ProcessSessionProxy:
482
  Returns:
483
  tuple: (PIL.Image, str, bool) 图像、状态消息、是否完成
484
  """
 
485
  return self._send_cmd(CMD_EXECUTE_ACTION, action_idx, click_coords)
486
 
487
  def get_pil_image(self, use_segmented=True):
 
180
  "difficulty": session.difficulty,
181
  "seed": session.seed,
182
  "demonstration_frames": session.demonstration_frames,
183
+ "last_execution_frames": [],
184
  "base_frames": session.base_frames, # 加载时完整同步
185
  "wrist_frames": session.wrist_frames, # 加载时完整同步
186
  "available_options": session.available_options,
 
193
 
194
  elif cmd == CMD_EXECUTE_ACTION:
195
  # 执行动作(重计算任务)
196
+ execute_base_start = len(session.base_frames)
197
  try:
198
  res = session.execute_action(*args, **kwargs)
199
  LOGGER.info(
 
242
  if session.env:
243
  is_demonstration = getattr(session.env, 'current_task_demonstration', False)
244
 
245
+ execution_frames = session.base_frames[execute_base_start:]
246
+
247
  # 构建状态更新(只更新选项和分割视图,帧通过流队列同步)
248
  state_update = {
249
+ "last_execution_frames": execution_frames,
250
  "available_options": session.available_options,
251
  "raw_solve_options": _sanitize_options(session.raw_solve_options),
252
  "seg_vis": session.seg_vis,
 
358
  self.difficulty = None
359
  self.seed = None
360
  self.demonstration_frames = []
361
+ self.last_execution_frames = []
362
  self.base_frames = [] # 由后台同步线程持续更新
363
  self.wrist_frames = [] # 由后台同步线程持续更新
364
  self.available_options = []
 
488
  Returns:
489
  tuple: (PIL.Image, str, bool) 图像、状态消息、是否完成
490
  """
491
+ self.last_execution_frames = []
492
  return self._send_cmd(CMD_EXECUTE_ACTION, action_idx, click_coords)
493
 
494
  def get_pil_image(self, use_segmented=True):
gradio-web/test/test_live_obs_refresh.py CHANGED
@@ -1,64 +1,104 @@
1
  from __future__ import annotations
2
 
3
  import numpy as np
4
- from PIL import Image
5
 
6
 
7
  class _FakeSession:
8
- def __init__(self, frames, env_id="BinFill"):
9
- self.base_frames = frames
10
- self.env_id = env_id
11
-
12
-
13
- def test_refresh_live_obs_skips_when_not_execution_phase(monkeypatch, reload_module):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  callbacks = reload_module("gradio_callbacks")
15
- monkeypatch.setattr(callbacks, "get_session", lambda uid: _FakeSession([]))
16
 
17
- update = callbacks.refresh_live_obs("uid-1", "action_point")
 
 
 
18
 
19
- assert update.get("__type__") == "update"
20
- assert "value" not in update
 
21
 
 
22
 
23
- def test_refresh_live_obs_updates_image_from_latest_frame(monkeypatch, reload_module):
24
- config = reload_module("config")
25
- callbacks = reload_module("gradio_callbacks")
26
- frame0 = np.zeros((8, 8, 3), dtype=np.uint8)
27
- frame1 = np.full((8, 8, 3), 11, dtype=np.uint8)
28
- frame2 = np.full((8, 8, 3), 22, dtype=np.uint8)
29
- frame3 = np.full((8, 8, 3), 33, dtype=np.uint8)
30
- frame4 = np.full((8, 8, 3), 44, dtype=np.uint8)
31
- session = _FakeSession([frame0])
32
  monkeypatch.setattr(callbacks, "get_session", lambda uid: session)
33
- monkeypatch.setattr(callbacks, "KEYFRAME_DOWNSAMPLE_FACTOR", 2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
- # Reset queue state at execute start (cursor anchored at current base_frames length).
36
- callbacks.switch_to_execute_phase("uid-2")
37
- session.base_frames.extend([frame1, frame2, frame3, frame4])
38
 
39
- # Downsample x2 + FIFO => first frame1, then frame3.
40
- update1 = callbacks.refresh_live_obs("uid-2", "execution_playback")
41
- update2 = callbacks.refresh_live_obs("uid-2", "execution_playback")
42
- update3 = callbacks.refresh_live_obs("uid-2", "execution_playback")
 
 
 
 
 
 
 
 
 
43
 
44
- assert update1.get("__type__") == "update"
45
- assert update1.get("interactive") is False
46
- assert update1.get("elem_classes") == config.get_live_obs_elem_classes()
47
- assert isinstance(update1.get("value"), Image.Image)
48
- assert update1["value"].getpixel((0, 0)) == (11, 11, 11)
49
 
50
- assert update2.get("__type__") == "update"
51
- assert update2.get("interactive") is False
52
- assert update2.get("elem_classes") == config.get_live_obs_elem_classes()
53
- assert isinstance(update2.get("value"), Image.Image)
54
- assert update2["value"].getpixel((0, 0)) == (33, 33, 33)
55
 
56
- # Queue drained, so no further value update.
57
- assert update3.get("__type__") == "update"
58
- assert "value" not in update3
 
59
 
60
 
61
- def test_switch_phase_keeps_live_obs_visible_and_toggles_interactive(reload_module):
62
  config = reload_module("config")
63
  callbacks = reload_module("gradio_callbacks")
64
 
 
1
  from __future__ import annotations
2
 
3
  import numpy as np
 
4
 
5
 
6
  class _FakeSession:
7
+ def __init__(self):
8
+ self.env_id = "BinFill"
9
+ self.episode_idx = 1
10
+ self.raw_solve_options = [{"available": False}]
11
+ self.available_options = [("pick", 0)]
12
+ self.base_frames = []
13
+ self.last_execution_frames = []
14
+ self.non_demonstration_task_length = None
15
+ self.difficulty = "easy"
16
+ self.language_goal = "goal"
17
+ self.seed = 123
18
+
19
+ def get_pil_image(self, use_segmented=False):
20
+ _ = use_segmented
21
+ return "IMG"
22
+
23
+ def update_observation(self, use_segmentation=False):
24
+ _ = use_segmentation
25
+ return None
26
+
27
+
28
+ def test_execute_step_builds_video_from_last_execution_frames(monkeypatch, reload_module):
29
  callbacks = reload_module("gradio_callbacks")
 
30
 
31
+ frame1 = np.full((8, 8, 3), 11, dtype=np.uint8)
32
+ frame2 = np.full((8, 8, 3), 22, dtype=np.uint8)
33
+ session = _FakeSession()
34
+ session.base_frames = [frame2]
35
 
36
+ def _execute_action(_option_idx, _coords):
37
+ session.last_execution_frames = [frame1, frame2]
38
+ return "IMG", "Executing: pick", False
39
 
40
+ session.execute_action = _execute_action
41
 
42
+ captured = {}
 
 
 
 
 
 
 
 
43
  monkeypatch.setattr(callbacks, "get_session", lambda uid: session)
44
+ monkeypatch.setattr(callbacks, "increment_execute_count", lambda uid, env_id, episode_idx: 1)
45
+ monkeypatch.setattr(callbacks, "concatenate_frames_horizontally", lambda frames, env_id=None: list(frames))
46
+ def _save_video(frames, suffix=""):
47
+ captured["payload"] = (list(frames), suffix)
48
+ return "/tmp/exec.mp4"
49
+
50
+ monkeypatch.setattr(callbacks, "save_video", _save_video)
51
+ monkeypatch.setattr(callbacks.os.path, "exists", lambda path: True)
52
+ monkeypatch.setattr(callbacks.os.path, "getsize", lambda path: 10)
53
+
54
+ result = callbacks.execute_step("uid-1", 0, callbacks.UI_TEXT["coords"]["not_needed"])
55
+
56
+ saved_frames, suffix = captured["payload"]
57
+ assert [int(frame[0, 0, 0]) for frame in saved_frames] == [11, 22]
58
+ assert suffix.startswith("execute_")
59
+ assert result[7]["visible"] is True
60
+ assert result[7]["autoplay"] is True
61
+ assert result[9]["visible"] is True
62
+ assert result[10]["visible"] is False
63
+ assert result[11]["visible"] is False
64
+ assert result[12]["value"] is None
65
+ assert result[15] == "execution_video"
66
+
67
+
68
+ def test_execute_step_falls_back_to_single_frame_clip_when_no_new_frames(monkeypatch, reload_module):
69
+ callbacks = reload_module("gradio_callbacks")
70
 
71
+ frame = np.full((8, 8, 3), 33, dtype=np.uint8)
72
+ session = _FakeSession()
73
+ session.base_frames = [frame]
74
 
75
+ def _execute_action(_option_idx, _coords):
76
+ session.last_execution_frames = []
77
+ return "IMG", "Executing: pick", False
78
+
79
+ session.execute_action = _execute_action
80
+
81
+ captured = {}
82
+ monkeypatch.setattr(callbacks, "get_session", lambda uid: session)
83
+ monkeypatch.setattr(callbacks, "increment_execute_count", lambda uid, env_id, episode_idx: 1)
84
+ monkeypatch.setattr(callbacks, "concatenate_frames_horizontally", lambda frames, env_id=None: list(frames))
85
+ def _save_video(frames, suffix=""):
86
+ captured["frames"] = list(frames)
87
+ return "/tmp/exec-single.mp4"
88
 
89
+ monkeypatch.setattr(callbacks, "save_video", _save_video)
90
+ monkeypatch.setattr(callbacks.os.path, "exists", lambda path: True)
91
+ monkeypatch.setattr(callbacks.os.path, "getsize", lambda path: 10)
 
 
92
 
93
+ result = callbacks.execute_step("uid-1", 0, callbacks.UI_TEXT["coords"]["not_needed"])
 
 
 
 
94
 
95
+ assert len(captured["frames"]) == 1
96
+ assert int(captured["frames"][0][0, 0, 0]) == 33
97
+ assert result[7]["visible"] is True
98
+ assert result[15] == "execution_video"
99
 
100
 
101
+ def test_switch_phase_toggles_live_obs_interactive_without_refresh_queue(reload_module):
102
  config = reload_module("config")
103
  callbacks = reload_module("gradio_callbacks")
104
 
gradio-web/test/test_queue_session_limit_e2e.py CHANGED
@@ -435,23 +435,21 @@ def test_execute_does_not_use_episode_loading_copy(monkeypatch):
435
  gr.update(interactive=True),
436
  gr.update(interactive=True),
437
  gr.update(interactive=True),
438
- )
439
-
440
- def fake_switch_to_action_phase(uid=None):
441
- return (
442
- gr.update(interactive=True),
443
- gr.update(),
444
- gr.update(),
445
- gr.update(),
446
- gr.update(interactive=True),
447
  gr.update(interactive=True),
 
448
  )
449
 
450
  monkeypatch.setattr(ui_layout, "init_app", fake_init_app)
451
  monkeypatch.setattr(ui_layout, "precheck_execute_inputs", fake_precheck_execute_inputs)
452
  monkeypatch.setattr(ui_layout, "switch_to_execute_phase", fake_switch_to_execute_phase)
453
  monkeypatch.setattr(ui_layout, "execute_step", fake_execute_step)
454
- monkeypatch.setattr(ui_layout, "switch_to_action_phase", fake_switch_to_action_phase)
455
 
456
  demo = ui_layout.create_ui_blocks()
457
  root_url, demo, server, thread = _mount_demo(demo)
 
435
  gr.update(interactive=True),
436
  gr.update(interactive=True),
437
  gr.update(interactive=True),
438
+ gr.update(value=None, visible=False, autoplay=False, playback_position=0),
439
+ gr.update(visible=False, interactive=False),
440
+ gr.update(visible=False),
441
+ gr.update(visible=True),
442
+ gr.update(visible=True),
443
+ gr.update(choices=[("pick", 0)], value=None, interactive=True),
444
+ "No need for coordinates",
 
 
445
  gr.update(interactive=True),
446
+ "action_point",
447
  )
448
 
449
  monkeypatch.setattr(ui_layout, "init_app", fake_init_app)
450
  monkeypatch.setattr(ui_layout, "precheck_execute_inputs", fake_precheck_execute_inputs)
451
  monkeypatch.setattr(ui_layout, "switch_to_execute_phase", fake_switch_to_execute_phase)
452
  monkeypatch.setattr(ui_layout, "execute_step", fake_execute_step)
 
453
 
454
  demo = ui_layout.create_ui_blocks()
455
  root_url, demo, server, thread = _mount_demo(demo)
gradio-web/test/test_ui_native_layout_contract.py CHANGED
@@ -184,12 +184,14 @@ def test_native_ui_config_contains_phase_machine_and_precheck_chain(reload_modul
184
  if comp.get("props", {}).get("elem_id") == "demo_video"
185
  )
186
  assert demo_video_comp.get("props", {}).get("autoplay") is False
 
 
187
 
188
  api_names = [dep.get("api_name") for dep in cfg.get("dependencies", [])]
189
  assert "on_demo_video_play" in api_names
190
  assert "precheck_execute_inputs" in api_names
191
  assert "switch_to_execute_phase" in api_names
192
  assert "execute_step" in api_names
193
- assert "switch_to_action_phase" in api_names
194
  finally:
195
  demo.close()
 
184
  if comp.get("props", {}).get("elem_id") == "demo_video"
185
  )
186
  assert demo_video_comp.get("props", {}).get("autoplay") is False
187
+ component_types = [comp.get("type") for comp in cfg.get("components", [])]
188
+ assert "timer" not in component_types
189
 
190
  api_names = [dep.get("api_name") for dep in cfg.get("dependencies", [])]
191
  assert "on_demo_video_play" in api_names
192
  assert "precheck_execute_inputs" in api_names
193
  assert "switch_to_execute_phase" in api_names
194
  assert "execute_step" in api_names
195
+ assert "refresh_live_obs" not in api_names
196
  finally:
197
  demo.close()
gradio-web/test/test_ui_phase_machine_runtime_e2e.py CHANGED
@@ -416,6 +416,7 @@ def font_size_probe_ui_url(monkeypatch):
416
  def phase_machine_ui_url():
417
  state = {"precheck_calls": 0, "play_clicks": 0}
418
  demo_video_url = "https://interactive-examples.mdn.mozilla.net/media/cc0-videos/flower.mp4"
 
419
  ui_layout = importlib.reload(importlib.import_module("ui_layout"))
420
 
421
  with gr.Blocks(title="Native phase machine test") as demo:
@@ -504,7 +505,6 @@ def phase_machine_ui_url():
504
  gr.update(interactive=False),
505
  gr.update(interactive=False),
506
  gr.update(interactive=False),
507
- "execution_playback",
508
  )
509
 
510
  def execute_fn():
@@ -513,16 +513,15 @@ def phase_machine_ui_url():
513
  "executed",
514
  gr.update(interactive=True),
515
  gr.update(interactive=True),
516
- )
517
-
518
- def to_action_fn():
519
- return (
520
- gr.update(interactive=True),
521
- gr.update(interactive=True),
522
- gr.update(interactive=True),
523
  gr.update(interactive=True),
 
524
  gr.update(interactive=True),
525
- "action_point",
526
  )
527
 
528
  login_btn.click(
@@ -616,16 +615,24 @@ def phase_machine_ui_url():
616
  next_task_btn,
617
  img_display,
618
  reference_action_btn,
619
- phase_state,
620
  ],
621
  queue=False,
622
  ).then(
623
  fn=execute_fn,
624
- outputs=[log_output, next_task_btn, exec_btn],
625
- queue=False,
626
- ).then(
627
- fn=to_action_fn,
628
- outputs=[options_radio, exec_btn, next_task_btn, img_display, reference_action_btn, phase_state],
 
 
 
 
 
 
 
 
 
629
  queue=False,
630
  )
631
 
@@ -963,6 +970,21 @@ def test_phase_machine_runtime_flow_and_execute_precheck(phase_machine_ui_url):
963
  assert interactive_snapshot["execDisabled"] is True
964
  assert interactive_snapshot["nextDisabled"] is True
965
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
966
  page.wait_for_function(
967
  """() => {
968
  const execBtn = document.querySelector('button#exec_btn') || document.querySelector('#exec_btn button');
@@ -2451,54 +2473,58 @@ def test_header_task_switch_to_video_task_shows_demo_phase(monkeypatch):
2451
 
2452
  def test_phase_machine_runtime_local_video_path_end_transition():
2453
  import gradio_callbacks as cb
2454
- ui_layout = importlib.reload(importlib.import_module("ui_layout"))
2455
 
 
2456
  demo_video_path = gr.get_video("world.mp4")
2457
  fake_obs = np.zeros((24, 24, 3), dtype=np.uint8)
2458
 
2459
  class FakeSession:
2460
  def __init__(self):
2461
- self.env_id = "VideoUnmask"
 
2462
  self.language_goal = "place cube on target"
2463
- self.available_options = [("pick", 0)]
2464
- self.raw_solve_options = [{"available": False}]
2465
- self.demonstration_frames = [fake_obs.copy() for _ in range(4)]
2466
-
2467
- def load_episode(self, env_id, episode_idx):
2468
- self.env_id = env_id
2469
- return fake_obs.copy(), f"loaded {env_id}:{episode_idx}"
 
2470
 
2471
  def get_pil_image(self, use_segmented=False):
2472
  _ = use_segmented
2473
  return fake_obs.copy()
2474
 
 
 
 
 
 
 
 
 
 
 
2475
  originals = {
2476
  "get_session": cb.get_session,
2477
- "reset_play_button_clicked": cb.reset_play_button_clicked,
2478
- "reset_execute_count": cb.reset_execute_count,
2479
- "set_task_start_time": cb.set_task_start_time,
2480
- "set_ui_phase": cb.set_ui_phase,
2481
  "save_video": cb.save_video,
2482
  }
2483
 
2484
  fake_session = FakeSession()
2485
 
2486
  cb.get_session = lambda uid: fake_session
2487
- cb.reset_play_button_clicked = lambda uid: None
2488
- cb.reset_execute_count = lambda uid, env_id, ep_num: None
2489
- cb.set_task_start_time = lambda uid, env_id, ep_num, start_time: None
2490
- cb.set_ui_phase = lambda uid, phase: None
2491
  cb.save_video = lambda frames, suffix="": demo_video_path
2492
 
2493
  try:
2494
  with gr.Blocks(title="Native phase machine local video test") as demo:
2495
  uid_state = gr.State(value="uid-local-video")
2496
- demo.load(
2497
- fn=None,
2498
- js=ui_layout.DEMO_VIDEO_PLAY_BINDING_JS,
2499
- queue=False,
2500
- )
2501
- with gr.Column(visible=False, elem_id="main_interface") as main_interface:
2502
  with gr.Column(visible=False, elem_id="video_phase_group") as video_phase_group:
2503
  video_display = gr.Video(value=None, elem_id="demo_video", autoplay=False)
2504
  watch_demo_video_btn = gr.Button(
@@ -2512,72 +2538,87 @@ def test_phase_machine_runtime_local_video_path_end_transition():
2512
  img_display = gr.Image(value=fake_obs.copy(), elem_id="live_obs")
2513
 
2514
  with gr.Column(visible=True, elem_id="control_panel_group") as control_panel_group:
2515
- options_radio = gr.Radio(choices=[("pick", 0)], value=None, elem_id="action_radio")
 
 
 
 
 
2516
 
2517
  log_output = gr.Markdown("", elem_id="log_output")
2518
- goal_box = gr.Textbox("")
2519
- coords_box = gr.Textbox("No need for coordinates")
2520
  task_info_box = gr.Textbox("")
2521
  progress_info_box = gr.Textbox("")
2522
- task_hint_display = gr.Textbox("")
2523
- with gr.Column(visible=False) as loading_overlay:
2524
- gr.Markdown("Loading...")
2525
-
2526
- restart_episode_btn = gr.Button("restart", interactive=False)
2527
- next_task_btn = gr.Button("next", interactive=False)
2528
- exec_btn = gr.Button("execute", interactive=False)
2529
- reference_action_btn = gr.Button("reference", interactive=False)
2530
-
2531
- def load_fn():
2532
- status = {
2533
- "current_task": {"env_id": "VideoUnmask", "episode_idx": 1},
2534
- "completed_count": 0,
2535
- }
2536
- return cb._load_status_task("uid-local-video", status)
2537
 
2538
- demo.load(
2539
- fn=load_fn,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2540
  outputs=[
2541
- uid_state,
2542
- main_interface,
2543
  img_display,
2544
  log_output,
2545
- options_radio,
2546
- goal_box,
2547
- coords_box,
2548
- video_display,
2549
- watch_demo_video_btn,
2550
  task_info_box,
2551
  progress_info_box,
2552
  restart_episode_btn,
2553
  next_task_btn,
2554
  exec_btn,
 
 
2555
  video_phase_group,
2556
  action_phase_group,
2557
  control_panel_group,
2558
- task_hint_display,
2559
- loading_overlay,
2560
  reference_action_btn,
 
2561
  ],
2562
  queue=False,
2563
  )
2564
-
2565
- watch_demo_video_btn.click(
2566
- fn=cb.on_demo_video_play,
2567
- inputs=[uid_state],
2568
- outputs=[watch_demo_video_btn],
2569
  queue=False,
2570
  )
2571
 
2572
  video_display.end(
2573
  fn=cb.on_video_end_transition,
2574
- inputs=[uid_state],
2575
  outputs=[
2576
  video_phase_group,
2577
  action_phase_group,
2578
  control_panel_group,
2579
  log_output,
2580
  watch_demo_video_btn,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2581
  ],
2582
  queue=False,
2583
  )
@@ -2601,9 +2642,10 @@ def test_phase_machine_runtime_local_video_path_end_transition():
2601
  page = browser.new_page(viewport={"width": 1280, "height": 900})
2602
  page.goto(root_url, wait_until="domcontentloaded")
2603
  page.wait_for_selector("#main_interface", state="visible", timeout=20000)
2604
-
 
2605
  page.wait_for_selector("#demo_video video", timeout=5000)
2606
- phase_after_login = page.evaluate(
2607
  """() => {
2608
  const visible = (id) => {
2609
  const el = document.getElementById(id);
@@ -2611,37 +2653,25 @@ def test_phase_machine_runtime_local_video_path_end_transition():
2611
  const st = getComputedStyle(el);
2612
  return st.display !== 'none' && st.visibility !== 'hidden' && el.getClientRects().length > 0;
2613
  };
2614
- return {
2615
- video: visible('demo_video'),
2616
- watchButton: visible('watch_demo_video_btn'),
2617
- action: visible('live_obs'),
2618
- control: visible('action_radio'),
2619
- };
2620
- }"""
2621
- )
2622
- assert phase_after_login == {
2623
- "video": True,
2624
- "watchButton": True,
2625
- "action": False,
2626
- "control": False,
2627
- }
2628
-
2629
- controls_after_login = _read_demo_video_controls(page)
2630
- assert controls_after_login["buttonVisible"] is True
2631
- assert controls_after_login["buttonDisabled"] is False
2632
- assert controls_after_login["autoplay"] is False
2633
- assert controls_after_login["paused"] is True
2634
-
2635
- _click_demo_video_button(page)
2636
- page.wait_for_function(
2637
- """() => {
2638
- const button =
2639
- document.querySelector('#watch_demo_video_btn button') ||
2640
- document.querySelector('button#watch_demo_video_btn');
2641
- return !!button && button.disabled === true;
2642
  }""",
2643
- timeout=5000,
2644
  )
 
 
 
 
 
2645
 
2646
  did_dispatch_end = _dispatch_video_event(page, "ended")
2647
  assert did_dispatch_end
@@ -2663,6 +2693,30 @@ def test_phase_machine_runtime_local_video_path_end_transition():
2663
  }""",
2664
  timeout=2000,
2665
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2666
 
2667
  browser.close()
2668
  finally:
 
416
  def phase_machine_ui_url():
417
  state = {"precheck_calls": 0, "play_clicks": 0}
418
  demo_video_url = "https://interactive-examples.mdn.mozilla.net/media/cc0-videos/flower.mp4"
419
+ execution_video_path = gr.get_video("world.mp4")
420
  ui_layout = importlib.reload(importlib.import_module("ui_layout"))
421
 
422
  with gr.Blocks(title="Native phase machine test") as demo:
 
505
  gr.update(interactive=False),
506
  gr.update(interactive=False),
507
  gr.update(interactive=False),
 
508
  )
509
 
510
  def execute_fn():
 
513
  "executed",
514
  gr.update(interactive=True),
515
  gr.update(interactive=True),
516
+ gr.update(value=execution_video_path, visible=True, autoplay=True, playback_position=0),
517
+ gr.update(visible=False, interactive=False),
518
+ gr.update(visible=True),
519
+ gr.update(visible=False),
520
+ gr.update(visible=False),
 
 
521
  gr.update(interactive=True),
522
+ "No need for coordinates",
523
  gr.update(interactive=True),
524
+ "execution_video",
525
  )
526
 
527
  login_btn.click(
 
615
  next_task_btn,
616
  img_display,
617
  reference_action_btn,
 
618
  ],
619
  queue=False,
620
  ).then(
621
  fn=execute_fn,
622
+ outputs=[
623
+ log_output,
624
+ next_task_btn,
625
+ exec_btn,
626
+ video_display,
627
+ watch_demo_video_btn,
628
+ video_phase_group,
629
+ action_phase_group,
630
+ control_panel_group,
631
+ options_radio,
632
+ coords_box,
633
+ reference_action_btn,
634
+ phase_state,
635
+ ],
636
  queue=False,
637
  )
638
 
 
970
  assert interactive_snapshot["execDisabled"] is True
971
  assert interactive_snapshot["nextDisabled"] is True
972
 
973
+ page.wait_for_function(
974
+ """() => {
975
+ const videoEl = document.querySelector('#demo_video video');
976
+ return !!videoEl && videoEl.autoplay === true && (videoEl.paused === false || videoEl.currentTime > 0);
977
+ }""",
978
+ timeout=6000,
979
+ )
980
+ execute_video_controls = _read_demo_video_controls(page)
981
+ assert execute_video_controls["videoVisible"] is True
982
+ assert execute_video_controls["autoplay"] is True
983
+ assert execute_video_controls["paused"] is False
984
+
985
+ did_dispatch_end = _dispatch_video_event(page, "ended")
986
+ assert did_dispatch_end
987
+
988
  page.wait_for_function(
989
  """() => {
990
  const execBtn = document.querySelector('button#exec_btn') || document.querySelector('#exec_btn button');
 
2473
 
2474
  def test_phase_machine_runtime_local_video_path_end_transition():
2475
  import gradio_callbacks as cb
2476
+ import config as config_module
2477
 
2478
+ ui_layout = importlib.reload(importlib.import_module("ui_layout"))
2479
  demo_video_path = gr.get_video("world.mp4")
2480
  fake_obs = np.zeros((24, 24, 3), dtype=np.uint8)
2481
 
2482
  class FakeSession:
2483
  def __init__(self):
2484
+ self.env_id = "BinFill"
2485
+ self.episode_idx = 1
2486
  self.language_goal = "place cube on target"
2487
+ self.available_options = [("pick", 0), ("point", 1)]
2488
+ self.raw_solve_options = [{"available": False}, {"available": [object()]}]
2489
+ self.demonstration_frames = []
2490
+ self.last_execution_frames = []
2491
+ self.base_frames = [fake_obs.copy()]
2492
+ self.non_demonstration_task_length = None
2493
+ self.difficulty = "easy"
2494
+ self.seed = 123
2495
 
2496
  def get_pil_image(self, use_segmented=False):
2497
  _ = use_segmented
2498
  return fake_obs.copy()
2499
 
2500
+ def update_observation(self, use_segmentation=False):
2501
+ _ = use_segmentation
2502
+ return None
2503
+
2504
+ def execute_action(self, option_idx, click_coords):
2505
+ _ = option_idx, click_coords
2506
+ self.last_execution_frames = [fake_obs.copy() for _ in range(3)]
2507
+ self.base_frames.extend(self.last_execution_frames)
2508
+ return fake_obs.copy(), "Executing: pick", False
2509
+
2510
  originals = {
2511
  "get_session": cb.get_session,
2512
+ "increment_execute_count": cb.increment_execute_count,
 
 
 
2513
  "save_video": cb.save_video,
2514
  }
2515
 
2516
  fake_session = FakeSession()
2517
 
2518
  cb.get_session = lambda uid: fake_session
2519
+ cb.increment_execute_count = lambda uid, env_id, ep_num: 1
 
 
 
2520
  cb.save_video = lambda frames, suffix="": demo_video_path
2521
 
2522
  try:
2523
  with gr.Blocks(title="Native phase machine local video test") as demo:
2524
  uid_state = gr.State(value="uid-local-video")
2525
+ phase_state = gr.State(value="action_point")
2526
+ suppress_state = gr.State(value=False)
2527
+ with gr.Column(visible=True, elem_id="main_interface") as main_interface:
 
 
 
2528
  with gr.Column(visible=False, elem_id="video_phase_group") as video_phase_group:
2529
  video_display = gr.Video(value=None, elem_id="demo_video", autoplay=False)
2530
  watch_demo_video_btn = gr.Button(
 
2538
  img_display = gr.Image(value=fake_obs.copy(), elem_id="live_obs")
2539
 
2540
  with gr.Column(visible=True, elem_id="control_panel_group") as control_panel_group:
2541
+ options_radio = gr.Radio(choices=[("pick", 0), ("point", 1)], value=None, elem_id="action_radio")
2542
+ coords_box = gr.Textbox(config_module.UI_TEXT["coords"]["not_needed"], elem_id="coords_box")
2543
+ exec_btn = gr.Button("execute", interactive=True, elem_id="exec_btn")
2544
+ reference_action_btn = gr.Button("reference", interactive=True, elem_id="reference_action_btn")
2545
+ restart_episode_btn = gr.Button("restart", interactive=True, elem_id="restart_episode_btn")
2546
+ next_task_btn = gr.Button("next", interactive=True, elem_id="next_task_btn")
2547
 
2548
  log_output = gr.Markdown("", elem_id="log_output")
 
 
2549
  task_info_box = gr.Textbox("")
2550
  progress_info_box = gr.Textbox("")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2551
 
2552
+ exec_btn.click(
2553
+ fn=cb.precheck_execute_inputs,
2554
+ inputs=[uid_state, options_radio, coords_box],
2555
+ outputs=[],
2556
+ queue=False,
2557
+ ).then(
2558
+ fn=cb.switch_to_execute_phase,
2559
+ inputs=[uid_state],
2560
+ outputs=[
2561
+ options_radio,
2562
+ exec_btn,
2563
+ restart_episode_btn,
2564
+ next_task_btn,
2565
+ img_display,
2566
+ reference_action_btn,
2567
+ ],
2568
+ queue=False,
2569
+ ).then(
2570
+ fn=cb.execute_step,
2571
+ inputs=[uid_state, options_radio, coords_box],
2572
  outputs=[
 
 
2573
  img_display,
2574
  log_output,
 
 
 
 
 
2575
  task_info_box,
2576
  progress_info_box,
2577
  restart_episode_btn,
2578
  next_task_btn,
2579
  exec_btn,
2580
+ video_display,
2581
+ watch_demo_video_btn,
2582
  video_phase_group,
2583
  action_phase_group,
2584
  control_panel_group,
2585
+ options_radio,
2586
+ coords_box,
2587
  reference_action_btn,
2588
+ phase_state,
2589
  ],
2590
  queue=False,
2591
  )
2592
+ options_radio.change(
2593
+ fn=cb.on_option_select,
2594
+ inputs=[uid_state, options_radio, coords_box, suppress_state],
2595
+ outputs=[coords_box, img_display, log_output, suppress_state],
 
2596
  queue=False,
2597
  )
2598
 
2599
  video_display.end(
2600
  fn=cb.on_video_end_transition,
2601
+ inputs=[uid_state, phase_state],
2602
  outputs=[
2603
  video_phase_group,
2604
  action_phase_group,
2605
  control_panel_group,
2606
  log_output,
2607
  watch_demo_video_btn,
2608
+ phase_state,
2609
+ ],
2610
+ queue=False,
2611
+ )
2612
+ video_display.stop(
2613
+ fn=cb.on_video_end_transition,
2614
+ inputs=[uid_state, phase_state],
2615
+ outputs=[
2616
+ video_phase_group,
2617
+ action_phase_group,
2618
+ control_panel_group,
2619
+ log_output,
2620
+ watch_demo_video_btn,
2621
+ phase_state,
2622
  ],
2623
  queue=False,
2624
  )
 
2642
  page = browser.new_page(viewport={"width": 1280, "height": 900})
2643
  page.goto(root_url, wait_until="domcontentloaded")
2644
  page.wait_for_selector("#main_interface", state="visible", timeout=20000)
2645
+ page.locator("#action_radio input[type='radio']").first.check(force=True)
2646
+ page.locator("#exec_btn button, button#exec_btn").first.click()
2647
  page.wait_for_selector("#demo_video video", timeout=5000)
2648
+ page.wait_for_function(
2649
  """() => {
2650
  const visible = (id) => {
2651
  const el = document.getElementById(id);
 
2653
  const st = getComputedStyle(el);
2654
  return st.display !== 'none' && st.visibility !== 'hidden' && el.getClientRects().length > 0;
2655
  };
2656
+ const videoEl = document.querySelector('#demo_video video');
2657
+ return (
2658
+ visible('video_phase_group') &&
2659
+ visible('demo_video') &&
2660
+ !visible('watch_demo_video_btn') &&
2661
+ !visible('action_phase_group') &&
2662
+ !visible('control_panel_group') &&
2663
+ !!videoEl &&
2664
+ videoEl.autoplay === true &&
2665
+ (videoEl.paused === false || videoEl.currentTime > 0)
2666
+ );
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2667
  }""",
2668
+ timeout=10000,
2669
  )
2670
+ controls_after_execute = _read_demo_video_controls(page)
2671
+ assert controls_after_execute["videoVisible"] is True
2672
+ assert controls_after_execute["buttonVisible"] is False
2673
+ assert controls_after_execute["autoplay"] is True
2674
+ assert controls_after_execute["paused"] is False
2675
 
2676
  did_dispatch_end = _dispatch_video_event(page, "ended")
2677
  assert did_dispatch_end
 
2693
  }""",
2694
  timeout=2000,
2695
  )
2696
+ page.locator("#action_radio input[type='radio']").nth(1).check(force=True)
2697
+ page.wait_for_function(
2698
+ """(state) => {
2699
+ const liveObs = document.getElementById('live_obs');
2700
+ const coordsRoot = document.getElementById('coords_box');
2701
+ const coordsField = coordsRoot?.querySelector('textarea, input');
2702
+ const logRoot = document.getElementById('log_output');
2703
+ const logField = logRoot?.querySelector('textarea, input');
2704
+ const coordsValue = coordsField ? coordsField.value.trim() : '';
2705
+ const logValue = logField ? logField.value.trim() : (logRoot?.textContent || '').trim();
2706
+ return (
2707
+ !!liveObs &&
2708
+ liveObs.classList.contains(state.waitClass) &&
2709
+ coordsValue === state.coordsPrompt &&
2710
+ logValue === state.waitLog
2711
+ );
2712
+ }""",
2713
+ arg={
2714
+ "waitClass": config_module.LIVE_OBS_POINT_WAIT_CLASS,
2715
+ "coordsPrompt": config_module.UI_TEXT["coords"]["select_point"],
2716
+ "waitLog": config_module.UI_TEXT["log"]["point_selection_prompt"],
2717
+ },
2718
+ timeout=5000,
2719
+ )
2720
 
2721
  browser.close()
2722
  finally:
gradio-web/test/test_ui_text_config.py CHANGED
@@ -75,11 +75,12 @@ def test_on_video_end_transition_uses_configured_action_prompt(monkeypatch, relo
75
 
76
  monkeypatch.setitem(callbacks.UI_TEXT["log"], "action_selection_prompt", "choose an action from config")
77
 
78
- result = callbacks.on_video_end_transition("uid-1")
79
 
80
  assert result[3] == "choose an action from config"
81
  assert result[4]["visible"] is False
82
  assert result[4]["interactive"] is False
 
83
 
84
 
85
  def test_on_demo_video_play_disables_button_and_sets_single_use_state(monkeypatch, reload_module):
 
75
 
76
  monkeypatch.setitem(callbacks.UI_TEXT["log"], "action_selection_prompt", "choose an action from config")
77
 
78
+ result = callbacks.on_video_end_transition("uid-1", "demo_video")
79
 
80
  assert result[3] == "choose an action from config"
81
  assert result[4]["visible"] is False
82
  assert result[4]["interactive"] is False
83
+ assert result[5] == "action_point"
84
 
85
 
86
  def test_on_demo_video_play_disables_button_and_sets_single_use_state(monkeypatch, reload_module):
gradio-web/ui_layout.py CHANGED
@@ -1,6 +1,6 @@
1
  """
2
  Native Gradio UI layout.
3
- Sequential media phases: Demo Video -> Action+Point.
4
  Two-column layout: Point Selection | Right Panel.
5
  """
6
 
@@ -16,7 +16,6 @@ from config import (
16
  SESSION_CONCURRENCY_ID,
17
  SESSION_CONCURRENCY_LIMIT,
18
  SESSION_TIMEOUT,
19
- LIVE_OBS_REFRESH_HZ,
20
  POINT_SELECTION_SCALE,
21
  RIGHT_TOP_ACTION_SCALE,
22
  RIGHT_TOP_LOG_SCALE,
@@ -36,7 +35,6 @@ from gradio_callbacks import (
36
  on_reference_action,
37
  on_video_end_transition,
38
  precheck_execute_inputs,
39
- refresh_live_obs,
40
  restart_episode_wrapper,
41
  switch_env_wrapper,
42
  switch_to_action_phase,
@@ -49,7 +47,7 @@ from user_manager import user_manager
49
  PHASE_INIT = "init"
50
  PHASE_DEMO_VIDEO = "demo_video"
51
  PHASE_ACTION_POINT = "action_point"
52
- PHASE_EXECUTION_PLAYBACK = "execution_playback"
53
  LOAD_STATUS_MODE_IDLE = "idle"
54
  LOAD_STATUS_MODE_EPISODE_LOAD = "episode_load"
55
 
@@ -863,13 +861,13 @@ def _with_rejected_init(load_result, message):
863
 
864
 
865
  def _phase_visibility_updates(phase):
866
- if phase == PHASE_DEMO_VIDEO:
867
  return (
868
  gr.update(visible=True),
869
  gr.update(visible=False),
870
  gr.update(visible=False),
871
  )
872
- if phase in {PHASE_ACTION_POINT, PHASE_EXECUTION_PLAYBACK}:
873
  return (
874
  gr.update(visible=False),
875
  gr.update(visible=True),
@@ -931,7 +929,6 @@ def create_ui_blocks():
931
  ui_phase_state = gr.State(value=PHASE_INIT)
932
  current_task_env_state = gr.State(value=None)
933
  suppress_next_option_change_state = gr.State(value=False)
934
- live_obs_timer = gr.Timer(value=1.0 / LIVE_OBS_REFRESH_HZ, active=True)
935
 
936
  task_info_box = gr.Textbox(visible=False, elem_id="task_info_box")
937
  progress_info_box = gr.Textbox(visible=False)
@@ -949,7 +946,7 @@ def create_ui_blocks():
949
  with gr.Column(elem_classes=["native-card"], elem_id="media_card"):
950
  with gr.Column(visible=False, elem_id="video_phase_group") as video_phase_group:
951
  video_display = gr.Video(
952
- label="Demonstration Video 🎬",
953
  interactive=False,
954
  elem_id="demo_video",
955
  autoplay=False,
@@ -1316,21 +1313,17 @@ def create_ui_blocks():
1316
 
1317
  video_display.end(
1318
  fn=on_video_end_transition,
1319
- inputs=[uid_state],
1320
  outputs=[
1321
  video_phase_group,
1322
  action_phase_group,
1323
  control_panel_group,
1324
  log_output,
1325
  watch_demo_video_btn,
 
1326
  ],
1327
  queue=False,
1328
  show_progress="hidden",
1329
- ).then(
1330
- fn=lambda: PHASE_ACTION_POINT,
1331
- outputs=[ui_phase_state],
1332
- queue=False,
1333
- show_progress="hidden",
1334
  ).then(
1335
  fn=touch_session,
1336
  inputs=[uid_state],
@@ -1340,21 +1333,17 @@ def create_ui_blocks():
1340
  )
1341
  video_display.stop(
1342
  fn=on_video_end_transition,
1343
- inputs=[uid_state],
1344
  outputs=[
1345
  video_phase_group,
1346
  action_phase_group,
1347
  control_panel_group,
1348
  log_output,
1349
  watch_demo_video_btn,
 
1350
  ],
1351
  queue=False,
1352
  show_progress="hidden",
1353
- ).then(
1354
- fn=lambda: PHASE_ACTION_POINT,
1355
- outputs=[ui_phase_state],
1356
- queue=False,
1357
- show_progress="hidden",
1358
  ).then(
1359
  fn=touch_session,
1360
  inputs=[uid_state],
@@ -1437,11 +1426,6 @@ def create_ui_blocks():
1437
  ],
1438
  queue=False,
1439
  show_progress="hidden",
1440
- ).then(
1441
- fn=lambda: PHASE_EXECUTION_PLAYBACK,
1442
- outputs=[ui_phase_state],
1443
- queue=False,
1444
- show_progress="hidden",
1445
  ).then(
1446
  fn=touch_session,
1447
  inputs=[uid_state],
@@ -1451,27 +1435,26 @@ def create_ui_blocks():
1451
  ).then(
1452
  fn=execute_step,
1453
  inputs=[uid_state, options_radio, coords_box],
1454
- outputs=[img_display, log_output, task_info_box, progress_info_box, restart_episode_btn, next_task_btn, exec_btn],
1455
- show_progress="hidden",
1456
- **action_queue_kwargs,
1457
- ).then(
1458
- fn=switch_to_action_phase,
1459
- inputs=[uid_state],
1460
  outputs=[
1461
- options_radio,
1462
- exec_btn,
 
 
1463
  restart_episode_btn,
1464
  next_task_btn,
1465
- img_display,
 
 
 
 
 
 
 
1466
  reference_action_btn,
 
1467
  ],
1468
- queue=False,
1469
- show_progress="hidden",
1470
- ).then(
1471
- fn=lambda: PHASE_ACTION_POINT,
1472
- outputs=[ui_phase_state],
1473
- queue=False,
1474
  show_progress="hidden",
 
1475
  ).then(
1476
  fn=touch_session,
1477
  inputs=[uid_state],
@@ -1480,14 +1463,6 @@ def create_ui_blocks():
1480
  show_progress="hidden",
1481
  )
1482
 
1483
- live_obs_timer.tick(
1484
- fn=refresh_live_obs,
1485
- inputs=[uid_state, ui_phase_state],
1486
- outputs=[img_display],
1487
- queue=False,
1488
- show_progress="hidden",
1489
- )
1490
-
1491
  demo.load(
1492
  fn=None,
1493
  js=THEME_LOCK_JS,
 
1
  """
2
  Native Gradio UI layout.
3
+ Sequential media phases: Demo Video -> Action+Point -> Execute Video.
4
  Two-column layout: Point Selection | Right Panel.
5
  """
6
 
 
16
  SESSION_CONCURRENCY_ID,
17
  SESSION_CONCURRENCY_LIMIT,
18
  SESSION_TIMEOUT,
 
19
  POINT_SELECTION_SCALE,
20
  RIGHT_TOP_ACTION_SCALE,
21
  RIGHT_TOP_LOG_SCALE,
 
35
  on_reference_action,
36
  on_video_end_transition,
37
  precheck_execute_inputs,
 
38
  restart_episode_wrapper,
39
  switch_env_wrapper,
40
  switch_to_action_phase,
 
47
  PHASE_INIT = "init"
48
  PHASE_DEMO_VIDEO = "demo_video"
49
  PHASE_ACTION_POINT = "action_point"
50
+ PHASE_EXECUTION_VIDEO = "execution_video"
51
  LOAD_STATUS_MODE_IDLE = "idle"
52
  LOAD_STATUS_MODE_EPISODE_LOAD = "episode_load"
53
 
 
861
 
862
 
863
  def _phase_visibility_updates(phase):
864
+ if phase in {PHASE_DEMO_VIDEO, PHASE_EXECUTION_VIDEO}:
865
  return (
866
  gr.update(visible=True),
867
  gr.update(visible=False),
868
  gr.update(visible=False),
869
  )
870
+ if phase == PHASE_ACTION_POINT:
871
  return (
872
  gr.update(visible=False),
873
  gr.update(visible=True),
 
929
  ui_phase_state = gr.State(value=PHASE_INIT)
930
  current_task_env_state = gr.State(value=None)
931
  suppress_next_option_change_state = gr.State(value=False)
 
932
 
933
  task_info_box = gr.Textbox(visible=False, elem_id="task_info_box")
934
  progress_info_box = gr.Textbox(visible=False)
 
946
  with gr.Column(elem_classes=["native-card"], elem_id="media_card"):
947
  with gr.Column(visible=False, elem_id="video_phase_group") as video_phase_group:
948
  video_display = gr.Video(
949
+ label="Video Playback 🎬",
950
  interactive=False,
951
  elem_id="demo_video",
952
  autoplay=False,
 
1313
 
1314
  video_display.end(
1315
  fn=on_video_end_transition,
1316
+ inputs=[uid_state, ui_phase_state],
1317
  outputs=[
1318
  video_phase_group,
1319
  action_phase_group,
1320
  control_panel_group,
1321
  log_output,
1322
  watch_demo_video_btn,
1323
+ ui_phase_state,
1324
  ],
1325
  queue=False,
1326
  show_progress="hidden",
 
 
 
 
 
1327
  ).then(
1328
  fn=touch_session,
1329
  inputs=[uid_state],
 
1333
  )
1334
  video_display.stop(
1335
  fn=on_video_end_transition,
1336
+ inputs=[uid_state, ui_phase_state],
1337
  outputs=[
1338
  video_phase_group,
1339
  action_phase_group,
1340
  control_panel_group,
1341
  log_output,
1342
  watch_demo_video_btn,
1343
+ ui_phase_state,
1344
  ],
1345
  queue=False,
1346
  show_progress="hidden",
 
 
 
 
 
1347
  ).then(
1348
  fn=touch_session,
1349
  inputs=[uid_state],
 
1426
  ],
1427
  queue=False,
1428
  show_progress="hidden",
 
 
 
 
 
1429
  ).then(
1430
  fn=touch_session,
1431
  inputs=[uid_state],
 
1435
  ).then(
1436
  fn=execute_step,
1437
  inputs=[uid_state, options_radio, coords_box],
 
 
 
 
 
 
1438
  outputs=[
1439
+ img_display,
1440
+ log_output,
1441
+ task_info_box,
1442
+ progress_info_box,
1443
  restart_episode_btn,
1444
  next_task_btn,
1445
+ exec_btn,
1446
+ video_display,
1447
+ watch_demo_video_btn,
1448
+ video_phase_group,
1449
+ action_phase_group,
1450
+ control_panel_group,
1451
+ options_radio,
1452
+ coords_box,
1453
  reference_action_btn,
1454
+ ui_phase_state,
1455
  ],
 
 
 
 
 
 
1456
  show_progress="hidden",
1457
+ **action_queue_kwargs,
1458
  ).then(
1459
  fn=touch_session,
1460
  inputs=[uid_state],
 
1463
  show_progress="hidden",
1464
  )
1465
 
 
 
 
 
 
 
 
 
1466
  demo.load(
1467
  fn=None,
1468
  js=THEME_LOCK_JS,