HongzeFu committed on
Commit
6a155d5
·
1 Parent(s): d1f67eb

Rename "keypoint" to "point"

Browse files
gradio-web/config.py CHANGED
@@ -7,8 +7,8 @@ USE_SEGMENTED_VIEW = False # Set to True to use segmented view, False to use or
7
  LIVE_OBS_REFRESH_HZ = 30.0 # Live observation refresh frequency in Hz
8
  KEYFRAME_DOWNSAMPLE_FACTOR = 1 # Keep 1 frame out of every N streamed frames
9
 
10
- # 主界面两列宽度比例 (Keypoint Selection : Right Panel)
11
- KEYPOINT_SELECTION_SCALE = 1
12
  CONTROL_PANEL_SCALE = 2
13
 
14
  # 右侧顶部并排比例 (Action Selection : System Log)
@@ -59,8 +59,8 @@ DEMO_VIDEO_ENV_IDS = [
59
 
60
  UI_TEXT = {
61
  "log": {
62
- "action_selection_prompt": "Please select the action in the left 👈,\nSome actions also need to select keypoint",
63
- "keypoint_selection_prompt": "Current action needs location input, please click on the image to select key pixel",
64
  "demo_video_prompt": 'Press "Watch Video Input🎬" to watch a video\nNote: you can only watch the video once',
65
  "session_error": "Session Error",
66
  "reference_action_error": "Ground Truth Action Error: {error}",
@@ -73,11 +73,11 @@ UI_TEXT = {
73
  },
74
  "coords": {
75
  "not_needed": "No need for coordinates",
76
- "select_keypoint": "Please click the keypoint selection image",
77
- "select_keypoint_before_execute": "Please click the keypoint selection image before execute!",
78
  },
79
  "actions": {
80
- "keypoint_required_suffix": " 🎯",
81
  },
82
  "errors": {
83
  "load_missing_task": "Error loading task: missing current_task",
@@ -119,13 +119,13 @@ ROUTESTICK_OVERLAY_ACTION_TEXTS = [
119
  ]
120
 
121
  LIVE_OBS_BASE_CLASS = "live-obs-resizable"
122
- LIVE_OBS_KEYPOINT_WAIT_CLASS = "live-obs-keypoint-waiting"
123
 
124
 
125
- def get_live_obs_elem_classes(waiting_for_keypoint=False):
126
  classes = [LIVE_OBS_BASE_CLASS]
127
- if waiting_for_keypoint:
128
- classes.append(LIVE_OBS_KEYPOINT_WAIT_CLASS)
129
  return classes
130
 
131
 
 
7
  LIVE_OBS_REFRESH_HZ = 30.0 # Live observation refresh frequency in Hz
8
  KEYFRAME_DOWNSAMPLE_FACTOR = 1 # Keep 1 frame out of every N streamed frames
9
 
10
+ # 主界面两列宽度比例 (Point Selection : Right Panel)
11
+ POINT_SELECTION_SCALE = 1
12
  CONTROL_PANEL_SCALE = 2
13
 
14
  # 右侧顶部并排比例 (Action Selection : System Log)
 
59
 
60
  UI_TEXT = {
61
  "log": {
62
+ "action_selection_prompt": "Please select the action in the left 👈,\nSome actions also need to select a point",
63
+ "point_selection_prompt": "Current action needs location input, please click on the image to select a point",
64
  "demo_video_prompt": 'Press "Watch Video Input🎬" to watch a video\nNote: you can only watch the video once',
65
  "session_error": "Session Error",
66
  "reference_action_error": "Ground Truth Action Error: {error}",
 
73
  },
74
  "coords": {
75
  "not_needed": "No need for coordinates",
76
+ "select_point": "Please click the point selection image",
77
+ "select_point_before_execute": "Please click the point selection image before execute!",
78
  },
79
  "actions": {
80
+ "point_required_suffix": " 🎯",
81
  },
82
  "errors": {
83
  "load_missing_task": "Error loading task: missing current_task",
 
119
  ]
120
 
121
  LIVE_OBS_BASE_CLASS = "live-obs-resizable"
122
+ LIVE_OBS_POINT_WAIT_CLASS = "live-obs-point-waiting"
123
 
124
 
125
+ def get_live_obs_elem_classes(waiting_for_point=False):
126
  classes = [LIVE_OBS_BASE_CLASS]
127
+ if waiting_for_point:
128
+ classes.append(LIVE_OBS_POINT_WAIT_CLASS)
129
  return classes
130
 
131
 
gradio-web/gradio_callbacks.py CHANGED
@@ -67,8 +67,8 @@ def _action_selection_log():
67
  return format_log_markdown(_ui_text("log", "action_selection_prompt"))
68
 
69
 
70
- def _keypoint_selection_log():
71
- return format_log_markdown(_ui_text("log", "keypoint_selection_prompt"))
72
 
73
 
74
  def _live_obs_update(
@@ -76,10 +76,10 @@ def _live_obs_update(
76
  value=_LIVE_OBS_UPDATE_SKIP,
77
  interactive=None,
78
  visible=None,
79
- waiting_for_keypoint=False,
80
  ):
81
  kwargs = {
82
- "elem_classes": get_live_obs_elem_classes(waiting_for_keypoint=waiting_for_keypoint),
83
  }
84
  if value is not _LIVE_OBS_UPDATE_SKIP:
85
  kwargs["value"] = value
@@ -293,7 +293,7 @@ def on_demo_video_play(uid):
293
 
294
 
295
  def switch_to_execute_phase(uid):
296
- """Disable controls and keypoint clicking during execute playback."""
297
  if uid:
298
  session = get_session(uid)
299
  base_count = len(getattr(session, "base_frames", []) or []) if session else 0
@@ -596,7 +596,7 @@ def _load_status_task(uid, status):
596
  if 0 <= opt_idx < len(session.raw_solve_options):
597
  opt = session.raw_solve_options[opt_idx]
598
  if opt.get("available"):
599
- opt_label_with_hint = f"{opt_label}{_ui_text('actions', 'keypoint_required_suffix')}"
600
  else:
601
  opt_label_with_hint = opt_label
602
  else:
@@ -840,7 +840,7 @@ def _is_valid_coords_text(coords_text: str) -> bool:
840
  text = coords_text.strip()
841
  if text in {
842
  "",
843
- _ui_text("coords", "select_keypoint"),
844
  _ui_text("coords", "not_needed"),
845
  }:
846
  return False
@@ -896,9 +896,9 @@ def on_option_select(uid, option_value, coords_str=None, suppress_next_option_ch
896
  _is_valid_coords_text(coords_str),
897
  )
898
  return (
899
- _ui_text("coords", "select_keypoint"),
900
- _live_obs_update(value=base_img, interactive=True, waiting_for_keypoint=True),
901
- _keypoint_selection_log(),
902
  False,
903
  )
904
 
@@ -1050,7 +1050,7 @@ def precheck_execute_inputs(uid, option_idx, coords_str):
1050
  parsed_option_idx,
1051
  coords_str,
1052
  )
1053
- raise gr.Error(_ui_text("coords", "select_keypoint_before_execute"))
1054
  LOGGER.debug(
1055
  "precheck_execute_inputs passed uid=%s option=%s needs_coords=%s",
1056
  _uid_for_log(uid),
@@ -1145,7 +1145,7 @@ def execute_step(uid, option_idx, coords_str):
1145
  coords_str,
1146
  )
1147
  current_img = session.get_pil_image(use_segmented=USE_SEGMENTED_VIEW)
1148
- error_msg = _ui_text("coords", "select_keypoint_before_execute")
1149
  return _live_obs_update(value=current_img, interactive=False), format_log_markdown(error_msg), gr.update(), gr.update(), gr.update(interactive=False), gr.update(interactive=True)
1150
 
1151
  # Parse coords
 
67
  return format_log_markdown(_ui_text("log", "action_selection_prompt"))
68
 
69
 
70
+ def _point_selection_log():
71
+ return format_log_markdown(_ui_text("log", "point_selection_prompt"))
72
 
73
 
74
  def _live_obs_update(
 
76
  value=_LIVE_OBS_UPDATE_SKIP,
77
  interactive=None,
78
  visible=None,
79
+ waiting_for_point=False,
80
  ):
81
  kwargs = {
82
+ "elem_classes": get_live_obs_elem_classes(waiting_for_point=waiting_for_point),
83
  }
84
  if value is not _LIVE_OBS_UPDATE_SKIP:
85
  kwargs["value"] = value
 
293
 
294
 
295
  def switch_to_execute_phase(uid):
296
+ """Disable controls and point clicking during execute playback."""
297
  if uid:
298
  session = get_session(uid)
299
  base_count = len(getattr(session, "base_frames", []) or []) if session else 0
 
596
  if 0 <= opt_idx < len(session.raw_solve_options):
597
  opt = session.raw_solve_options[opt_idx]
598
  if opt.get("available"):
599
+ opt_label_with_hint = f"{opt_label}{_ui_text('actions', 'point_required_suffix')}"
600
  else:
601
  opt_label_with_hint = opt_label
602
  else:
 
840
  text = coords_text.strip()
841
  if text in {
842
  "",
843
+ _ui_text("coords", "select_point"),
844
  _ui_text("coords", "not_needed"),
845
  }:
846
  return False
 
896
  _is_valid_coords_text(coords_str),
897
  )
898
  return (
899
+ _ui_text("coords", "select_point"),
900
+ _live_obs_update(value=base_img, interactive=True, waiting_for_point=True),
901
+ _point_selection_log(),
902
  False,
903
  )
904
 
 
1050
  parsed_option_idx,
1051
  coords_str,
1052
  )
1053
+ raise gr.Error(_ui_text("coords", "select_point_before_execute"))
1054
  LOGGER.debug(
1055
  "precheck_execute_inputs passed uid=%s option=%s needs_coords=%s",
1056
  _uid_for_log(uid),
 
1145
  coords_str,
1146
  )
1147
  current_img = session.get_pil_image(use_segmented=USE_SEGMENTED_VIEW)
1148
+ error_msg = _ui_text("coords", "select_point_before_execute")
1149
  return _live_obs_update(value=current_img, interactive=False), format_log_markdown(error_msg), gr.update(), gr.update(), gr.update(interactive=False), gr.update(interactive=True)
1150
 
1151
  # Parse coords
gradio-web/test/test_live_obs_refresh.py CHANGED
@@ -14,7 +14,7 @@ def test_refresh_live_obs_skips_when_not_execution_phase(monkeypatch, reload_mod
14
  callbacks = reload_module("gradio_callbacks")
15
  monkeypatch.setattr(callbacks, "get_session", lambda uid: _FakeSession([]))
16
 
17
- update = callbacks.refresh_live_obs("uid-1", "action_keypoint")
18
 
19
  assert update.get("__type__") == "update"
20
  assert "value" not in update
 
14
  callbacks = reload_module("gradio_callbacks")
15
  monkeypatch.setattr(callbacks, "get_session", lambda uid: _FakeSession([]))
16
 
17
+ update = callbacks.refresh_live_obs("uid-1", "action_point")
18
 
19
  assert update.get("__type__") == "update"
20
  assert "value" not in update
gradio-web/test/test_precheck_execute_inputs.py CHANGED
@@ -28,10 +28,10 @@ def test_precheck_execute_inputs_requires_coords_when_option_needs_it(monkeypatc
28
 
29
  with pytest.raises(Exception) as excinfo:
30
  callbacks.precheck_execute_inputs(
31
- "uid-1", 0, config.UI_TEXT["coords"]["select_keypoint"]
32
  )
33
 
34
- assert config.UI_TEXT["coords"]["select_keypoint_before_execute"] in str(excinfo.value)
35
 
36
 
37
  def test_precheck_execute_inputs_accepts_valid_coords(monkeypatch, reload_module):
 
28
 
29
  with pytest.raises(Exception) as excinfo:
30
  callbacks.precheck_execute_inputs(
31
+ "uid-1", 0, config.UI_TEXT["coords"]["select_point"]
32
  )
33
 
34
+ assert config.UI_TEXT["coords"]["select_point_before_execute"] in str(excinfo.value)
35
 
36
 
37
  def test_precheck_execute_inputs_accepts_valid_coords(monkeypatch, reload_module):
gradio-web/test/test_reference_action_callbacks.py CHANGED
@@ -119,7 +119,7 @@ def test_on_reference_action_same_selected_option_does_not_set_suppression(monke
119
  assert suppress_flag is False
120
 
121
 
122
- def test_on_option_select_resets_to_keypoint_wait_state_for_point_action(monkeypatch, reload_module):
123
  config = reload_module("config")
124
  callbacks = reload_module("gradio_callbacks")
125
 
@@ -129,10 +129,10 @@ def test_on_option_select_resets_to_keypoint_wait_state_for_point_action(monkeyp
129
 
130
  coords_text, img_update, log_text, suppress_flag = callbacks.on_option_select("uid-1", 0, "12, 34", False)
131
 
132
- assert coords_text == config.UI_TEXT["coords"]["select_keypoint"]
133
  assert img_update.get("interactive") is True
134
- assert img_update.get("elem_classes") == config.get_live_obs_elem_classes(waiting_for_keypoint=True)
135
- assert log_text == config.UI_TEXT["log"]["keypoint_selection_prompt"]
136
  assert suppress_flag is False
137
 
138
 
 
119
  assert suppress_flag is False
120
 
121
 
122
+ def test_on_option_select_resets_to_point_wait_state_for_point_action(monkeypatch, reload_module):
123
  config = reload_module("config")
124
  callbacks = reload_module("gradio_callbacks")
125
 
 
129
 
130
  coords_text, img_update, log_text, suppress_flag = callbacks.on_option_select("uid-1", 0, "12, 34", False)
131
 
132
+ assert coords_text == config.UI_TEXT["coords"]["select_point"]
133
  assert img_update.get("interactive") is True
134
+ assert img_update.get("elem_classes") == config.get_live_obs_elem_classes(waiting_for_point=True)
135
+ assert log_text == config.UI_TEXT["log"]["point_selection_prompt"]
136
  assert suppress_flag is False
137
 
138
 
gradio-web/test/test_ui_native_layout_contract.py CHANGED
@@ -75,13 +75,13 @@ def test_native_ui_css_highlights_media_card_not_live_obs_transform(reload_modul
75
  assert "--media-card-radius: 8px;" in css
76
  assert "#media_card #live_obs button" in css
77
  assert "#media_card #live_obs img" in css
78
- assert "#media_card:has(#live_obs.live-obs-keypoint-waiting)::after" in css
79
  assert "inset: 0;" in css
80
  assert "border-radius: inherit;" in css
81
- assert "animation: media-card-keypoint-ring 1.2s ease-in-out infinite;" in css
82
- assert "@keyframes media-card-keypoint-ring" in css
83
- assert "#live_obs.live-obs-keypoint-waiting .image-frame" not in css
84
- assert "#live_obs.live-obs-keypoint-waiting .upload-container" not in css
85
  assert "transform: scale(" not in css
86
 
87
 
 
75
  assert "--media-card-radius: 8px;" in css
76
  assert "#media_card #live_obs button" in css
77
  assert "#media_card #live_obs img" in css
78
+ assert "#media_card:has(#live_obs.live-obs-point-waiting)::after" in css
79
  assert "inset: 0;" in css
80
  assert "border-radius: inherit;" in css
81
+ assert "animation: media-card-point-ring 1.2s ease-in-out infinite;" in css
82
+ assert "@keyframes media-card-point-ring" in css
83
+ assert "#live_obs.live-obs-point-waiting .image-frame" not in css
84
+ assert "#live_obs.live-obs-point-waiting .upload-container" not in css
85
  assert "transform: scale(" not in css
86
 
87
 
gradio-web/test/test_ui_phase_machine_runtime_e2e.py CHANGED
@@ -374,7 +374,7 @@ def phase_machine_ui_url():
374
 
375
  with gr.Column(visible=False, elem_id="control_panel_group") as control_panel_group:
376
  options_radio = gr.Radio(choices=[("pick", 0)], value=0, elem_id="action_radio")
377
- coords_box = gr.Textbox(value="please click the keypoint selection image", elem_id="coords_box")
378
  with gr.Column(visible=False, elem_id="action_buttons_row") as action_buttons_row:
379
  exec_btn = gr.Button("EXECUTE", elem_id="exec_btn")
380
  reference_action_btn = gr.Button(
@@ -404,7 +404,7 @@ def phase_machine_ui_url():
404
  gr.update(visible=False),
405
  gr.update(visible=False),
406
  gr.update(interactive=False),
407
- gr.update(value="please click the keypoint selection image"),
408
  "demo_video",
409
  )
410
 
@@ -423,13 +423,13 @@ def phase_machine_ui_url():
423
  gr.update(visible=True),
424
  gr.update(interactive=True),
425
  gr.update(visible=False, interactive=False),
426
- "action_keypoint",
427
  )
428
 
429
  def precheck_fn(_option_idx, _coords):
430
  state["precheck_calls"] += 1
431
  if state["precheck_calls"] == 1:
432
- raise gr.Error("please click the keypoint selection image before execute!")
433
 
434
  def to_execute_fn():
435
  return (
@@ -456,7 +456,7 @@ def phase_machine_ui_url():
456
  gr.update(interactive=True),
457
  gr.update(interactive=True),
458
  gr.update(interactive=True),
459
- "action_keypoint",
460
  )
461
 
462
  login_btn.click(
@@ -1230,7 +1230,7 @@ def test_no_video_task_hides_manual_demo_button(monkeypatch):
1230
  demo.close()
1231
 
1232
 
1233
- def test_keypoint_wait_state_pulses_live_obs_and_updates_system_log(monkeypatch):
1234
  config_module = importlib.reload(importlib.import_module("config"))
1235
  callbacks = importlib.reload(importlib.import_module("gradio_callbacks"))
1236
  ui_layout = importlib.reload(importlib.import_module("ui_layout"))
@@ -1248,7 +1248,7 @@ def test_keypoint_wait_state_pulses_live_obs_and_updates_system_log(monkeypatch)
1248
 
1249
  def fake_init_app(_request=None):
1250
  return (
1251
- "uid-keypoint-wait",
1252
  gr.update(visible=True), # main_interface
1253
  gr.update(
1254
  value=fake_obs_img.copy(),
@@ -1265,7 +1265,7 @@ def test_keypoint_wait_state_pulses_live_obs_and_updates_system_log(monkeypatch)
1265
  ), # coords_box
1266
  gr.update(value=None, visible=False), # video_display
1267
  gr.update(visible=False, interactive=False), # watch_demo_video_btn
1268
- "KeypointEnv (Episode 1)", # task_info_box
1269
  "Completed: 0", # progress_info_box
1270
  gr.update(interactive=True), # restart_episode_btn
1271
  gr.update(interactive=True), # next_task_btn
@@ -1288,7 +1288,7 @@ def test_keypoint_wait_state_pulses_live_obs_and_updates_system_log(monkeypatch)
1288
  host = "127.0.0.1"
1289
  root_url = f"http://{host}:{port}/"
1290
 
1291
- app = FastAPI(title="keypoint-wait-state-test")
1292
  app = gr.mount_gradio_app(app, demo, path="/")
1293
 
1294
  config = uvicorn.Config(app, host=host, port=port, log_level="error")
@@ -1308,7 +1308,7 @@ def test_keypoint_wait_state_pulses_live_obs_and_updates_system_log(monkeypatch)
1308
 
1309
  initial_classes = _read_elem_classes(page, "live_obs")
1310
  assert initial_classes is not None
1311
- assert config_module.LIVE_OBS_KEYPOINT_WAIT_CLASS not in initial_classes
1312
  assert _read_log_output_value(page) == config_module.UI_TEXT["log"]["action_selection_prompt"]
1313
  initial_card_wait = _read_media_card_wait_snapshot(page)
1314
  initial_transforms = _read_live_obs_transform_snapshot(page)
@@ -1345,25 +1345,25 @@ def test_keypoint_wait_state_pulses_live_obs_and_updates_system_log(monkeypatch)
1345
  );
1346
  }""",
1347
  arg={
1348
- "cardAnimation": "media-card-keypoint-ring",
1349
- "waitClass": config_module.LIVE_OBS_KEYPOINT_WAIT_CLASS,
1350
- "coordsPrompt": config_module.UI_TEXT["coords"]["select_keypoint"],
1351
- "waitLog": config_module.UI_TEXT["log"]["keypoint_selection_prompt"],
1352
  },
1353
  timeout=5000,
1354
  )
1355
 
1356
  wait_classes = _read_elem_classes(page, "live_obs")
1357
  assert wait_classes is not None
1358
- assert config_module.LIVE_OBS_KEYPOINT_WAIT_CLASS in wait_classes
1359
- assert _read_coords_box_value(page) == config_module.UI_TEXT["coords"]["select_keypoint"]
1360
- assert _read_log_output_value(page) == config_module.UI_TEXT["log"]["keypoint_selection_prompt"]
1361
  wait_card = _read_media_card_wait_snapshot(page)
1362
  wait_transforms = _read_live_obs_transform_snapshot(page)
1363
  wait_img_box = page.locator("#live_obs img").bounding_box()
1364
  wait_frame_box = page.locator("#live_obs .image-frame").bounding_box()
1365
  assert wait_card["opacity"] is not None and wait_card["opacity"] > 0.5
1366
- assert wait_card["animationName"] == "media-card-keypoint-ring"
1367
  assert wait_card["borderColor"] != "rgba(225, 29, 72, 0)"
1368
  assert wait_transforms["imgTransform"] == "none"
1369
  assert wait_transforms["frameTransform"] == "none"
@@ -1401,7 +1401,7 @@ def test_keypoint_wait_state_pulses_live_obs_and_updates_system_log(monkeypatch)
1401
  );
1402
  }""",
1403
  arg={
1404
- "waitClass": config_module.LIVE_OBS_KEYPOINT_WAIT_CLASS,
1405
  "actionLog": config_module.UI_TEXT["log"]["action_selection_prompt"],
1406
  },
1407
  timeout=5000,
@@ -1414,7 +1414,7 @@ def test_keypoint_wait_state_pulses_live_obs_and_updates_system_log(monkeypatch)
1414
  assert abs(coord_y - 8) <= 1
1415
  final_classes = _read_elem_classes(page, "live_obs")
1416
  assert final_classes is not None
1417
- assert config_module.LIVE_OBS_KEYPOINT_WAIT_CLASS not in final_classes
1418
  assert config_module.LIVE_OBS_BASE_CLASS in final_classes
1419
  assert _read_log_output_value(page) == config_module.UI_TEXT["log"]["action_selection_prompt"]
1420
  final_card_wait = _read_media_card_wait_snapshot(page)
@@ -1562,14 +1562,14 @@ def test_reference_action_single_click_applies_coords_without_wait_state(monkeyp
1562
  "checkedValue": "0",
1563
  "coordsValue": "5, 6",
1564
  "logValue": expected_reference_log,
1565
- "waitClass": config_module.LIVE_OBS_KEYPOINT_WAIT_CLASS,
1566
  },
1567
  timeout=5000,
1568
  )
1569
 
1570
  classes_after_reference = _read_elem_classes(page, "live_obs")
1571
  assert classes_after_reference is not None
1572
- assert config_module.LIVE_OBS_KEYPOINT_WAIT_CLASS not in classes_after_reference
1573
  assert _read_coords_box_value(page) == "5, 6"
1574
  assert _read_log_output_value(page) == expected_reference_log
1575
 
@@ -1595,18 +1595,18 @@ def test_reference_action_single_click_applies_coords_without_wait_state(monkeyp
1595
  }""",
1596
  arg={
1597
  "checkedValue": "1",
1598
- "coordsValue": config_module.UI_TEXT["coords"]["select_keypoint"],
1599
- "logValue": config_module.UI_TEXT["log"]["keypoint_selection_prompt"],
1600
- "waitClass": config_module.LIVE_OBS_KEYPOINT_WAIT_CLASS,
1601
  },
1602
  timeout=5000,
1603
  )
1604
 
1605
  classes_after_manual_change = _read_elem_classes(page, "live_obs")
1606
  assert classes_after_manual_change is not None
1607
- assert config_module.LIVE_OBS_KEYPOINT_WAIT_CLASS in classes_after_manual_change
1608
- assert _read_coords_box_value(page) == config_module.UI_TEXT["coords"]["select_keypoint"]
1609
- assert _read_log_output_value(page) == config_module.UI_TEXT["log"]["keypoint_selection_prompt"]
1610
 
1611
  browser.close()
1612
  finally:
@@ -1638,7 +1638,7 @@ def test_live_obs_client_resize_fills_width_and_keeps_click_mapping(monkeypatch)
1638
  gr.update(choices=[("pick", 0)], value=0), # options_radio
1639
  "goal", # goal_box
1640
  gr.update(
1641
- value="please click the keypoint selection image",
1642
  visible=True,
1643
  interactive=False,
1644
  ), # coords_box
 
374
 
375
  with gr.Column(visible=False, elem_id="control_panel_group") as control_panel_group:
376
  options_radio = gr.Radio(choices=[("pick", 0)], value=0, elem_id="action_radio")
377
+ coords_box = gr.Textbox(value="please click the point selection image", elem_id="coords_box")
378
  with gr.Column(visible=False, elem_id="action_buttons_row") as action_buttons_row:
379
  exec_btn = gr.Button("EXECUTE", elem_id="exec_btn")
380
  reference_action_btn = gr.Button(
 
404
  gr.update(visible=False),
405
  gr.update(visible=False),
406
  gr.update(interactive=False),
407
+ gr.update(value="please click the point selection image"),
408
  "demo_video",
409
  )
410
 
 
423
  gr.update(visible=True),
424
  gr.update(interactive=True),
425
  gr.update(visible=False, interactive=False),
426
+ "action_point",
427
  )
428
 
429
  def precheck_fn(_option_idx, _coords):
430
  state["precheck_calls"] += 1
431
  if state["precheck_calls"] == 1:
432
+ raise gr.Error("please click the point selection image before execute!")
433
 
434
  def to_execute_fn():
435
  return (
 
456
  gr.update(interactive=True),
457
  gr.update(interactive=True),
458
  gr.update(interactive=True),
459
+ "action_point",
460
  )
461
 
462
  login_btn.click(
 
1230
  demo.close()
1231
 
1232
 
1233
+ def test_point_wait_state_pulses_live_obs_and_updates_system_log(monkeypatch):
1234
  config_module = importlib.reload(importlib.import_module("config"))
1235
  callbacks = importlib.reload(importlib.import_module("gradio_callbacks"))
1236
  ui_layout = importlib.reload(importlib.import_module("ui_layout"))
 
1248
 
1249
  def fake_init_app(_request=None):
1250
  return (
1251
+ "uid-point-wait",
1252
  gr.update(visible=True), # main_interface
1253
  gr.update(
1254
  value=fake_obs_img.copy(),
 
1265
  ), # coords_box
1266
  gr.update(value=None, visible=False), # video_display
1267
  gr.update(visible=False, interactive=False), # watch_demo_video_btn
1268
+ "PointEnv (Episode 1)", # task_info_box
1269
  "Completed: 0", # progress_info_box
1270
  gr.update(interactive=True), # restart_episode_btn
1271
  gr.update(interactive=True), # next_task_btn
 
1288
  host = "127.0.0.1"
1289
  root_url = f"http://{host}:{port}/"
1290
 
1291
+ app = FastAPI(title="point-wait-state-test")
1292
  app = gr.mount_gradio_app(app, demo, path="/")
1293
 
1294
  config = uvicorn.Config(app, host=host, port=port, log_level="error")
 
1308
 
1309
  initial_classes = _read_elem_classes(page, "live_obs")
1310
  assert initial_classes is not None
1311
+ assert config_module.LIVE_OBS_POINT_WAIT_CLASS not in initial_classes
1312
  assert _read_log_output_value(page) == config_module.UI_TEXT["log"]["action_selection_prompt"]
1313
  initial_card_wait = _read_media_card_wait_snapshot(page)
1314
  initial_transforms = _read_live_obs_transform_snapshot(page)
 
1345
  );
1346
  }""",
1347
  arg={
1348
+ "cardAnimation": "media-card-point-ring",
1349
+ "waitClass": config_module.LIVE_OBS_POINT_WAIT_CLASS,
1350
+ "coordsPrompt": config_module.UI_TEXT["coords"]["select_point"],
1351
+ "waitLog": config_module.UI_TEXT["log"]["point_selection_prompt"],
1352
  },
1353
  timeout=5000,
1354
  )
1355
 
1356
  wait_classes = _read_elem_classes(page, "live_obs")
1357
  assert wait_classes is not None
1358
+ assert config_module.LIVE_OBS_POINT_WAIT_CLASS in wait_classes
1359
+ assert _read_coords_box_value(page) == config_module.UI_TEXT["coords"]["select_point"]
1360
+ assert _read_log_output_value(page) == config_module.UI_TEXT["log"]["point_selection_prompt"]
1361
  wait_card = _read_media_card_wait_snapshot(page)
1362
  wait_transforms = _read_live_obs_transform_snapshot(page)
1363
  wait_img_box = page.locator("#live_obs img").bounding_box()
1364
  wait_frame_box = page.locator("#live_obs .image-frame").bounding_box()
1365
  assert wait_card["opacity"] is not None and wait_card["opacity"] > 0.5
1366
+ assert wait_card["animationName"] == "media-card-point-ring"
1367
  assert wait_card["borderColor"] != "rgba(225, 29, 72, 0)"
1368
  assert wait_transforms["imgTransform"] == "none"
1369
  assert wait_transforms["frameTransform"] == "none"
 
1401
  );
1402
  }""",
1403
  arg={
1404
+ "waitClass": config_module.LIVE_OBS_POINT_WAIT_CLASS,
1405
  "actionLog": config_module.UI_TEXT["log"]["action_selection_prompt"],
1406
  },
1407
  timeout=5000,
 
1414
  assert abs(coord_y - 8) <= 1
1415
  final_classes = _read_elem_classes(page, "live_obs")
1416
  assert final_classes is not None
1417
+ assert config_module.LIVE_OBS_POINT_WAIT_CLASS not in final_classes
1418
  assert config_module.LIVE_OBS_BASE_CLASS in final_classes
1419
  assert _read_log_output_value(page) == config_module.UI_TEXT["log"]["action_selection_prompt"]
1420
  final_card_wait = _read_media_card_wait_snapshot(page)
 
1562
  "checkedValue": "0",
1563
  "coordsValue": "5, 6",
1564
  "logValue": expected_reference_log,
1565
+ "waitClass": config_module.LIVE_OBS_POINT_WAIT_CLASS,
1566
  },
1567
  timeout=5000,
1568
  )
1569
 
1570
  classes_after_reference = _read_elem_classes(page, "live_obs")
1571
  assert classes_after_reference is not None
1572
+ assert config_module.LIVE_OBS_POINT_WAIT_CLASS not in classes_after_reference
1573
  assert _read_coords_box_value(page) == "5, 6"
1574
  assert _read_log_output_value(page) == expected_reference_log
1575
 
 
1595
  }""",
1596
  arg={
1597
  "checkedValue": "1",
1598
+ "coordsValue": config_module.UI_TEXT["coords"]["select_point"],
1599
+ "logValue": config_module.UI_TEXT["log"]["point_selection_prompt"],
1600
+ "waitClass": config_module.LIVE_OBS_POINT_WAIT_CLASS,
1601
  },
1602
  timeout=5000,
1603
  )
1604
 
1605
  classes_after_manual_change = _read_elem_classes(page, "live_obs")
1606
  assert classes_after_manual_change is not None
1607
+ assert config_module.LIVE_OBS_POINT_WAIT_CLASS in classes_after_manual_change
1608
+ assert _read_coords_box_value(page) == config_module.UI_TEXT["coords"]["select_point"]
1609
+ assert _read_log_output_value(page) == config_module.UI_TEXT["log"]["point_selection_prompt"]
1610
 
1611
  browser.close()
1612
  finally:
 
1638
  gr.update(choices=[("pick", 0)], value=0), # options_radio
1639
  "goal", # goal_box
1640
  gr.update(
1641
+ value="please click the point selection image",
1642
  visible=True,
1643
  interactive=False,
1644
  ), # coords_box
gradio-web/test/test_ui_text_config.py CHANGED
@@ -30,14 +30,14 @@ class _FakeLoadSession:
30
  return "IMG"
31
 
32
 
33
- def test_on_option_select_uses_configured_select_keypoint_and_log_messages(monkeypatch, reload_module):
34
  reload_module("config")
35
  callbacks = reload_module("gradio_callbacks")
36
 
37
- monkeypatch.setitem(callbacks.UI_TEXT["coords"], "select_keypoint", "pick a point from config")
38
  monkeypatch.setitem(
39
  callbacks.UI_TEXT["log"],
40
- "keypoint_selection_prompt",
41
  "custom log prompt from config",
42
  )
43
  monkeypatch.setattr(callbacks, "update_session_activity", lambda uid: None)
@@ -47,7 +47,7 @@ def test_on_option_select_uses_configured_select_keypoint_and_log_messages(monke
47
 
48
  assert coords_text == "pick a point from config"
49
  assert img_update.get("interactive") is True
50
- assert callbacks.get_live_obs_elem_classes(waiting_for_keypoint=True) == img_update.get("elem_classes")
51
  assert log_text == "custom log prompt from config"
52
  assert suppress_flag is False
53
 
@@ -56,10 +56,10 @@ def test_precheck_execute_inputs_uses_configured_before_execute_message(monkeypa
56
  reload_module("config")
57
  callbacks = reload_module("gradio_callbacks")
58
 
59
- monkeypatch.setitem(callbacks.UI_TEXT["coords"], "select_keypoint", "pick a point from config")
60
  monkeypatch.setitem(
61
  callbacks.UI_TEXT["coords"],
62
- "select_keypoint_before_execute",
63
  "pick a point before execute from config",
64
  )
65
  monkeypatch.setattr(callbacks, "update_session_activity", lambda uid: None)
@@ -180,7 +180,7 @@ def test_ui_option_label_uses_routestick_configured_action_text(reload_module):
180
  assert callbacks._ui_option_label(session, "fallback", 0) == "d. move right counterclockwise↙︎←↖︎ ◜←◝"
181
 
182
 
183
- def test_load_status_task_appends_configured_keypoint_suffix_after_mapped_label(monkeypatch, reload_module):
184
  config = reload_module("config")
185
  callbacks = reload_module("gradio_callbacks")
186
  session = _FakeLoadSession(
@@ -204,7 +204,7 @@ def test_load_status_task_appends_configured_keypoint_suffix_after_mapped_label(
204
 
205
  assert result[4]["choices"] == [
206
  (
207
- f"a. move forward↓{config.UI_TEXT['actions']['keypoint_required_suffix']}",
208
  0,
209
  )
210
  ]
 
30
  return "IMG"
31
 
32
 
33
+ def test_on_option_select_uses_configured_select_point_and_log_messages(monkeypatch, reload_module):
34
  reload_module("config")
35
  callbacks = reload_module("gradio_callbacks")
36
 
37
+ monkeypatch.setitem(callbacks.UI_TEXT["coords"], "select_point", "pick a point from config")
38
  monkeypatch.setitem(
39
  callbacks.UI_TEXT["log"],
40
+ "point_selection_prompt",
41
  "custom log prompt from config",
42
  )
43
  monkeypatch.setattr(callbacks, "update_session_activity", lambda uid: None)
 
47
 
48
  assert coords_text == "pick a point from config"
49
  assert img_update.get("interactive") is True
50
+ assert callbacks.get_live_obs_elem_classes(waiting_for_point=True) == img_update.get("elem_classes")
51
  assert log_text == "custom log prompt from config"
52
  assert suppress_flag is False
53
 
 
56
  reload_module("config")
57
  callbacks = reload_module("gradio_callbacks")
58
 
59
+ monkeypatch.setitem(callbacks.UI_TEXT["coords"], "select_point", "pick a point from config")
60
  monkeypatch.setitem(
61
  callbacks.UI_TEXT["coords"],
62
+ "select_point_before_execute",
63
  "pick a point before execute from config",
64
  )
65
  monkeypatch.setattr(callbacks, "update_session_activity", lambda uid: None)
 
180
  assert callbacks._ui_option_label(session, "fallback", 0) == "d. move right counterclockwise↙︎←↖︎ ◜←◝"
181
 
182
 
183
+ def test_load_status_task_appends_configured_point_suffix_after_mapped_label(monkeypatch, reload_module):
184
  config = reload_module("config")
185
  callbacks = reload_module("gradio_callbacks")
186
  session = _FakeLoadSession(
 
204
 
205
  assert result[4]["choices"] == [
206
  (
207
+ f"a. move forward↓{config.UI_TEXT['actions']['point_required_suffix']}",
208
  0,
209
  )
210
  ]
gradio-web/ui_layout.py CHANGED
@@ -1,7 +1,7 @@
1
  """
2
  Native Gradio UI layout.
3
- Sequential media phases: Demo Video -> Action+Keypoint.
4
- Two-column layout: Keypoint Selection | Right Panel.
5
  """
6
 
7
  import ast
@@ -11,9 +11,9 @@ import gradio as gr
11
  from config import (
12
  CONTROL_PANEL_SCALE,
13
  LIVE_OBS_BASE_CLASS,
14
- LIVE_OBS_KEYPOINT_WAIT_CLASS,
15
  LIVE_OBS_REFRESH_HZ,
16
- KEYPOINT_SELECTION_SCALE,
17
  RIGHT_TOP_ACTION_SCALE,
18
  RIGHT_TOP_LOG_SCALE,
19
  UI_GLOBAL_FONT_SIZE,
@@ -41,7 +41,7 @@ from user_manager import user_manager
41
 
42
  PHASE_INIT = "init"
43
  PHASE_DEMO_VIDEO = "demo_video"
44
- PHASE_ACTION_KEYPOINT = "action_keypoint"
45
  PHASE_EXECUTION_PLAYBACK = "execution_playback"
46
 
47
  APP_THEME = gr.themes.Default()
@@ -432,7 +432,7 @@ button#watch_demo_video_btn {{
432
  z-index: 0;
433
  }}
434
 
435
- @keyframes media-card-keypoint-ring {{
436
  0% {{
437
  box-shadow: 0 0 0 0 rgba(225, 29, 72, 0.00);
438
  border-color: rgba(225, 29, 72, 0.72);
@@ -460,11 +460,11 @@ button#watch_demo_video_btn {{
460
  }}
461
  }}
462
 
463
- #media_card:has(#live_obs.{LIVE_OBS_KEYPOINT_WAIT_CLASS})::after {{
464
  border-color: rgba(225, 29, 72, 0.94);
465
  box-shadow: 0 0 0 0 rgba(225, 29, 72, 0.00);
466
  opacity: 1;
467
- animation: media-card-keypoint-ring 1.2s ease-in-out infinite;
468
  }}
469
  """
470
 
@@ -509,7 +509,7 @@ def _phase_from_updates(main_interface_update, video_phase_update):
509
  return PHASE_INIT
510
  if isinstance(video_phase_update, dict) and video_phase_update.get("visible") is True:
511
  return PHASE_DEMO_VIDEO
512
- return PHASE_ACTION_KEYPOINT
513
 
514
 
515
  def _with_phase_from_load(load_result):
@@ -528,7 +528,7 @@ def _phase_visibility_updates(phase):
528
  gr.update(visible=False),
529
  gr.update(visible=False),
530
  )
531
- if phase in {PHASE_ACTION_KEYPOINT, PHASE_EXECUTION_PLAYBACK}:
532
  return (
533
  gr.update(visible=False),
534
  gr.update(visible=True),
@@ -596,7 +596,7 @@ def create_ui_blocks():
596
 
597
  with gr.Column(visible=False, elem_id="main_interface_root") as main_interface:
598
  with gr.Row(elem_id="main_layout_row"):
599
- with gr.Column(scale=KEYPOINT_SELECTION_SCALE):
600
  with gr.Column(elem_classes=["native-card"], elem_id="media_card"):
601
  with gr.Column(visible=False, elem_id="video_phase_group") as video_phase_group:
602
  video_display = gr.Video(
@@ -618,7 +618,7 @@ def create_ui_blocks():
618
 
619
  with gr.Column(visible=False, elem_id="action_phase_group") as action_phase_group:
620
  img_display = gr.Image(
621
- label="Keypoint Selection",
622
  interactive=False,
623
  type="pil",
624
  elem_id="live_obs",
@@ -895,7 +895,7 @@ def create_ui_blocks():
895
  queue=False,
896
  show_progress="hidden",
897
  ).then(
898
- fn=lambda: PHASE_ACTION_KEYPOINT,
899
  outputs=[ui_phase_state],
900
  queue=False,
901
  show_progress="hidden",
@@ -913,7 +913,7 @@ def create_ui_blocks():
913
  queue=False,
914
  show_progress="hidden",
915
  ).then(
916
- fn=lambda: PHASE_ACTION_KEYPOINT,
917
  outputs=[ui_phase_state],
918
  queue=False,
919
  show_progress="hidden",
@@ -984,7 +984,7 @@ def create_ui_blocks():
984
  ],
985
  show_progress="hidden",
986
  ).then(
987
- fn=lambda: PHASE_ACTION_KEYPOINT,
988
  outputs=[ui_phase_state],
989
  show_progress="hidden",
990
  )
 
1
  """
2
  Native Gradio UI layout.
3
+ Sequential media phases: Demo Video -> Action+Point.
4
+ Two-column layout: Point Selection | Right Panel.
5
  """
6
 
7
  import ast
 
11
  from config import (
12
  CONTROL_PANEL_SCALE,
13
  LIVE_OBS_BASE_CLASS,
14
+ LIVE_OBS_POINT_WAIT_CLASS,
15
  LIVE_OBS_REFRESH_HZ,
16
+ POINT_SELECTION_SCALE,
17
  RIGHT_TOP_ACTION_SCALE,
18
  RIGHT_TOP_LOG_SCALE,
19
  UI_GLOBAL_FONT_SIZE,
 
41
 
42
  PHASE_INIT = "init"
43
  PHASE_DEMO_VIDEO = "demo_video"
44
+ PHASE_ACTION_POINT = "action_point"
45
  PHASE_EXECUTION_PLAYBACK = "execution_playback"
46
 
47
  APP_THEME = gr.themes.Default()
 
432
  z-index: 0;
433
  }}
434
 
435
+ @keyframes media-card-point-ring {{
436
  0% {{
437
  box-shadow: 0 0 0 0 rgba(225, 29, 72, 0.00);
438
  border-color: rgba(225, 29, 72, 0.72);
 
460
  }}
461
  }}
462
 
463
+ #media_card:has(#live_obs.{LIVE_OBS_POINT_WAIT_CLASS})::after {{
464
  border-color: rgba(225, 29, 72, 0.94);
465
  box-shadow: 0 0 0 0 rgba(225, 29, 72, 0.00);
466
  opacity: 1;
467
+ animation: media-card-point-ring 1.2s ease-in-out infinite;
468
  }}
469
  """
470
 
 
509
  return PHASE_INIT
510
  if isinstance(video_phase_update, dict) and video_phase_update.get("visible") is True:
511
  return PHASE_DEMO_VIDEO
512
+ return PHASE_ACTION_POINT
513
 
514
 
515
  def _with_phase_from_load(load_result):
 
528
  gr.update(visible=False),
529
  gr.update(visible=False),
530
  )
531
+ if phase in {PHASE_ACTION_POINT, PHASE_EXECUTION_PLAYBACK}:
532
  return (
533
  gr.update(visible=False),
534
  gr.update(visible=True),
 
596
 
597
  with gr.Column(visible=False, elem_id="main_interface_root") as main_interface:
598
  with gr.Row(elem_id="main_layout_row"):
599
+ with gr.Column(scale=POINT_SELECTION_SCALE):
600
  with gr.Column(elem_classes=["native-card"], elem_id="media_card"):
601
  with gr.Column(visible=False, elem_id="video_phase_group") as video_phase_group:
602
  video_display = gr.Video(
 
618
 
619
  with gr.Column(visible=False, elem_id="action_phase_group") as action_phase_group:
620
  img_display = gr.Image(
621
+ label="Point Selection",
622
  interactive=False,
623
  type="pil",
624
  elem_id="live_obs",
 
895
  queue=False,
896
  show_progress="hidden",
897
  ).then(
898
+ fn=lambda: PHASE_ACTION_POINT,
899
  outputs=[ui_phase_state],
900
  queue=False,
901
  show_progress="hidden",
 
913
  queue=False,
914
  show_progress="hidden",
915
  ).then(
916
+ fn=lambda: PHASE_ACTION_POINT,
917
  outputs=[ui_phase_state],
918
  queue=False,
919
  show_progress="hidden",
 
984
  ],
985
  show_progress="hidden",
986
  ).then(
987
+ fn=lambda: PHASE_ACTION_POINT,
988
  outputs=[ui_phase_state],
989
  show_progress="hidden",
990
  )