HongzeFu committed on
Commit
5de6fb7
·
1 Parent(s): 347ca04

highlight 1

Browse files
gradio-web/config.py CHANGED
@@ -41,6 +41,7 @@ DEMO_VIDEO_ENV_IDS = [
41
  UI_TEXT = {
42
  "log": {
43
  "action_selection_prompt": "please select the action in the left 👈,\nsome actions also need to select keypoint",
 
44
  "demo_video_prompt": 'press "Watch Video Input🎬" to watch a video\nNote: you can only watch the video once',
45
  "session_error": "Session Error",
46
  "reference_action_error": "Ground Truth Action Error: {error}",
@@ -98,6 +99,16 @@ ROUTESTICK_OVERLAY_ACTION_TEXTS = [
98
  "move to the nearest right target by circling around the stick counterclockwise",
99
  ]
100
 
 
 
 
 
 
 
 
 
 
 
101
 
102
  def get_ui_action_text(env_id, action_text):
103
  """
 
41
  UI_TEXT = {
42
  "log": {
43
  "action_selection_prompt": "please select the action in the left 👈,\nsome actions also need to select keypoint",
44
+ "keypoint_selection_prompt": "current action needs location input, please click on the image to select key pixel",
45
  "demo_video_prompt": 'press "Watch Video Input🎬" to watch a video\nNote: you can only watch the video once',
46
  "session_error": "Session Error",
47
  "reference_action_error": "Ground Truth Action Error: {error}",
 
99
  "move to the nearest right target by circling around the stick counterclockwise",
100
  ]
101
 
102
# CSS class names applied to the live-observation image component.
LIVE_OBS_BASE_CLASS = "live-obs-resizable"
LIVE_OBS_KEYPOINT_WAIT_CLASS = "live-obs-keypoint-waiting"


def get_live_obs_elem_classes(waiting_for_keypoint=False):
    """Return the CSS class list for the live-observation image.

    Args:
        waiting_for_keypoint: When truthy, the keypoint-waiting class is
            appended after the base class.

    Returns:
        A new list on every call, so callers may mutate it freely.
    """
    if waiting_for_keypoint:
        return [LIVE_OBS_BASE_CLASS, LIVE_OBS_KEYPOINT_WAIT_CLASS]
    return [LIVE_OBS_BASE_CLASS]
111
+
112
 
113
  def get_ui_action_text(env_id, action_text):
114
  """
gradio-web/gradio_callbacks.py CHANGED
@@ -40,6 +40,7 @@ from config import (
40
  SESSION_TIMEOUT,
41
  UI_TEXT,
42
  USE_SEGMENTED_VIEW,
 
43
  get_ui_action_text,
44
  should_show_demo_video,
45
  )
@@ -59,6 +60,55 @@ def _ui_text(section, key, **kwargs):
59
  return template.format(**kwargs) if kwargs else template
60
 
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  def _should_enqueue_sample(sample_index: int) -> bool:
63
  factor = max(1, int(KEYFRAME_DOWNSAMPLE_FACTOR))
64
  return sample_index % factor == 0
@@ -224,7 +274,7 @@ def on_video_end(uid):
224
  Called when the demonstration video finishes playing.
225
  Updates the system log to prompt for action selection.
226
  """
227
- return format_log_markdown(_ui_text("log", "action_selection_prompt"))
228
 
229
 
230
  def on_demo_video_play(uid):
@@ -263,7 +313,7 @@ def switch_to_execute_phase(uid):
263
  gr.update(interactive=False), # exec_btn
264
  gr.update(interactive=False), # restart_episode_btn
265
  gr.update(interactive=False), # next_task_btn
266
- gr.update(interactive=False), # img_display
267
  gr.update(interactive=False), # reference_action_btn
268
  )
269
 
@@ -279,7 +329,7 @@ def switch_to_action_phase(uid=None):
279
  gr.update(), # exec_btn (keep execute_step result)
280
  gr.update(), # restart_episode_btn (keep execute_step result)
281
  gr.update(), # next_task_btn (keep execute_step result)
282
- gr.update(interactive=True), # img_display
283
  gr.update(interactive=True), # reference_action_btn
284
  )
285
 
@@ -414,7 +464,7 @@ def refresh_live_obs(uid, ui_phase):
414
  img = _prepare_refresh_frame(latest)
415
  if img is None:
416
  return gr.update()
417
- return gr.update(value=img, interactive=False)
418
 
419
 
420
  def on_video_end_transition(uid):
@@ -423,7 +473,7 @@ def on_video_end_transition(uid):
423
  gr.update(visible=False), # video_phase_group
424
  gr.update(visible=True), # action_phase_group
425
  gr.update(visible=True), # control_panel_group
426
- format_log_markdown(_ui_text("log", "action_selection_prompt")),
427
  gr.update(visible=False, interactive=False), # watch_demo_video_btn
428
  )
429
 
@@ -433,7 +483,7 @@ def _task_load_failed_response(uid, message):
433
  return (
434
  uid,
435
  gr.update(visible=True), # main_interface
436
- gr.update(value=None, interactive=False), # img_display
437
  format_log_markdown(message), # log_output
438
  gr.update(choices=[], value=None), # options_radio
439
  "", # goal_box
@@ -514,7 +564,7 @@ def _load_status_task(uid, status):
514
  return (
515
  uid,
516
  gr.update(visible=True), # main_interface
517
- gr.update(value=None, interactive=False), # img_display
518
  format_log_markdown(_ui_text("errors", "load_episode_error", load_msg=load_msg)), # log_output
519
  gr.update(choices=[], value=None), # options_radio
520
  "", # goal_box
@@ -561,7 +611,7 @@ def _load_status_task(uid, status):
561
 
562
  demo_video_path = None
563
  should_show = should_show_demo_video(actual_env_id) if actual_env_id else False
564
- initial_log_msg = format_log_markdown(_ui_text("log", "action_selection_prompt"))
565
 
566
  if should_show:
567
  if session.demonstration_frames:
@@ -594,7 +644,7 @@ def _load_status_task(uid, status):
594
  return (
595
  uid,
596
  gr.update(visible=True), # main_interface
597
- gr.update(value=img, interactive=False), # img_display
598
  initial_log_msg, # log_output
599
  gr.update(choices=radio_choices, value=None), # options_radio
600
  goal_text, # goal_box
@@ -619,7 +669,7 @@ def _load_status_task(uid, status):
619
  return (
620
  uid,
621
  gr.update(visible=True), # main_interface
622
- gr.update(value=img, interactive=False), # img_display
623
  initial_log_msg, # log_output
624
  gr.update(choices=radio_choices, value=None), # options_radio
625
  goal_text, # goal_box
@@ -743,22 +793,14 @@ def on_map_click(uid, option_value, evt: gr.SelectData):
743
  session = get_session(uid)
744
  if not session:
745
  LOGGER.warning("on_map_click: missing session uid=%s", _uid_for_log(uid))
746
- return None, _ui_text("log", "session_error")
 
 
 
 
747
 
748
  # Check if current option actually needs coordinates
749
- needs_coords = False
750
- if option_value is not None:
751
- # Parse option index similar to on_option_select
752
- option_idx = None
753
- if isinstance(option_value, tuple):
754
- _, option_idx = option_value
755
- else:
756
- option_idx = option_value
757
-
758
- if option_idx is not None and 0 <= option_idx < len(session.raw_solve_options):
759
- opt = session.raw_solve_options[option_idx]
760
- if opt.get("available"):
761
- needs_coords = True
762
 
763
  if not needs_coords:
764
  LOGGER.debug(
@@ -770,7 +812,7 @@ def on_map_click(uid, option_value, evt: gr.SelectData):
770
  # Return current state without changes (or reset to default message if needed, but it should already be there)
771
  # We return the clean image and the "No need" message to enforce state
772
  base_img = session.get_pil_image(use_segmented=USE_SEGMENTED_VIEW)
773
- return base_img, _ui_text("coords", "not_needed")
774
 
775
  x, y = evt.index[0], evt.index[1]
776
  LOGGER.debug(
@@ -789,7 +831,7 @@ def on_map_click(uid, option_value, evt: gr.SelectData):
789
 
790
  coords_str = f"{x}, {y}"
791
 
792
- return marked_img, coords_str
793
 
794
 
795
  def _is_valid_coords_text(coords_text: str) -> bool:
@@ -821,7 +863,9 @@ def on_option_select(uid, option_value, coords_str=None):
821
 
822
  if option_value is None:
823
  LOGGER.debug("on_option_select uid=%s option=None", _uid_for_log(uid))
824
- return default_msg, gr.update(interactive=False)
 
 
825
 
826
  # 更新session活动时间(选择选项操作)
827
  if uid:
@@ -830,30 +874,27 @@ def on_option_select(uid, option_value, coords_str=None):
830
  session = get_session(uid)
831
  if not session:
832
  LOGGER.warning("on_option_select: missing session uid=%s", _uid_for_log(uid))
833
- return default_msg, gr.update(interactive=False)
834
 
835
- # option_value (label, idx) 元组或直接是 idx
836
- if isinstance(option_value, tuple):
837
- _, option_idx = option_value
838
- else:
839
- option_idx = option_value
840
 
841
  # Determine coords message
842
- if 0 <= option_idx < len(session.raw_solve_options):
843
- opt = session.raw_solve_options[option_idx]
844
- if opt.get("available"):
845
- LOGGER.debug(
846
- "on_option_select uid=%s option=%s requires_coords=True valid_coords=%s",
847
- _uid_for_log(uid),
848
- option_idx,
849
- _is_valid_coords_text(coords_str),
850
- )
851
- if _is_valid_coords_text(coords_str):
852
- return coords_str, gr.update(interactive=True)
853
- return _ui_text("coords", "select_keypoint"), gr.update(interactive=True)
854
 
855
  LOGGER.debug("on_option_select uid=%s option=%s requires_coords=False", _uid_for_log(uid), option_idx)
856
- return default_msg, gr.update(interactive=False)
857
 
858
 
859
  def on_reference_action(uid):
@@ -867,7 +908,7 @@ def on_reference_action(uid):
867
  if not session:
868
  LOGGER.warning("on_reference_action: missing session uid=%s", _uid_for_log(uid))
869
  return (
870
- None,
871
  gr.update(),
872
  _ui_text("coords", "not_needed"),
873
  format_log_markdown(_ui_text("log", "session_error")),
@@ -881,7 +922,7 @@ def on_reference_action(uid):
881
  except Exception as exc:
882
  LOGGER.exception("on_reference_action failed uid=%s", _uid_for_log(uid))
883
  return (
884
- current_img,
885
  gr.update(),
886
  gr.update(),
887
  format_log_markdown(_ui_text("log", "reference_action_error", error=exc)),
@@ -892,7 +933,7 @@ def on_reference_action(uid):
892
  if isinstance(reference, dict) and reference.get("message"):
893
  message = str(reference.get("message"))
894
  return (
895
- current_img,
896
  gr.update(),
897
  gr.update(),
898
  format_log_markdown(_ui_text("log", "reference_action_status", message=message)),
@@ -935,7 +976,7 @@ def on_reference_action(uid):
935
  )
936
 
937
  return (
938
- updated_img,
939
  gr.update(value=option_idx),
940
  coords_text,
941
  format_log_markdown(log_text),
@@ -979,21 +1020,13 @@ def precheck_execute_inputs(uid, option_idx, coords_str):
979
  LOGGER.error("precheck_execute_inputs: missing session uid=%s", _uid_for_log(uid))
980
  raise gr.Error(_ui_text("log", "session_error"))
981
 
982
- parsed_option_idx = option_idx
983
- if isinstance(option_idx, tuple):
984
- _, parsed_option_idx = option_idx
985
 
986
  if parsed_option_idx is None:
987
  LOGGER.debug("precheck_execute_inputs uid=%s missing option", _uid_for_log(uid))
988
  raise gr.Error(_ui_text("log", "execute_missing_action"))
989
 
990
- needs_coords = False
991
- if (
992
- isinstance(parsed_option_idx, int)
993
- and 0 <= parsed_option_idx < len(session.raw_solve_options)
994
- ):
995
- opt = session.raw_solve_options[parsed_option_idx]
996
- needs_coords = bool(opt.get("available"))
997
 
998
  if needs_coords and not _is_valid_coords_text(coords_str):
999
  LOGGER.debug(
@@ -1038,7 +1071,7 @@ def execute_step(uid, option_idx, coords_str):
1038
  if not session:
1039
  LOGGER.error("execute_step missing session uid=%s", _uid_for_log(uid))
1040
  return (
1041
- None,
1042
  format_log_markdown(_ui_text("log", "session_error")),
1043
  gr.update(),
1044
  gr.update(),
@@ -1073,10 +1106,11 @@ def execute_step(uid, option_idx, coords_str):
1073
  LOGGER.debug("execute_step uid=%s base_frames empty; triggering update_observation", _uid_for_log(uid))
1074
  session.update_observation(use_segmentation=USE_SEGMENTED_VIEW)
1075
 
 
1076
  if option_idx is None:
1077
  LOGGER.debug("execute_step uid=%s aborted: option_idx is None", _uid_for_log(uid))
1078
  return (
1079
- session.get_pil_image(use_segmented=USE_SEGMENTED_VIEW),
1080
  format_log_markdown(_ui_text("log", "execute_missing_action")),
1081
  gr.update(),
1082
  gr.update(),
@@ -1084,12 +1118,7 @@ def execute_step(uid, option_idx, coords_str):
1084
  gr.update(interactive=True),
1085
  )
1086
 
1087
- # 检查当前选项是否需要坐标
1088
- needs_coords = False
1089
- if option_idx is not None and 0 <= option_idx < len(session.raw_solve_options):
1090
- opt = session.raw_solve_options[option_idx]
1091
- if opt.get("available"):
1092
- needs_coords = True
1093
 
1094
  # 如果选项需要坐标,检查是否已经点击了图片
1095
  if needs_coords:
@@ -1102,7 +1131,7 @@ def execute_step(uid, option_idx, coords_str):
1102
  )
1103
  current_img = session.get_pil_image(use_segmented=USE_SEGMENTED_VIEW)
1104
  error_msg = _ui_text("coords", "select_keypoint_before_execute")
1105
- return current_img, format_log_markdown(error_msg), gr.update(), gr.update(), gr.update(interactive=False), gr.update(interactive=True)
1106
 
1107
  # Parse coords
1108
  click_coords = None
@@ -1257,7 +1286,7 @@ def execute_step(uid, option_idx, coords_str):
1257
  )
1258
 
1259
  return (
1260
- img,
1261
  formatted_status,
1262
  task_update,
1263
  progress_update,
 
40
  SESSION_TIMEOUT,
41
  UI_TEXT,
42
  USE_SEGMENTED_VIEW,
43
+ get_live_obs_elem_classes,
44
  get_ui_action_text,
45
  should_show_demo_video,
46
  )
 
60
  return template.format(**kwargs) if kwargs else template
61
 
62
 
63
+ _LIVE_OBS_UPDATE_SKIP = object()
64
+
65
+
66
def _action_selection_log():
    """Return the "log"/"action_selection_prompt" UI text passed through format_log_markdown."""
    prompt = _ui_text("log", "action_selection_prompt")
    return format_log_markdown(prompt)
68
+
69
+
70
def _keypoint_selection_log():
    """Return the "log"/"keypoint_selection_prompt" UI text passed through format_log_markdown."""
    prompt = _ui_text("log", "keypoint_selection_prompt")
    return format_log_markdown(prompt)
72
+
73
+
74
def _live_obs_update(
    *,
    value=_LIVE_OBS_UPDATE_SKIP,
    interactive=None,
    visible=None,
    waiting_for_keypoint=False,
):
    """Build a ``gr.update`` payload for the live-observation image.

    ``elem_classes`` is always included, computed by
    ``get_live_obs_elem_classes(waiting_for_keypoint=...)``.

    Args:
        value: New image value. Left out of the update when it is the
            ``_LIVE_OBS_UPDATE_SKIP`` sentinel, so an explicit ``None`` is
            still forwarded as a value.
        interactive: Forwarded only when not ``None``.
        visible: Forwarded only when not ``None``.
        waiting_for_keypoint: Passed through to the class-list helper.

    Returns:
        The result of ``gr.update(**kwargs)``.
    """
    update_kwargs = {
        "elem_classes": get_live_obs_elem_classes(waiting_for_keypoint=waiting_for_keypoint),
    }
    if value is not _LIVE_OBS_UPDATE_SKIP:
        update_kwargs["value"] = value
    # Optional flags are only sent when the caller actually supplied them.
    optional_flags = {"interactive": interactive, "visible": visible}
    update_kwargs.update(
        {name: flag for name, flag in optional_flags.items() if flag is not None}
    )
    return gr.update(**update_kwargs)
91
+
92
+
93
+ def _parse_option_idx(option_value):
94
+ if isinstance(option_value, tuple):
95
+ _, option_idx = option_value
96
+ return option_idx
97
+ return option_value
98
+
99
+
100
+ def _option_requires_coords(session, option_value) -> bool:
101
+ option_idx = _parse_option_idx(option_value)
102
+ if not isinstance(option_idx, int):
103
+ return False
104
+ raw_solve_options = getattr(session, "raw_solve_options", None)
105
+ if not isinstance(raw_solve_options, list):
106
+ return False
107
+ if not (0 <= option_idx < len(raw_solve_options)):
108
+ return False
109
+ return bool(raw_solve_options[option_idx].get("available"))
110
+
111
+
112
  def _should_enqueue_sample(sample_index: int) -> bool:
113
  factor = max(1, int(KEYFRAME_DOWNSAMPLE_FACTOR))
114
  return sample_index % factor == 0
 
274
  Called when the demonstration video finishes playing.
275
  Updates the system log to prompt for action selection.
276
  """
277
+ return _action_selection_log()
278
 
279
 
280
  def on_demo_video_play(uid):
 
313
  gr.update(interactive=False), # exec_btn
314
  gr.update(interactive=False), # restart_episode_btn
315
  gr.update(interactive=False), # next_task_btn
316
+ _live_obs_update(interactive=False), # img_display
317
  gr.update(interactive=False), # reference_action_btn
318
  )
319
 
 
329
  gr.update(), # exec_btn (keep execute_step result)
330
  gr.update(), # restart_episode_btn (keep execute_step result)
331
  gr.update(), # next_task_btn (keep execute_step result)
332
+ _live_obs_update(interactive=True), # img_display
333
  gr.update(interactive=True), # reference_action_btn
334
  )
335
 
 
464
  img = _prepare_refresh_frame(latest)
465
  if img is None:
466
  return gr.update()
467
+ return _live_obs_update(value=img, interactive=False)
468
 
469
 
470
  def on_video_end_transition(uid):
 
473
  gr.update(visible=False), # video_phase_group
474
  gr.update(visible=True), # action_phase_group
475
  gr.update(visible=True), # control_panel_group
476
+ _action_selection_log(),
477
  gr.update(visible=False, interactive=False), # watch_demo_video_btn
478
  )
479
 
 
483
  return (
484
  uid,
485
  gr.update(visible=True), # main_interface
486
+ _live_obs_update(value=None, interactive=False), # img_display
487
  format_log_markdown(message), # log_output
488
  gr.update(choices=[], value=None), # options_radio
489
  "", # goal_box
 
564
  return (
565
  uid,
566
  gr.update(visible=True), # main_interface
567
+ _live_obs_update(value=None, interactive=False), # img_display
568
  format_log_markdown(_ui_text("errors", "load_episode_error", load_msg=load_msg)), # log_output
569
  gr.update(choices=[], value=None), # options_radio
570
  "", # goal_box
 
611
 
612
  demo_video_path = None
613
  should_show = should_show_demo_video(actual_env_id) if actual_env_id else False
614
+ initial_log_msg = _action_selection_log()
615
 
616
  if should_show:
617
  if session.demonstration_frames:
 
644
  return (
645
  uid,
646
  gr.update(visible=True), # main_interface
647
+ _live_obs_update(value=img, interactive=False), # img_display
648
  initial_log_msg, # log_output
649
  gr.update(choices=radio_choices, value=None), # options_radio
650
  goal_text, # goal_box
 
669
  return (
670
  uid,
671
  gr.update(visible=True), # main_interface
672
+ _live_obs_update(value=img, interactive=False), # img_display
673
  initial_log_msg, # log_output
674
  gr.update(choices=radio_choices, value=None), # options_radio
675
  goal_text, # goal_box
 
793
  session = get_session(uid)
794
  if not session:
795
  LOGGER.warning("on_map_click: missing session uid=%s", _uid_for_log(uid))
796
+ return (
797
+ _live_obs_update(value=None, interactive=False),
798
+ _ui_text("coords", "not_needed"),
799
+ format_log_markdown(_ui_text("log", "session_error")),
800
+ )
801
 
802
  # Check if current option actually needs coordinates
803
+ needs_coords = _option_requires_coords(session, option_value)
 
 
 
 
 
 
 
 
 
 
 
 
804
 
805
  if not needs_coords:
806
  LOGGER.debug(
 
812
  # Return current state without changes (or reset to default message if needed, but it should already be there)
813
  # We return the clean image and the "No need" message to enforce state
814
  base_img = session.get_pil_image(use_segmented=USE_SEGMENTED_VIEW)
815
+ return _live_obs_update(value=base_img, interactive=False), _ui_text("coords", "not_needed"), _action_selection_log()
816
 
817
  x, y = evt.index[0], evt.index[1]
818
  LOGGER.debug(
 
831
 
832
  coords_str = f"{x}, {y}"
833
 
834
+ return _live_obs_update(value=marked_img, interactive=True), coords_str, _action_selection_log()
835
 
836
 
837
  def _is_valid_coords_text(coords_text: str) -> bool:
 
863
 
864
  if option_value is None:
865
  LOGGER.debug("on_option_select uid=%s option=None", _uid_for_log(uid))
866
+ session = get_session(uid) if uid else None
867
+ base_img = session.get_pil_image(use_segmented=USE_SEGMENTED_VIEW) if session else _LIVE_OBS_UPDATE_SKIP
868
+ return default_msg, _live_obs_update(value=base_img, interactive=False), _action_selection_log()
869
 
870
  # 更新session活动时间(选择选项操作)
871
  if uid:
 
874
  session = get_session(uid)
875
  if not session:
876
  LOGGER.warning("on_option_select: missing session uid=%s", _uid_for_log(uid))
877
+ return default_msg, _live_obs_update(interactive=False), format_log_markdown(_ui_text("log", "session_error"))
878
 
879
+ option_idx = _parse_option_idx(option_value)
880
+ base_img = session.get_pil_image(use_segmented=USE_SEGMENTED_VIEW)
 
 
 
881
 
882
  # Determine coords message
883
+ if _option_requires_coords(session, option_idx):
884
+ LOGGER.debug(
885
+ "on_option_select uid=%s option=%s requires_coords=True valid_coords=%s",
886
+ _uid_for_log(uid),
887
+ option_idx,
888
+ _is_valid_coords_text(coords_str),
889
+ )
890
+ return (
891
+ _ui_text("coords", "select_keypoint"),
892
+ _live_obs_update(value=base_img, interactive=True, waiting_for_keypoint=True),
893
+ _keypoint_selection_log(),
894
+ )
895
 
896
  LOGGER.debug("on_option_select uid=%s option=%s requires_coords=False", _uid_for_log(uid), option_idx)
897
+ return default_msg, _live_obs_update(value=base_img, interactive=False), _action_selection_log()
898
 
899
 
900
  def on_reference_action(uid):
 
908
  if not session:
909
  LOGGER.warning("on_reference_action: missing session uid=%s", _uid_for_log(uid))
910
  return (
911
+ _live_obs_update(value=None, interactive=False),
912
  gr.update(),
913
  _ui_text("coords", "not_needed"),
914
  format_log_markdown(_ui_text("log", "session_error")),
 
922
  except Exception as exc:
923
  LOGGER.exception("on_reference_action failed uid=%s", _uid_for_log(uid))
924
  return (
925
+ _live_obs_update(value=current_img, interactive=False),
926
  gr.update(),
927
  gr.update(),
928
  format_log_markdown(_ui_text("log", "reference_action_error", error=exc)),
 
933
  if isinstance(reference, dict) and reference.get("message"):
934
  message = str(reference.get("message"))
935
  return (
936
+ _live_obs_update(value=current_img, interactive=False),
937
  gr.update(),
938
  gr.update(),
939
  format_log_markdown(_ui_text("log", "reference_action_status", message=message)),
 
976
  )
977
 
978
  return (
979
+ _live_obs_update(value=updated_img, interactive=False),
980
  gr.update(value=option_idx),
981
  coords_text,
982
  format_log_markdown(log_text),
 
1020
  LOGGER.error("precheck_execute_inputs: missing session uid=%s", _uid_for_log(uid))
1021
  raise gr.Error(_ui_text("log", "session_error"))
1022
 
1023
+ parsed_option_idx = _parse_option_idx(option_idx)
 
 
1024
 
1025
  if parsed_option_idx is None:
1026
  LOGGER.debug("precheck_execute_inputs uid=%s missing option", _uid_for_log(uid))
1027
  raise gr.Error(_ui_text("log", "execute_missing_action"))
1028
 
1029
+ needs_coords = _option_requires_coords(session, parsed_option_idx)
 
 
 
 
 
 
1030
 
1031
  if needs_coords and not _is_valid_coords_text(coords_str):
1032
  LOGGER.debug(
 
1071
  if not session:
1072
  LOGGER.error("execute_step missing session uid=%s", _uid_for_log(uid))
1073
  return (
1074
+ _live_obs_update(value=None, interactive=False),
1075
  format_log_markdown(_ui_text("log", "session_error")),
1076
  gr.update(),
1077
  gr.update(),
 
1106
  LOGGER.debug("execute_step uid=%s base_frames empty; triggering update_observation", _uid_for_log(uid))
1107
  session.update_observation(use_segmentation=USE_SEGMENTED_VIEW)
1108
 
1109
+ option_idx = _parse_option_idx(option_idx)
1110
  if option_idx is None:
1111
  LOGGER.debug("execute_step uid=%s aborted: option_idx is None", _uid_for_log(uid))
1112
  return (
1113
+ _live_obs_update(value=session.get_pil_image(use_segmented=USE_SEGMENTED_VIEW), interactive=False),
1114
  format_log_markdown(_ui_text("log", "execute_missing_action")),
1115
  gr.update(),
1116
  gr.update(),
 
1118
  gr.update(interactive=True),
1119
  )
1120
 
1121
+ needs_coords = _option_requires_coords(session, option_idx)
 
 
 
 
 
1122
 
1123
  # 如果选项需要坐标,检查是否已经点击了图片
1124
  if needs_coords:
 
1131
  )
1132
  current_img = session.get_pil_image(use_segmented=USE_SEGMENTED_VIEW)
1133
  error_msg = _ui_text("coords", "select_keypoint_before_execute")
1134
+ return _live_obs_update(value=current_img, interactive=False), format_log_markdown(error_msg), gr.update(), gr.update(), gr.update(interactive=False), gr.update(interactive=True)
1135
 
1136
  # Parse coords
1137
  click_coords = None
 
1286
  )
1287
 
1288
  return (
1289
+ _live_obs_update(value=img, interactive=False),
1290
  formatted_status,
1291
  task_update,
1292
  progress_update,
gradio-web/main.py CHANGED
@@ -17,15 +17,9 @@ TEMP_DEMOS_DIR = PROJECT_ROOT / "temp_demos"
17
  CWD_TEMP_DEMOS_DIR = Path.cwd() / "temp_demos"
18
 
19
 
20
- def configure_runtime_devices():
21
- """Restrict the app to physical GPU 1 and map rendering to the visible device."""
22
- os.environ["CUDA_VISIBLE_DEVICES"] = "1"
23
- os.environ.setdefault("NVIDIA_VISIBLE_DEVICES", "1")
24
- # After masking to physical GPU 1, libraries should use logical cuda:0.
25
- os.environ["SAPIEN_RENDER_DEVICE"] = "cuda:0"
26
 
27
 
28
- configure_runtime_devices()
29
 
30
  if str(PROJECT_ROOT) not in sys.path:
31
  sys.path.insert(0, str(PROJECT_ROOT))
@@ -132,7 +126,8 @@ def main():
132
 
133
  os.environ.setdefault("ROBOMME_TEMP_DEMOS_DIR", str(TEMP_DEMOS_DIR))
134
  allowed_paths = build_allowed_paths()
135
- server_port = int(os.getenv("PORT", "7860"))
 
136
  LOGGER.info(
137
  "Launching UI with server_name=%s server_port=%s ROBOMME_TEMP_DEMOS_DIR=%s",
138
  "0.0.0.0",
 
17
  CWD_TEMP_DEMOS_DIR = Path.cwd() / "temp_demos"
18
 
19
 
 
 
 
 
 
 
20
 
21
 
22
+
23
 
24
  if str(PROJECT_ROOT) not in sys.path:
25
  sys.path.insert(0, str(PROJECT_ROOT))
 
126
 
127
  os.environ.setdefault("ROBOMME_TEMP_DEMOS_DIR", str(TEMP_DEMOS_DIR))
128
  allowed_paths = build_allowed_paths()
129
+ #server_port = int(os.getenv("PORT", "7860"))
130
+ server_port = 7861
131
  LOGGER.info(
132
  "Launching UI with server_name=%s server_port=%s ROBOMME_TEMP_DEMOS_DIR=%s",
133
  "0.0.0.0",
gradio-web/test/test_live_obs_refresh.py CHANGED
@@ -21,6 +21,7 @@ def test_refresh_live_obs_skips_when_not_execution_phase(monkeypatch, reload_mod
21
 
22
 
23
  def test_refresh_live_obs_updates_image_from_latest_frame(monkeypatch, reload_module):
 
24
  callbacks = reload_module("gradio_callbacks")
25
  frame0 = np.zeros((8, 8, 3), dtype=np.uint8)
26
  frame1 = np.full((8, 8, 3), 11, dtype=np.uint8)
@@ -29,6 +30,7 @@ def test_refresh_live_obs_updates_image_from_latest_frame(monkeypatch, reload_mo
29
  frame4 = np.full((8, 8, 3), 44, dtype=np.uint8)
30
  session = _FakeSession([frame0])
31
  monkeypatch.setattr(callbacks, "get_session", lambda uid: session)
 
32
 
33
  # Reset queue state at execute start (cursor anchored at current base_frames length).
34
  callbacks.switch_to_execute_phase("uid-2")
@@ -41,11 +43,13 @@ def test_refresh_live_obs_updates_image_from_latest_frame(monkeypatch, reload_mo
41
 
42
  assert update1.get("__type__") == "update"
43
  assert update1.get("interactive") is False
 
44
  assert isinstance(update1.get("value"), Image.Image)
45
  assert update1["value"].getpixel((0, 0)) == (11, 11, 11)
46
 
47
  assert update2.get("__type__") == "update"
48
  assert update2.get("interactive") is False
 
49
  assert isinstance(update2.get("value"), Image.Image)
50
  assert update2["value"].getpixel((0, 0)) == (33, 33, 33)
51
 
@@ -55,16 +59,19 @@ def test_refresh_live_obs_updates_image_from_latest_frame(monkeypatch, reload_mo
55
 
56
 
57
  def test_switch_phase_keeps_live_obs_visible_and_toggles_interactive(reload_module):
 
58
  callbacks = reload_module("gradio_callbacks")
59
 
60
  to_exec = callbacks.switch_to_execute_phase("uid-3")
61
  assert len(to_exec) == 6
62
  assert to_exec[0].get("interactive") is False
63
  assert to_exec[4].get("interactive") is False
 
64
  assert to_exec[5].get("interactive") is False
65
 
66
  to_action = callbacks.switch_to_action_phase()
67
  assert len(to_action) == 6
68
  assert to_action[0].get("interactive") is True
69
  assert to_action[4].get("interactive") is True
 
70
  assert to_action[5].get("interactive") is True
 
21
 
22
 
23
  def test_refresh_live_obs_updates_image_from_latest_frame(monkeypatch, reload_module):
24
+ config = reload_module("config")
25
  callbacks = reload_module("gradio_callbacks")
26
  frame0 = np.zeros((8, 8, 3), dtype=np.uint8)
27
  frame1 = np.full((8, 8, 3), 11, dtype=np.uint8)
 
30
  frame4 = np.full((8, 8, 3), 44, dtype=np.uint8)
31
  session = _FakeSession([frame0])
32
  monkeypatch.setattr(callbacks, "get_session", lambda uid: session)
33
+ monkeypatch.setattr(callbacks, "KEYFRAME_DOWNSAMPLE_FACTOR", 2)
34
 
35
  # Reset queue state at execute start (cursor anchored at current base_frames length).
36
  callbacks.switch_to_execute_phase("uid-2")
 
43
 
44
  assert update1.get("__type__") == "update"
45
  assert update1.get("interactive") is False
46
+ assert update1.get("elem_classes") == config.get_live_obs_elem_classes()
47
  assert isinstance(update1.get("value"), Image.Image)
48
  assert update1["value"].getpixel((0, 0)) == (11, 11, 11)
49
 
50
  assert update2.get("__type__") == "update"
51
  assert update2.get("interactive") is False
52
+ assert update2.get("elem_classes") == config.get_live_obs_elem_classes()
53
  assert isinstance(update2.get("value"), Image.Image)
54
  assert update2["value"].getpixel((0, 0)) == (33, 33, 33)
55
 
 
59
 
60
 
61
  def test_switch_phase_keeps_live_obs_visible_and_toggles_interactive(reload_module):
62
+ config = reload_module("config")
63
  callbacks = reload_module("gradio_callbacks")
64
 
65
  to_exec = callbacks.switch_to_execute_phase("uid-3")
66
  assert len(to_exec) == 6
67
  assert to_exec[0].get("interactive") is False
68
  assert to_exec[4].get("interactive") is False
69
+ assert to_exec[4].get("elem_classes") == config.get_live_obs_elem_classes()
70
  assert to_exec[5].get("interactive") is False
71
 
72
  to_action = callbacks.switch_to_action_phase()
73
  assert len(to_action) == 6
74
  assert to_action[0].get("interactive") is True
75
  assert to_action[4].get("interactive") is True
76
+ assert to_action[4].get("elem_classes") == config.get_live_obs_elem_classes()
77
  assert to_action[5].get("interactive") is True
gradio-web/test/test_reference_action_callbacks.py CHANGED
@@ -20,6 +20,10 @@ class _FakeOptionSession:
20
  self.raw_solve_options = [{"available": [object()]}]
21
  self.available_options = [("pick", 0)]
22
 
 
 
 
 
23
 
24
  def test_on_reference_action_success_updates_option_and_coords(monkeypatch, reload_module):
25
  config = reload_module("config")
@@ -40,10 +44,12 @@ def test_on_reference_action_success_updates_option_and_coords(monkeypatch, relo
40
  monkeypatch.setattr(callbacks, "update_session_activity", lambda uid: None)
41
  monkeypatch.setattr(callbacks, "get_session", lambda uid: session)
42
 
43
- img, option_update, coords_text, log_html = callbacks.on_reference_action("uid-1")
44
 
45
- assert isinstance(img, Image.Image)
46
- assert img.getpixel((5, 6)) != (0, 0, 0)
 
 
47
  assert option_update.get("value") == 2
48
  assert coords_text == "5, 6"
49
  expected_log = config.UI_TEXT["log"]["reference_action_message_with_coords"].format(
@@ -61,9 +67,10 @@ def test_on_reference_action_session_missing(monkeypatch, reload_module):
61
  monkeypatch.setattr(callbacks, "update_session_activity", lambda uid: None)
62
  monkeypatch.setattr(callbacks, "get_session", lambda uid: None)
63
 
64
- img, option_update, coords_text, log_html = callbacks.on_reference_action("uid-missing")
65
 
66
- assert img is None
 
67
  assert option_update.get("__type__") == "update"
68
  assert coords_text == config.UI_TEXT["coords"]["not_needed"]
69
  assert log_html == config.UI_TEXT["log"]["session_error"]
@@ -81,18 +88,39 @@ def test_on_reference_action_error_message_from_reference(monkeypatch, reload_mo
81
  assert log_html == config.UI_TEXT["log"]["reference_action_status"].format(message="bad ref")
82
 
83
 
84
- def test_on_option_select_keeps_valid_coords_when_option_needs_coords(monkeypatch, reload_module):
85
- reload_module("config")
86
  callbacks = reload_module("gradio_callbacks")
87
 
88
  session = _FakeOptionSession()
89
  monkeypatch.setattr(callbacks, "update_session_activity", lambda uid: None)
90
  monkeypatch.setattr(callbacks, "get_session", lambda uid: session)
91
 
92
- coords_text, img_update = callbacks.on_option_select("uid-1", 0, "12, 34")
93
 
94
- assert coords_text == "12, 34"
95
  assert img_update.get("interactive") is True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
 
98
  def test_on_reference_action_uses_configured_action_text_override(monkeypatch, reload_module):
@@ -120,5 +148,5 @@ def test_on_reference_action_uses_configured_action_text_override(monkeypatch, r
120
  assert coords_text == config.UI_TEXT["coords"]["not_needed"]
121
  assert log_html == config.UI_TEXT["log"]["reference_action_message"].format(
122
  option_label="a",
123
- option_action="move forward",
124
  )
 
20
  self.raw_solve_options = [{"available": [object()]}]
21
  self.available_options = [("pick", 0)]
22
 
23
+ def get_pil_image(self, use_segmented=True):
24
+ _ = use_segmented
25
+ return Image.new("RGB", (24, 24), color=(0, 0, 0))
26
+
27
 
28
  def test_on_reference_action_success_updates_option_and_coords(monkeypatch, reload_module):
29
  config = reload_module("config")
 
44
  monkeypatch.setattr(callbacks, "update_session_activity", lambda uid: None)
45
  monkeypatch.setattr(callbacks, "get_session", lambda uid: session)
46
 
47
+ img_update, option_update, coords_text, log_html = callbacks.on_reference_action("uid-1")
48
 
49
+ assert img_update.get("__type__") == "update"
50
+ assert isinstance(img_update.get("value"), Image.Image)
51
+ assert img_update["value"].getpixel((5, 6)) != (0, 0, 0)
52
+ assert img_update.get("elem_classes") == config.get_live_obs_elem_classes()
53
  assert option_update.get("value") == 2
54
  assert coords_text == "5, 6"
55
  expected_log = config.UI_TEXT["log"]["reference_action_message_with_coords"].format(
 
67
  monkeypatch.setattr(callbacks, "update_session_activity", lambda uid: None)
68
  monkeypatch.setattr(callbacks, "get_session", lambda uid: None)
69
 
70
+ img_update, option_update, coords_text, log_html = callbacks.on_reference_action("uid-missing")
71
 
72
+ assert img_update.get("__type__") == "update"
73
+ assert img_update.get("value") is None
74
  assert option_update.get("__type__") == "update"
75
  assert coords_text == config.UI_TEXT["coords"]["not_needed"]
76
  assert log_html == config.UI_TEXT["log"]["session_error"]
 
88
  assert log_html == config.UI_TEXT["log"]["reference_action_status"].format(message="bad ref")
89
 
90
 
91
+ def test_on_option_select_resets_to_keypoint_wait_state_for_point_action(monkeypatch, reload_module):
92
+ config = reload_module("config")
93
  callbacks = reload_module("gradio_callbacks")
94
 
95
  session = _FakeOptionSession()
96
  monkeypatch.setattr(callbacks, "update_session_activity", lambda uid: None)
97
  monkeypatch.setattr(callbacks, "get_session", lambda uid: session)
98
 
99
+ coords_text, img_update, log_text = callbacks.on_option_select("uid-1", 0, "12, 34")
100
 
101
+ assert coords_text == config.UI_TEXT["coords"]["select_keypoint"]
102
  assert img_update.get("interactive") is True
103
+ assert img_update.get("elem_classes") == config.get_live_obs_elem_classes(waiting_for_keypoint=True)
104
+ assert log_text == config.UI_TEXT["log"]["keypoint_selection_prompt"]
105
+
106
+
107
+ def test_on_map_click_clears_wait_state_and_restores_action_prompt(monkeypatch, reload_module):
108
+ config = reload_module("config")
109
+ callbacks = reload_module("gradio_callbacks")
110
+
111
+ session = _FakeOptionSession()
112
+ event = type("Evt", (), {"index": (5, 6)})()
113
+ monkeypatch.setattr(callbacks, "update_session_activity", lambda uid: None)
114
+ monkeypatch.setattr(callbacks, "get_session", lambda uid: session)
115
+
116
+ img_update, coords_text, log_text = callbacks.on_map_click("uid-1", 0, event)
117
+
118
+ assert img_update.get("__type__") == "update"
119
+ assert isinstance(img_update.get("value"), Image.Image)
120
+ assert img_update["value"].getpixel((5, 6)) != (0, 0, 0)
121
+ assert img_update.get("elem_classes") == config.get_live_obs_elem_classes()
122
+ assert coords_text == "5, 6"
123
+ assert log_text == config.UI_TEXT["log"]["action_selection_prompt"]
124
 
125
 
126
  def test_on_reference_action_uses_configured_action_text_override(monkeypatch, reload_module):
 
148
  assert coords_text == config.UI_TEXT["coords"]["not_needed"]
149
  assert log_html == config.UI_TEXT["log"]["reference_action_message"].format(
150
  option_label="a",
151
+ option_action="move forward",
152
  )
gradio-web/test/test_ui_native_layout_contract.py CHANGED
@@ -45,6 +45,20 @@ def test_native_ui_css_excludes_header_title_from_global_font_size(reload_module
45
  assert "font-size: var(--text-xxl) !important;" in ui_layout.CSS
46
 
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  def test_extract_last_goal_prefers_last_list_item(reload_module):
49
  ui_layout = reload_module("ui_layout")
50
 
 
45
  assert "font-size: var(--text-xxl) !important;" in ui_layout.CSS
46
 
47
 
48
+ def test_native_ui_css_highlights_media_card_not_live_obs_transform(reload_module):
49
+ ui_layout = reload_module("ui_layout")
50
+
51
+ css = ui_layout.CSS
52
+
53
+ assert "#media_card::after" in css
54
+ assert "#media_card:has(#live_obs.live-obs-keypoint-waiting)::after" in css
55
+ assert "animation: media-card-keypoint-ring 1.2s ease-in-out infinite;" in css
56
+ assert "@keyframes media-card-keypoint-ring" in css
57
+ assert "#live_obs.live-obs-keypoint-waiting .image-frame" not in css
58
+ assert "#live_obs.live-obs-keypoint-waiting .upload-container" not in css
59
+ assert "transform: scale(" not in css
60
+
61
+
62
  def test_extract_last_goal_prefers_last_list_item(reload_module):
63
  ui_layout = reload_module("ui_layout")
64
 
gradio-web/test/test_ui_phase_machine_runtime_e2e.py CHANGED
@@ -100,6 +100,68 @@ def _read_coords_box_value(page) -> str | None:
100
  )
101
 
102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  def _read_phase_visibility(page) -> dict[str, bool | str | None]:
104
  return page.evaluate(
105
  """() => {
@@ -986,6 +1048,207 @@ def test_no_video_task_hides_manual_demo_button(monkeypatch):
986
  demo.close()
987
 
988
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
989
  def test_live_obs_client_resize_fills_width_and_keeps_click_mapping(monkeypatch):
990
  callbacks = importlib.reload(importlib.import_module("gradio_callbacks"))
991
  ui_layout = importlib.reload(importlib.import_module("ui_layout"))
 
100
  )
101
 
102
 
103
+ def _read_log_output_value(page) -> str | None:
104
+ return page.evaluate(
105
+ """() => {
106
+ const root = document.getElementById('log_output');
107
+ if (!root) return null;
108
+ const field = root.querySelector('textarea, input');
109
+ if (field && typeof field.value === 'string') {
110
+ const value = field.value.trim();
111
+ return value || null;
112
+ }
113
+ const value = (root.textContent || '').trim();
114
+ return value || null;
115
+ }"""
116
+ )
117
+
118
+
119
+ def _read_elem_classes(page, elem_id: str) -> list[str] | None:
120
+ return page.evaluate(
121
+ """(elemId) => {
122
+ const root = document.getElementById(elemId);
123
+ return root ? Array.from(root.classList) : null;
124
+ }""",
125
+ elem_id,
126
+ )
127
+
128
+
129
+ def _read_media_card_wait_snapshot(page) -> dict[str, str | float | None]:
130
+ return page.evaluate(
131
+ """() => {
132
+ const card = document.getElementById('media_card');
133
+ if (!card) {
134
+ return {
135
+ opacity: null,
136
+ borderColor: null,
137
+ boxShadow: null,
138
+ animationName: null,
139
+ };
140
+ }
141
+ const style = getComputedStyle(card, '::after');
142
+ return {
143
+ opacity: Number.parseFloat(style.opacity || '0'),
144
+ borderColor: style.borderColor || null,
145
+ boxShadow: style.boxShadow || null,
146
+ animationName: style.animationName || null,
147
+ };
148
+ }"""
149
+ )
150
+
151
+
152
+ def _read_live_obs_transform_snapshot(page) -> dict[str, str | None]:
153
+ return page.evaluate(
154
+ """() => {
155
+ const img = document.querySelector('#live_obs img');
156
+ const frame = document.querySelector('#live_obs .image-frame');
157
+ return {
158
+ imgTransform: img ? getComputedStyle(img).transform : null,
159
+ frameTransform: frame ? getComputedStyle(frame).transform : null,
160
+ };
161
+ }"""
162
+ )
163
+
164
+
165
  def _read_phase_visibility(page) -> dict[str, bool | str | None]:
166
  return page.evaluate(
167
  """() => {
 
1048
  demo.close()
1049
 
1050
 
1051
+ def test_keypoint_wait_state_pulses_live_obs_and_updates_system_log(monkeypatch):
1052
+ config_module = importlib.reload(importlib.import_module("config"))
1053
+ callbacks = importlib.reload(importlib.import_module("gradio_callbacks"))
1054
+ ui_layout = importlib.reload(importlib.import_module("ui_layout"))
1055
+
1056
+ fake_obs = np.zeros((24, 48, 3), dtype=np.uint8)
1057
+ fake_obs[:, :] = [15, 20, 25]
1058
+ fake_obs_img = Image.fromarray(fake_obs)
1059
+
1060
+ class FakeSession:
1061
+ raw_solve_options = [{"available": [object()]}, {"available": False}]
1062
+
1063
+ def get_pil_image(self, use_segmented=False):
1064
+ _ = use_segmented
1065
+ return fake_obs_img.copy()
1066
+
1067
+ def fake_init_app(_request=None):
1068
+ return (
1069
+ "uid-keypoint-wait",
1070
+ gr.update(visible=True), # main_interface
1071
+ gr.update(
1072
+ value=fake_obs_img.copy(),
1073
+ interactive=False,
1074
+ elem_classes=config_module.get_live_obs_elem_classes(),
1075
+ ), # img_display
1076
+ config_module.UI_TEXT["log"]["action_selection_prompt"], # log_output
1077
+ gr.update(choices=[("pick", 0), ("skip", 1)], value=None), # options_radio
1078
+ "goal", # goal_box
1079
+ gr.update(
1080
+ value=config_module.UI_TEXT["coords"]["not_needed"],
1081
+ visible=True,
1082
+ interactive=False,
1083
+ ), # coords_box
1084
+ gr.update(value=None, visible=False), # video_display
1085
+ gr.update(visible=False, interactive=False), # watch_demo_video_btn
1086
+ "KeypointEnv (Episode 1)", # task_info_box
1087
+ "Completed: 0", # progress_info_box
1088
+ gr.update(interactive=True), # restart_episode_btn
1089
+ gr.update(interactive=True), # next_task_btn
1090
+ gr.update(interactive=True), # exec_btn
1091
+ gr.update(visible=False), # video_phase_group
1092
+ gr.update(visible=True), # action_phase_group
1093
+ gr.update(visible=True), # control_panel_group
1094
+ gr.update(value="hint"), # task_hint_display
1095
+ gr.update(visible=False), # loading_overlay
1096
+ gr.update(interactive=True), # reference_action_btn
1097
+ )
1098
+
1099
+ monkeypatch.setattr(ui_layout, "init_app", fake_init_app)
1100
+ monkeypatch.setattr(callbacks, "get_session", lambda uid: FakeSession())
1101
+ monkeypatch.setattr(callbacks, "update_session_activity", lambda uid: None)
1102
+
1103
+ demo = ui_layout.create_ui_blocks()
1104
+
1105
+ port = _free_port()
1106
+ host = "127.0.0.1"
1107
+ root_url = f"http://{host}:{port}/"
1108
+
1109
+ app = FastAPI(title="keypoint-wait-state-test")
1110
+ app = gr.mount_gradio_app(app, demo, path="/")
1111
+
1112
+ config = uvicorn.Config(app, host=host, port=port, log_level="error")
1113
+ server = uvicorn.Server(config)
1114
+ thread = threading.Thread(target=server.run, daemon=True)
1115
+ thread.start()
1116
+ _wait_http_ready(root_url)
1117
+
1118
+ try:
1119
+ with sync_playwright() as p:
1120
+ browser = p.chromium.launch(headless=True)
1121
+ page = browser.new_page(viewport={"width": 1280, "height": 900})
1122
+ page.goto(root_url, wait_until="domcontentloaded")
1123
+ page.add_style_tag(content=ui_layout.CSS)
1124
+ page.wait_for_selector("#main_interface_root", state="visible", timeout=15000)
1125
+ page.wait_for_selector("#live_obs img", timeout=15000)
1126
+
1127
+ initial_classes = _read_elem_classes(page, "live_obs")
1128
+ assert initial_classes is not None
1129
+ assert config_module.LIVE_OBS_KEYPOINT_WAIT_CLASS not in initial_classes
1130
+ assert _read_log_output_value(page) == config_module.UI_TEXT["log"]["action_selection_prompt"]
1131
+ initial_card_wait = _read_media_card_wait_snapshot(page)
1132
+ initial_transforms = _read_live_obs_transform_snapshot(page)
1133
+ initial_img_box = page.locator("#live_obs img").bounding_box()
1134
+ initial_frame_box = page.locator("#live_obs .image-frame").bounding_box()
1135
+ assert initial_card_wait["opacity"] == 0
1136
+ assert initial_card_wait["animationName"] == "none"
1137
+ assert initial_transforms["imgTransform"] == "none"
1138
+ assert initial_transforms["frameTransform"] == "none"
1139
+ assert initial_img_box is not None
1140
+ assert initial_frame_box is not None
1141
+
1142
+ page.locator("#action_radio input[type='radio']").first.check(force=True)
1143
+
1144
+ page.wait_for_function(
1145
+ """(state) => {
1146
+ const liveObs = document.getElementById('live_obs');
1147
+ const coordsRoot = document.getElementById('coords_box');
1148
+ const coordsField = coordsRoot?.querySelector('textarea, input');
1149
+ const logRoot = document.getElementById('log_output');
1150
+ const logField = logRoot?.querySelector('textarea, input');
1151
+ const mediaCard = document.getElementById('media_card');
1152
+ const mediaAfter = mediaCard ? getComputedStyle(mediaCard, '::after') : null;
1153
+ const coordsValue = coordsField ? coordsField.value.trim() : '';
1154
+ const logValue = logField ? logField.value.trim() : (logRoot?.textContent || '').trim();
1155
+ return (
1156
+ !!liveObs &&
1157
+ liveObs.classList.contains(state.waitClass) &&
1158
+ !!mediaAfter &&
1159
+ Number.parseFloat(mediaAfter.opacity || '0') > 0.5 &&
1160
+ mediaAfter.animationName === state.cardAnimation &&
1161
+ coordsValue === state.coordsPrompt &&
1162
+ logValue === state.waitLog
1163
+ );
1164
+ }""",
1165
+ arg={
1166
+ "cardAnimation": "media-card-keypoint-ring",
1167
+ "waitClass": config_module.LIVE_OBS_KEYPOINT_WAIT_CLASS,
1168
+ "coordsPrompt": config_module.UI_TEXT["coords"]["select_keypoint"],
1169
+ "waitLog": config_module.UI_TEXT["log"]["keypoint_selection_prompt"],
1170
+ },
1171
+ timeout=5000,
1172
+ )
1173
+
1174
+ wait_classes = _read_elem_classes(page, "live_obs")
1175
+ assert wait_classes is not None
1176
+ assert config_module.LIVE_OBS_KEYPOINT_WAIT_CLASS in wait_classes
1177
+ assert _read_coords_box_value(page) == config_module.UI_TEXT["coords"]["select_keypoint"]
1178
+ assert _read_log_output_value(page) == config_module.UI_TEXT["log"]["keypoint_selection_prompt"]
1179
+ wait_card = _read_media_card_wait_snapshot(page)
1180
+ wait_transforms = _read_live_obs_transform_snapshot(page)
1181
+ wait_img_box = page.locator("#live_obs img").bounding_box()
1182
+ wait_frame_box = page.locator("#live_obs .image-frame").bounding_box()
1183
+ assert wait_card["opacity"] is not None and wait_card["opacity"] > 0.5
1184
+ assert wait_card["animationName"] == "media-card-keypoint-ring"
1185
+ assert wait_card["borderColor"] != "rgba(225, 29, 72, 0)"
1186
+ assert wait_transforms["imgTransform"] == "none"
1187
+ assert wait_transforms["frameTransform"] == "none"
1188
+ assert wait_img_box is not None
1189
+ assert wait_frame_box is not None
1190
+ assert wait_img_box["x"] == pytest.approx(initial_img_box["x"], abs=1.0)
1191
+ assert wait_img_box["y"] == pytest.approx(initial_img_box["y"], abs=1.0)
1192
+ assert wait_img_box["width"] == pytest.approx(initial_img_box["width"], abs=1.0)
1193
+ assert wait_img_box["height"] == pytest.approx(initial_img_box["height"], abs=1.0)
1194
+ assert wait_frame_box["x"] == pytest.approx(initial_frame_box["x"], abs=1.0)
1195
+ assert wait_frame_box["y"] == pytest.approx(initial_frame_box["y"], abs=1.0)
1196
+ assert wait_frame_box["width"] == pytest.approx(initial_frame_box["width"], abs=1.0)
1197
+ assert wait_frame_box["height"] == pytest.approx(initial_frame_box["height"], abs=1.0)
1198
+
1199
+ box = page.locator("#live_obs img").bounding_box()
1200
+ assert box is not None
1201
+ target_x = box["x"] + ((24.5) / 48.0) * box["width"]
1202
+ target_y = box["y"] + ((8.5) / 24.0) * box["height"]
1203
+ page.mouse.click(target_x, target_y)
1204
+
1205
+ page.wait_for_function(
1206
+ """(state) => {
1207
+ const liveObs = document.getElementById('live_obs');
1208
+ const coordsRoot = document.getElementById('coords_box');
1209
+ const coordsField = coordsRoot?.querySelector('textarea, input');
1210
+ const logRoot = document.getElementById('log_output');
1211
+ const logField = logRoot?.querySelector('textarea, input');
1212
+ const coordsValue = coordsField ? coordsField.value.trim() : '';
1213
+ const logValue = logField ? logField.value.trim() : (logRoot?.textContent || '').trim();
1214
+ return (
1215
+ !!liveObs &&
1216
+ !liveObs.classList.contains(state.waitClass) &&
1217
+ /^\\d+\\s*,\\s*\\d+$/.test(coordsValue) &&
1218
+ logValue === state.actionLog
1219
+ );
1220
+ }""",
1221
+ arg={
1222
+ "waitClass": config_module.LIVE_OBS_KEYPOINT_WAIT_CLASS,
1223
+ "actionLog": config_module.UI_TEXT["log"]["action_selection_prompt"],
1224
+ },
1225
+ timeout=5000,
1226
+ )
1227
+
1228
+ coords_value = _read_coords_box_value(page)
1229
+ assert coords_value is not None
1230
+ coord_x, coord_y = [int(part.strip()) for part in coords_value.split(",", 1)]
1231
+ assert abs(coord_x - 24) <= 1
1232
+ assert abs(coord_y - 8) <= 1
1233
+ final_classes = _read_elem_classes(page, "live_obs")
1234
+ assert final_classes is not None
1235
+ assert config_module.LIVE_OBS_KEYPOINT_WAIT_CLASS not in final_classes
1236
+ assert config_module.LIVE_OBS_BASE_CLASS in final_classes
1237
+ assert _read_log_output_value(page) == config_module.UI_TEXT["log"]["action_selection_prompt"]
1238
+ final_card_wait = _read_media_card_wait_snapshot(page)
1239
+ final_transforms = _read_live_obs_transform_snapshot(page)
1240
+ assert final_card_wait["opacity"] == 0
1241
+ assert final_card_wait["animationName"] == "none"
1242
+ assert final_transforms["imgTransform"] == "none"
1243
+ assert final_transforms["frameTransform"] == "none"
1244
+
1245
+ browser.close()
1246
+ finally:
1247
+ server.should_exit = True
1248
+ thread.join(timeout=10)
1249
+ demo.close()
1250
+
1251
+
1252
  def test_live_obs_client_resize_fills_width_and_keeps_click_mapping(monkeypatch):
1253
  callbacks = importlib.reload(importlib.import_module("gradio_callbacks"))
1254
  ui_layout = importlib.reload(importlib.import_module("ui_layout"))
gradio-web/test/test_ui_text_config.py CHANGED
@@ -1,6 +1,7 @@
1
  from __future__ import annotations
2
 
3
  import pytest
 
4
 
5
 
6
  class _FakeOptionSession:
@@ -8,6 +9,10 @@ class _FakeOptionSession:
8
  self.env_id = env_id
9
  self.raw_solve_options = raw_solve_options or [{"available": True}]
10
 
 
 
 
 
11
 
12
  class _FakeLoadSession:
13
  def __init__(self, env_id, available_options, raw_solve_options, demonstration_frames=None, language_goal=""):
@@ -25,18 +30,25 @@ class _FakeLoadSession:
25
  return "IMG"
26
 
27
 
28
- def test_on_option_select_uses_configured_select_keypoint_message(monkeypatch, reload_module):
29
  reload_module("config")
30
  callbacks = reload_module("gradio_callbacks")
31
 
32
  monkeypatch.setitem(callbacks.UI_TEXT["coords"], "select_keypoint", "pick a point from config")
 
 
 
 
 
33
  monkeypatch.setattr(callbacks, "update_session_activity", lambda uid: None)
34
  monkeypatch.setattr(callbacks, "get_session", lambda uid: _FakeOptionSession())
35
 
36
- coords_text, img_update = callbacks.on_option_select("uid-1", 0, None)
37
 
38
  assert coords_text == "pick a point from config"
39
  assert img_update.get("interactive") is True
 
 
40
 
41
 
42
  def test_precheck_execute_inputs_uses_configured_before_execute_message(monkeypatch, reload_module):
@@ -101,12 +113,14 @@ def test_missing_session_paths_use_configured_session_error(monkeypatch, reload_
101
  monkeypatch.setattr(callbacks, "get_session", lambda uid: None)
102
 
103
  _img, _option_update, coords_text, log_text = callbacks.on_reference_action("uid-missing")
104
- map_img, map_text = callbacks.on_map_click("uid-missing", None, None)
105
 
106
  assert coords_text == callbacks.UI_TEXT["coords"]["not_needed"]
107
  assert log_text == "Session Error From Config"
108
- assert map_img is None
109
- assert map_text == "Session Error From Config"
 
 
110
 
111
 
112
  def test_get_ui_action_text_uses_configured_overrides_and_fallback(reload_module):
 
1
  from __future__ import annotations
2
 
3
  import pytest
4
+ from PIL import Image
5
 
6
 
7
  class _FakeOptionSession:
 
9
  self.env_id = env_id
10
  self.raw_solve_options = raw_solve_options or [{"available": True}]
11
 
12
+ def get_pil_image(self, use_segmented=False):
13
+ _ = use_segmented
14
+ return Image.new("RGB", (8, 8), color=(0, 0, 0))
15
+
16
 
17
  class _FakeLoadSession:
18
  def __init__(self, env_id, available_options, raw_solve_options, demonstration_frames=None, language_goal=""):
 
30
  return "IMG"
31
 
32
 
33
+ def test_on_option_select_uses_configured_select_keypoint_and_log_messages(monkeypatch, reload_module):
34
  reload_module("config")
35
  callbacks = reload_module("gradio_callbacks")
36
 
37
  monkeypatch.setitem(callbacks.UI_TEXT["coords"], "select_keypoint", "pick a point from config")
38
+ monkeypatch.setitem(
39
+ callbacks.UI_TEXT["log"],
40
+ "keypoint_selection_prompt",
41
+ "custom log prompt from config",
42
+ )
43
  monkeypatch.setattr(callbacks, "update_session_activity", lambda uid: None)
44
  monkeypatch.setattr(callbacks, "get_session", lambda uid: _FakeOptionSession())
45
 
46
+ coords_text, img_update, log_text = callbacks.on_option_select("uid-1", 0, None)
47
 
48
  assert coords_text == "pick a point from config"
49
  assert img_update.get("interactive") is True
50
+ assert callbacks.get_live_obs_elem_classes(waiting_for_keypoint=True) == img_update.get("elem_classes")
51
+ assert log_text == "custom log prompt from config"
52
 
53
 
54
  def test_precheck_execute_inputs_uses_configured_before_execute_message(monkeypatch, reload_module):
 
113
  monkeypatch.setattr(callbacks, "get_session", lambda uid: None)
114
 
115
  _img, _option_update, coords_text, log_text = callbacks.on_reference_action("uid-missing")
116
+ map_img, map_coords, map_log = callbacks.on_map_click("uid-missing", None, None)
117
 
118
  assert coords_text == callbacks.UI_TEXT["coords"]["not_needed"]
119
  assert log_text == "Session Error From Config"
120
+ assert map_img.get("__type__") == "update"
121
+ assert map_img.get("value") is None
122
+ assert map_coords == callbacks.UI_TEXT["coords"]["not_needed"]
123
+ assert map_log == "Session Error From Config"
124
 
125
 
126
  def test_get_ui_action_text_uses_configured_overrides_and_fallback(reload_module):
gradio-web/ui_layout.py CHANGED
@@ -10,11 +10,14 @@ import gradio as gr
10
 
11
  from config import (
12
  CONTROL_PANEL_SCALE,
 
 
13
  LIVE_OBS_REFRESH_HZ,
14
  KEYPOINT_SELECTION_SCALE,
15
  RIGHT_TOP_ACTION_SCALE,
16
  RIGHT_TOP_LOG_SCALE,
17
  UI_GLOBAL_FONT_SIZE,
 
18
  )
19
  from gradio_callbacks import (
20
  execute_step,
@@ -299,6 +302,60 @@ button#reference_action_btn:not(:disabled):hover {{
299
  button#watch_demo_video_btn {{
300
  width: 100%;
301
  }}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
302
  """
303
 
304
 
@@ -439,7 +496,7 @@ def create_ui_blocks():
439
  interactive=False,
440
  type="pil",
441
  elem_id="live_obs",
442
- elem_classes=["live-obs-resizable"],
443
  show_label=True,
444
  buttons=[],
445
  sources=[],
@@ -739,13 +796,13 @@ def create_ui_blocks():
739
  img_display.select(
740
  fn=on_map_click,
741
  inputs=[uid_state, options_radio],
742
- outputs=[img_display, coords_box],
743
  )
744
 
745
  options_radio.change(
746
  fn=on_option_select,
747
  inputs=[uid_state, options_radio, coords_box],
748
- outputs=[coords_box, img_display],
749
  )
750
 
751
  watch_demo_video_btn.click(
 
10
 
11
  from config import (
12
  CONTROL_PANEL_SCALE,
13
+ LIVE_OBS_BASE_CLASS,
14
+ LIVE_OBS_KEYPOINT_WAIT_CLASS,
15
  LIVE_OBS_REFRESH_HZ,
16
  KEYPOINT_SELECTION_SCALE,
17
  RIGHT_TOP_ACTION_SCALE,
18
  RIGHT_TOP_LOG_SCALE,
19
  UI_GLOBAL_FONT_SIZE,
20
+ get_live_obs_elem_classes,
21
  )
22
  from gradio_callbacks import (
23
  execute_step,
 
302
  button#watch_demo_video_btn {{
303
  width: 100%;
304
  }}
305
+
306
+ #media_card {{
307
+ position: relative;
308
+ border-radius: 24px;
309
+ overflow: visible;
310
+ }}
311
+
312
+ #media_card::after {{
313
+ content: "";
314
+ position: absolute;
315
+ inset: -8px;
316
+ border-radius: 30px;
317
+ border: 3px solid rgba(225, 29, 72, 0.00);
318
+ box-shadow: 0 0 0 0 rgba(225, 29, 72, 0.00);
319
+ opacity: 0;
320
+ pointer-events: none;
321
+ transition: opacity 180ms ease, border-color 180ms ease, box-shadow 180ms ease;
322
+ z-index: 0;
323
+ }}
324
+
325
+ @keyframes media-card-keypoint-ring {{
326
+ 0% {{
327
+ box-shadow: 0 0 0 0 rgba(225, 29, 72, 0.00);
328
+ border-color: rgba(225, 29, 72, 0.72);
329
+ opacity: 0.72;
330
+ }}
331
+ 18% {{
332
+ box-shadow: 0 0 0 4px rgba(225, 29, 72, 0.28);
333
+ border-color: rgba(225, 29, 72, 0.94);
334
+ opacity: 1;
335
+ }}
336
+ 36% {{
337
+ box-shadow: 0 0 0 10px rgba(225, 29, 72, 0.12);
338
+ border-color: rgba(225, 29, 72, 0.88);
339
+ opacity: 0.94;
340
+ }}
341
+ 62% {{
342
+ box-shadow: 0 0 0 2px rgba(225, 29, 72, 0.18);
343
+ border-color: rgba(225, 29, 72, 0.96);
344
+ opacity: 1;
345
+ }}
346
+ 100% {{
347
+ box-shadow: 0 0 0 0 rgba(225, 29, 72, 0.00);
348
+ border-color: rgba(225, 29, 72, 0.72);
349
+ opacity: 0.72;
350
+ }}
351
+ }}
352
+
353
+ #media_card:has(#live_obs.{LIVE_OBS_KEYPOINT_WAIT_CLASS})::after {{
354
+ border-color: rgba(225, 29, 72, 0.94);
355
+ box-shadow: 0 0 0 0 rgba(225, 29, 72, 0.00);
356
+ opacity: 1;
357
+ animation: media-card-keypoint-ring 1.2s ease-in-out infinite;
358
+ }}
359
  """
360
 
361
 
 
496
  interactive=False,
497
  type="pil",
498
  elem_id="live_obs",
499
+ elem_classes=get_live_obs_elem_classes(),
500
  show_label=True,
501
  buttons=[],
502
  sources=[],
 
796
  img_display.select(
797
  fn=on_map_click,
798
  inputs=[uid_state, options_radio],
799
+ outputs=[img_display, coords_box, log_output],
800
  )
801
 
802
  options_radio.change(
803
  fn=on_option_select,
804
  inputs=[uid_state, options_radio, coords_box],
805
+ outputs=[coords_box, img_display, log_output],
806
  )
807
 
808
  watch_demo_video_btn.click(