Spaces:

HongzeFu
/

RoboMME

Running on T4

App Files Files Community

HongzeFu committed 4 days ago

Commit

6a155d5

1 Parent(s): d1f67eb

keypoint change to point

Browse files

Files changed (9) hide show

gradio-web/config.py +11 -11
gradio-web/gradio_callbacks.py +12 -12
gradio-web/test/test_live_obs_refresh.py +1 -1
gradio-web/test/test_precheck_execute_inputs.py +2 -2
gradio-web/test/test_reference_action_callbacks.py +4 -4
gradio-web/test/test_ui_native_layout_contract.py +5 -5
gradio-web/test/test_ui_phase_machine_runtime_e2e.py +29 -29
gradio-web/test/test_ui_text_config.py +8 -8
gradio-web/ui_layout.py +15 -15

gradio-web/config.py CHANGED Viewed

@@ -7,8 +7,8 @@ USE_SEGMENTED_VIEW = False  # Set to True to use segmented view, False to use or
 LIVE_OBS_REFRESH_HZ = 30.0  # Live observation refresh frequency in Hz
 KEYFRAME_DOWNSAMPLE_FACTOR = 1  # Keep 1 frame out of every N streamed frames
-# 主界面两列宽度比例 (Keypoint Selection : Right Panel)
-KEYPOINT_SELECTION_SCALE = 1
 CONTROL_PANEL_SCALE = 2
 # 右侧顶部并排比例 (Action Selection : System Log)
@@ -59,8 +59,8 @@ DEMO_VIDEO_ENV_IDS = [
 UI_TEXT = {
     "log": {
-        "action_selection_prompt": "Please select the action in the left 👈,\nSome actions also need to select keypoint",
-        "keypoint_selection_prompt": "Current action needs location input, please click on the image to select key pixel",
         "demo_video_prompt": 'Press "Watch Video Input🎬" to watch a video\nNote: you can only watch the video once',
         "session_error": "Session Error",
         "reference_action_error": "Ground Truth Action Error: {error}",
@@ -73,11 +73,11 @@ UI_TEXT = {
     },
     "coords": {
         "not_needed": "No need for coordinates",
-        "select_keypoint": "Please click the keypoint selection image",
-        "select_keypoint_before_execute": "Please click the keypoint selection image before execute!",
     },
     "actions": {
-        "keypoint_required_suffix": " 🎯",
     },
     "errors": {
         "load_missing_task": "Error loading task: missing current_task",
@@ -119,13 +119,13 @@ ROUTESTICK_OVERLAY_ACTION_TEXTS = [
 ]
 LIVE_OBS_BASE_CLASS = "live-obs-resizable"
-LIVE_OBS_KEYPOINT_WAIT_CLASS = "live-obs-keypoint-waiting"
-def get_live_obs_elem_classes(waiting_for_keypoint=False):
     classes = [LIVE_OBS_BASE_CLASS]
-    if waiting_for_keypoint:
-        classes.append(LIVE_OBS_KEYPOINT_WAIT_CLASS)
     return classes

 LIVE_OBS_REFRESH_HZ = 30.0  # Live observation refresh frequency in Hz
 KEYFRAME_DOWNSAMPLE_FACTOR = 1  # Keep 1 frame out of every N streamed frames
+# 主界面两列宽度比例 (Point Selection : Right Panel)
+POINT_SELECTION_SCALE = 1
 CONTROL_PANEL_SCALE = 2
 # 右侧顶部并排比例 (Action Selection : System Log)
 UI_TEXT = {
     "log": {
+        "action_selection_prompt": "Please select the action in the left 👈,\nSome actions also need to select a point",
+        "point_selection_prompt": "Current action needs location input, please click on the image to select a point",
         "demo_video_prompt": 'Press "Watch Video Input🎬" to watch a video\nNote: you can only watch the video once',
         "session_error": "Session Error",
         "reference_action_error": "Ground Truth Action Error: {error}",
     },
     "coords": {
         "not_needed": "No need for coordinates",
+        "select_point": "Please click the point selection image",
+        "select_point_before_execute": "Please click the point selection image before execute!",
     },
     "actions": {
+        "point_required_suffix": " 🎯",
     },
     "errors": {
         "load_missing_task": "Error loading task: missing current_task",
 ]
 LIVE_OBS_BASE_CLASS = "live-obs-resizable"
+LIVE_OBS_POINT_WAIT_CLASS = "live-obs-point-waiting"
+def get_live_obs_elem_classes(waiting_for_point=False):
     classes = [LIVE_OBS_BASE_CLASS]
+    if waiting_for_point:
+        classes.append(LIVE_OBS_POINT_WAIT_CLASS)
     return classes

gradio-web/gradio_callbacks.py CHANGED Viewed

@@ -67,8 +67,8 @@ def _action_selection_log():
     return format_log_markdown(_ui_text("log", "action_selection_prompt"))
-def _keypoint_selection_log():
-    return format_log_markdown(_ui_text("log", "keypoint_selection_prompt"))
 def _live_obs_update(
@@ -76,10 +76,10 @@ def _live_obs_update(
     value=_LIVE_OBS_UPDATE_SKIP,
     interactive=None,
     visible=None,
-    waiting_for_keypoint=False,
 ):
     kwargs = {
-        "elem_classes": get_live_obs_elem_classes(waiting_for_keypoint=waiting_for_keypoint),
     }
     if value is not _LIVE_OBS_UPDATE_SKIP:
         kwargs["value"] = value
@@ -293,7 +293,7 @@ def on_demo_video_play(uid):
 def switch_to_execute_phase(uid):
-    """Disable controls and keypoint clicking during execute playback."""
     if uid:
         session = get_session(uid)
         base_count = len(getattr(session, "base_frames", []) or []) if session else 0
@@ -596,7 +596,7 @@ def _load_status_task(uid, status):
         if 0 <= opt_idx < len(session.raw_solve_options):
             opt = session.raw_solve_options[opt_idx]
             if opt.get("available"):
-                opt_label_with_hint = f"{opt_label}{_ui_text('actions', 'keypoint_required_suffix')}"
             else:
                 opt_label_with_hint = opt_label
         else:
@@ -840,7 +840,7 @@ def _is_valid_coords_text(coords_text: str) -> bool:
     text = coords_text.strip()
     if text in {
         "",
-        _ui_text("coords", "select_keypoint"),
         _ui_text("coords", "not_needed"),
     }:
         return False
@@ -896,9 +896,9 @@ def on_option_select(uid, option_value, coords_str=None, suppress_next_option_ch
             _is_valid_coords_text(coords_str),
         )
         return (
-            _ui_text("coords", "select_keypoint"),
-            _live_obs_update(value=base_img, interactive=True, waiting_for_keypoint=True),
-            _keypoint_selection_log(),
             False,
         )
@@ -1050,7 +1050,7 @@ def precheck_execute_inputs(uid, option_idx, coords_str):
             parsed_option_idx,
             coords_str,
         )
-        raise gr.Error(_ui_text("coords", "select_keypoint_before_execute"))
     LOGGER.debug(
         "precheck_execute_inputs passed uid=%s option=%s needs_coords=%s",
         _uid_for_log(uid),
@@ -1145,7 +1145,7 @@ def execute_step(uid, option_idx, coords_str):
                 coords_str,
             )
             current_img = session.get_pil_image(use_segmented=USE_SEGMENTED_VIEW)
-            error_msg = _ui_text("coords", "select_keypoint_before_execute")
             return _live_obs_update(value=current_img, interactive=False), format_log_markdown(error_msg), gr.update(), gr.update(), gr.update(interactive=False), gr.update(interactive=True)
     # Parse coords

     return format_log_markdown(_ui_text("log", "action_selection_prompt"))
+def _point_selection_log():
+    return format_log_markdown(_ui_text("log", "point_selection_prompt"))
 def _live_obs_update(
     value=_LIVE_OBS_UPDATE_SKIP,
     interactive=None,
     visible=None,
+    waiting_for_point=False,
 ):
     kwargs = {
+        "elem_classes": get_live_obs_elem_classes(waiting_for_point=waiting_for_point),
     }
     if value is not _LIVE_OBS_UPDATE_SKIP:
         kwargs["value"] = value
 def switch_to_execute_phase(uid):
+    """Disable controls and point clicking during execute playback."""
     if uid:
         session = get_session(uid)
         base_count = len(getattr(session, "base_frames", []) or []) if session else 0
         if 0 <= opt_idx < len(session.raw_solve_options):
             opt = session.raw_solve_options[opt_idx]
             if opt.get("available"):
+                opt_label_with_hint = f"{opt_label}{_ui_text('actions', 'point_required_suffix')}"
             else:
                 opt_label_with_hint = opt_label
         else:
     text = coords_text.strip()
     if text in {
         "",
+        _ui_text("coords", "select_point"),
         _ui_text("coords", "not_needed"),
     }:
         return False
             _is_valid_coords_text(coords_str),
         )
         return (
+            _ui_text("coords", "select_point"),
+            _live_obs_update(value=base_img, interactive=True, waiting_for_point=True),
+            _point_selection_log(),
             False,
         )
             parsed_option_idx,
             coords_str,
         )
+        raise gr.Error(_ui_text("coords", "select_point_before_execute"))
     LOGGER.debug(
         "precheck_execute_inputs passed uid=%s option=%s needs_coords=%s",
         _uid_for_log(uid),
                 coords_str,
             )
             current_img = session.get_pil_image(use_segmented=USE_SEGMENTED_VIEW)
+            error_msg = _ui_text("coords", "select_point_before_execute")
             return _live_obs_update(value=current_img, interactive=False), format_log_markdown(error_msg), gr.update(), gr.update(), gr.update(interactive=False), gr.update(interactive=True)
     # Parse coords

gradio-web/test/test_live_obs_refresh.py CHANGED Viewed

@@ -14,7 +14,7 @@ def test_refresh_live_obs_skips_when_not_execution_phase(monkeypatch, reload_mod
     callbacks = reload_module("gradio_callbacks")
     monkeypatch.setattr(callbacks, "get_session", lambda uid: _FakeSession([]))
-    update = callbacks.refresh_live_obs("uid-1", "action_keypoint")
     assert update.get("__type__") == "update"
     assert "value" not in update

     callbacks = reload_module("gradio_callbacks")
     monkeypatch.setattr(callbacks, "get_session", lambda uid: _FakeSession([]))
+    update = callbacks.refresh_live_obs("uid-1", "action_point")
     assert update.get("__type__") == "update"
     assert "value" not in update

gradio-web/test/test_precheck_execute_inputs.py CHANGED Viewed

@@ -28,10 +28,10 @@ def test_precheck_execute_inputs_requires_coords_when_option_needs_it(monkeypatc
     with pytest.raises(Exception) as excinfo:
         callbacks.precheck_execute_inputs(
-            "uid-1", 0, config.UI_TEXT["coords"]["select_keypoint"]
         )
-    assert config.UI_TEXT["coords"]["select_keypoint_before_execute"] in str(excinfo.value)
 def test_precheck_execute_inputs_accepts_valid_coords(monkeypatch, reload_module):

     with pytest.raises(Exception) as excinfo:
         callbacks.precheck_execute_inputs(
+            "uid-1", 0, config.UI_TEXT["coords"]["select_point"]
         )
+    assert config.UI_TEXT["coords"]["select_point_before_execute"] in str(excinfo.value)
 def test_precheck_execute_inputs_accepts_valid_coords(monkeypatch, reload_module):

gradio-web/test/test_reference_action_callbacks.py CHANGED Viewed

@@ -119,7 +119,7 @@ def test_on_reference_action_same_selected_option_does_not_set_suppression(monke
     assert suppress_flag is False
-def test_on_option_select_resets_to_keypoint_wait_state_for_point_action(monkeypatch, reload_module):
     config = reload_module("config")
     callbacks = reload_module("gradio_callbacks")
@@ -129,10 +129,10 @@ def test_on_option_select_resets_to_keypoint_wait_state_for_point_action(monkeyp
     coords_text, img_update, log_text, suppress_flag = callbacks.on_option_select("uid-1", 0, "12, 34", False)
-    assert coords_text == config.UI_TEXT["coords"]["select_keypoint"]
     assert img_update.get("interactive") is True
-    assert img_update.get("elem_classes") == config.get_live_obs_elem_classes(waiting_for_keypoint=True)
-    assert log_text == config.UI_TEXT["log"]["keypoint_selection_prompt"]
     assert suppress_flag is False

     assert suppress_flag is False
+def test_on_option_select_resets_to_point_wait_state_for_point_action(monkeypatch, reload_module):
     config = reload_module("config")
     callbacks = reload_module("gradio_callbacks")
     coords_text, img_update, log_text, suppress_flag = callbacks.on_option_select("uid-1", 0, "12, 34", False)
+    assert coords_text == config.UI_TEXT["coords"]["select_point"]
     assert img_update.get("interactive") is True
+    assert img_update.get("elem_classes") == config.get_live_obs_elem_classes(waiting_for_point=True)
+    assert log_text == config.UI_TEXT["log"]["point_selection_prompt"]
     assert suppress_flag is False

gradio-web/test/test_ui_native_layout_contract.py CHANGED Viewed

@@ -75,13 +75,13 @@ def test_native_ui_css_highlights_media_card_not_live_obs_transform(reload_modul
     assert "--media-card-radius: 8px;" in css
     assert "#media_card #live_obs button" in css
     assert "#media_card #live_obs img" in css
-    assert "#media_card:has(#live_obs.live-obs-keypoint-waiting)::after" in css
     assert "inset: 0;" in css
     assert "border-radius: inherit;" in css
-    assert "animation: media-card-keypoint-ring 1.2s ease-in-out infinite;" in css
-    assert "@keyframes media-card-keypoint-ring" in css
-    assert "#live_obs.live-obs-keypoint-waiting .image-frame" not in css
-    assert "#live_obs.live-obs-keypoint-waiting .upload-container" not in css
     assert "transform: scale(" not in css

     assert "--media-card-radius: 8px;" in css
     assert "#media_card #live_obs button" in css
     assert "#media_card #live_obs img" in css
+    assert "#media_card:has(#live_obs.live-obs-point-waiting)::after" in css
     assert "inset: 0;" in css
     assert "border-radius: inherit;" in css
+    assert "animation: media-card-point-ring 1.2s ease-in-out infinite;" in css
+    assert "@keyframes media-card-point-ring" in css
+    assert "#live_obs.live-obs-point-waiting .image-frame" not in css
+    assert "#live_obs.live-obs-point-waiting .upload-container" not in css
     assert "transform: scale(" not in css

gradio-web/test/test_ui_phase_machine_runtime_e2e.py CHANGED Viewed

@@ -374,7 +374,7 @@ def phase_machine_ui_url():
             with gr.Column(visible=False, elem_id="control_panel_group") as control_panel_group:
                 options_radio = gr.Radio(choices=[("pick", 0)], value=0, elem_id="action_radio")
-                coords_box = gr.Textbox(value="please click the keypoint selection image", elem_id="coords_box")
                 with gr.Column(visible=False, elem_id="action_buttons_row") as action_buttons_row:
                     exec_btn = gr.Button("EXECUTE", elem_id="exec_btn")
                     reference_action_btn = gr.Button(
@@ -404,7 +404,7 @@ def phase_machine_ui_url():
                 gr.update(visible=False),
                 gr.update(visible=False),
                 gr.update(interactive=False),
-                gr.update(value="please click the keypoint selection image"),
                 "demo_video",
             )
@@ -423,13 +423,13 @@ def phase_machine_ui_url():
                 gr.update(visible=True),
                 gr.update(interactive=True),
                 gr.update(visible=False, interactive=False),
-                "action_keypoint",
             )
         def precheck_fn(_option_idx, _coords):
             state["precheck_calls"] += 1
             if state["precheck_calls"] == 1:
-                raise gr.Error("please click the keypoint selection image before execute!")
         def to_execute_fn():
             return (
@@ -456,7 +456,7 @@ def phase_machine_ui_url():
                 gr.update(interactive=True),
                 gr.update(interactive=True),
                 gr.update(interactive=True),
-                "action_keypoint",
             )
         login_btn.click(
@@ -1230,7 +1230,7 @@ def test_no_video_task_hides_manual_demo_button(monkeypatch):
         demo.close()
-def test_keypoint_wait_state_pulses_live_obs_and_updates_system_log(monkeypatch):
     config_module = importlib.reload(importlib.import_module("config"))
     callbacks = importlib.reload(importlib.import_module("gradio_callbacks"))
     ui_layout = importlib.reload(importlib.import_module("ui_layout"))
@@ -1248,7 +1248,7 @@ def test_keypoint_wait_state_pulses_live_obs_and_updates_system_log(monkeypatch)
     def fake_init_app(_request=None):
         return (
-            "uid-keypoint-wait",
             gr.update(visible=True),  # main_interface
             gr.update(
                 value=fake_obs_img.copy(),
@@ -1265,7 +1265,7 @@ def test_keypoint_wait_state_pulses_live_obs_and_updates_system_log(monkeypatch)
             ),  # coords_box
             gr.update(value=None, visible=False),  # video_display
             gr.update(visible=False, interactive=False),  # watch_demo_video_btn
-            "KeypointEnv (Episode 1)",  # task_info_box
             "Completed: 0",  # progress_info_box
             gr.update(interactive=True),  # restart_episode_btn
             gr.update(interactive=True),  # next_task_btn
@@ -1288,7 +1288,7 @@ def test_keypoint_wait_state_pulses_live_obs_and_updates_system_log(monkeypatch)
     host = "127.0.0.1"
     root_url = f"http://{host}:{port}/"
-    app = FastAPI(title="keypoint-wait-state-test")
     app = gr.mount_gradio_app(app, demo, path="/")
     config = uvicorn.Config(app, host=host, port=port, log_level="error")
@@ -1308,7 +1308,7 @@ def test_keypoint_wait_state_pulses_live_obs_and_updates_system_log(monkeypatch)
             initial_classes = _read_elem_classes(page, "live_obs")
             assert initial_classes is not None
-            assert config_module.LIVE_OBS_KEYPOINT_WAIT_CLASS not in initial_classes
             assert _read_log_output_value(page) == config_module.UI_TEXT["log"]["action_selection_prompt"]
             initial_card_wait = _read_media_card_wait_snapshot(page)
             initial_transforms = _read_live_obs_transform_snapshot(page)
@@ -1345,25 +1345,25 @@ def test_keypoint_wait_state_pulses_live_obs_and_updates_system_log(monkeypatch)
                     );
                 }""",
                 arg={
-                    "cardAnimation": "media-card-keypoint-ring",
-                    "waitClass": config_module.LIVE_OBS_KEYPOINT_WAIT_CLASS,
-                    "coordsPrompt": config_module.UI_TEXT["coords"]["select_keypoint"],
-                    "waitLog": config_module.UI_TEXT["log"]["keypoint_selection_prompt"],
                 },
                 timeout=5000,
             )
             wait_classes = _read_elem_classes(page, "live_obs")
             assert wait_classes is not None
-            assert config_module.LIVE_OBS_KEYPOINT_WAIT_CLASS in wait_classes
-            assert _read_coords_box_value(page) == config_module.UI_TEXT["coords"]["select_keypoint"]
-            assert _read_log_output_value(page) == config_module.UI_TEXT["log"]["keypoint_selection_prompt"]
             wait_card = _read_media_card_wait_snapshot(page)
             wait_transforms = _read_live_obs_transform_snapshot(page)
             wait_img_box = page.locator("#live_obs img").bounding_box()
             wait_frame_box = page.locator("#live_obs .image-frame").bounding_box()
             assert wait_card["opacity"] is not None and wait_card["opacity"] > 0.5
-            assert wait_card["animationName"] == "media-card-keypoint-ring"
             assert wait_card["borderColor"] != "rgba(225, 29, 72, 0)"
             assert wait_transforms["imgTransform"] == "none"
             assert wait_transforms["frameTransform"] == "none"
@@ -1401,7 +1401,7 @@ def test_keypoint_wait_state_pulses_live_obs_and_updates_system_log(monkeypatch)
                     );
                 }""",
                 arg={
-                    "waitClass": config_module.LIVE_OBS_KEYPOINT_WAIT_CLASS,
                     "actionLog": config_module.UI_TEXT["log"]["action_selection_prompt"],
                 },
                 timeout=5000,
@@ -1414,7 +1414,7 @@ def test_keypoint_wait_state_pulses_live_obs_and_updates_system_log(monkeypatch)
             assert abs(coord_y - 8) <= 1
             final_classes = _read_elem_classes(page, "live_obs")
             assert final_classes is not None
-            assert config_module.LIVE_OBS_KEYPOINT_WAIT_CLASS not in final_classes
             assert config_module.LIVE_OBS_BASE_CLASS in final_classes
             assert _read_log_output_value(page) == config_module.UI_TEXT["log"]["action_selection_prompt"]
             final_card_wait = _read_media_card_wait_snapshot(page)
@@ -1562,14 +1562,14 @@ def test_reference_action_single_click_applies_coords_without_wait_state(monkeyp
                     "checkedValue": "0",
                     "coordsValue": "5, 6",
                     "logValue": expected_reference_log,
-                    "waitClass": config_module.LIVE_OBS_KEYPOINT_WAIT_CLASS,
                 },
                 timeout=5000,
             )
             classes_after_reference = _read_elem_classes(page, "live_obs")
             assert classes_after_reference is not None
-            assert config_module.LIVE_OBS_KEYPOINT_WAIT_CLASS not in classes_after_reference
             assert _read_coords_box_value(page) == "5, 6"
             assert _read_log_output_value(page) == expected_reference_log
@@ -1595,18 +1595,18 @@ def test_reference_action_single_click_applies_coords_without_wait_state(monkeyp
                 }""",
                 arg={
                     "checkedValue": "1",
-                    "coordsValue": config_module.UI_TEXT["coords"]["select_keypoint"],
-                    "logValue": config_module.UI_TEXT["log"]["keypoint_selection_prompt"],
-                    "waitClass": config_module.LIVE_OBS_KEYPOINT_WAIT_CLASS,
                 },
                 timeout=5000,
             )
             classes_after_manual_change = _read_elem_classes(page, "live_obs")
             assert classes_after_manual_change is not None
-            assert config_module.LIVE_OBS_KEYPOINT_WAIT_CLASS in classes_after_manual_change
-            assert _read_coords_box_value(page) == config_module.UI_TEXT["coords"]["select_keypoint"]
-            assert _read_log_output_value(page) == config_module.UI_TEXT["log"]["keypoint_selection_prompt"]
             browser.close()
     finally:
@@ -1638,7 +1638,7 @@ def test_live_obs_client_resize_fills_width_and_keeps_click_mapping(monkeypatch)
             gr.update(choices=[("pick", 0)], value=0),  # options_radio
             "goal",  # goal_box
             gr.update(
-                value="please click the keypoint selection image",
                 visible=True,
                 interactive=False,
             ),  # coords_box

             with gr.Column(visible=False, elem_id="control_panel_group") as control_panel_group:
                 options_radio = gr.Radio(choices=[("pick", 0)], value=0, elem_id="action_radio")
+                coords_box = gr.Textbox(value="please click the point selection image", elem_id="coords_box")
                 with gr.Column(visible=False, elem_id="action_buttons_row") as action_buttons_row:
                     exec_btn = gr.Button("EXECUTE", elem_id="exec_btn")
                     reference_action_btn = gr.Button(
                 gr.update(visible=False),
                 gr.update(visible=False),
                 gr.update(interactive=False),
+                gr.update(value="please click the point selection image"),
                 "demo_video",
             )
                 gr.update(visible=True),
                 gr.update(interactive=True),
                 gr.update(visible=False, interactive=False),
+                "action_point",
             )
         def precheck_fn(_option_idx, _coords):
             state["precheck_calls"] += 1
             if state["precheck_calls"] == 1:
+                raise gr.Error("please click the point selection image before execute!")
         def to_execute_fn():
             return (
                 gr.update(interactive=True),
                 gr.update(interactive=True),
                 gr.update(interactive=True),
+                "action_point",
             )
         login_btn.click(
         demo.close()
+def test_point_wait_state_pulses_live_obs_and_updates_system_log(monkeypatch):
     config_module = importlib.reload(importlib.import_module("config"))
     callbacks = importlib.reload(importlib.import_module("gradio_callbacks"))
     ui_layout = importlib.reload(importlib.import_module("ui_layout"))
     def fake_init_app(_request=None):
         return (
+            "uid-point-wait",
             gr.update(visible=True),  # main_interface
             gr.update(
                 value=fake_obs_img.copy(),
             ),  # coords_box
             gr.update(value=None, visible=False),  # video_display
             gr.update(visible=False, interactive=False),  # watch_demo_video_btn
+            "PointEnv (Episode 1)",  # task_info_box
             "Completed: 0",  # progress_info_box
             gr.update(interactive=True),  # restart_episode_btn
             gr.update(interactive=True),  # next_task_btn
     host = "127.0.0.1"
     root_url = f"http://{host}:{port}/"
+    app = FastAPI(title="point-wait-state-test")
     app = gr.mount_gradio_app(app, demo, path="/")
     config = uvicorn.Config(app, host=host, port=port, log_level="error")
             initial_classes = _read_elem_classes(page, "live_obs")
             assert initial_classes is not None
+            assert config_module.LIVE_OBS_POINT_WAIT_CLASS not in initial_classes
             assert _read_log_output_value(page) == config_module.UI_TEXT["log"]["action_selection_prompt"]
             initial_card_wait = _read_media_card_wait_snapshot(page)
             initial_transforms = _read_live_obs_transform_snapshot(page)
                     );
                 }""",
                 arg={
+                    "cardAnimation": "media-card-point-ring",
+                    "waitClass": config_module.LIVE_OBS_POINT_WAIT_CLASS,
+                    "coordsPrompt": config_module.UI_TEXT["coords"]["select_point"],
+                    "waitLog": config_module.UI_TEXT["log"]["point_selection_prompt"],
                 },
                 timeout=5000,
             )
             wait_classes = _read_elem_classes(page, "live_obs")
             assert wait_classes is not None
+            assert config_module.LIVE_OBS_POINT_WAIT_CLASS in wait_classes
+            assert _read_coords_box_value(page) == config_module.UI_TEXT["coords"]["select_point"]
+            assert _read_log_output_value(page) == config_module.UI_TEXT["log"]["point_selection_prompt"]
             wait_card = _read_media_card_wait_snapshot(page)
             wait_transforms = _read_live_obs_transform_snapshot(page)
             wait_img_box = page.locator("#live_obs img").bounding_box()
             wait_frame_box = page.locator("#live_obs .image-frame").bounding_box()
             assert wait_card["opacity"] is not None and wait_card["opacity"] > 0.5
+            assert wait_card["animationName"] == "media-card-point-ring"
             assert wait_card["borderColor"] != "rgba(225, 29, 72, 0)"
             assert wait_transforms["imgTransform"] == "none"
             assert wait_transforms["frameTransform"] == "none"
                     );
                 }""",
                 arg={
+                    "waitClass": config_module.LIVE_OBS_POINT_WAIT_CLASS,
                     "actionLog": config_module.UI_TEXT["log"]["action_selection_prompt"],
                 },
                 timeout=5000,
             assert abs(coord_y - 8) <= 1
             final_classes = _read_elem_classes(page, "live_obs")
             assert final_classes is not None
+            assert config_module.LIVE_OBS_POINT_WAIT_CLASS not in final_classes
             assert config_module.LIVE_OBS_BASE_CLASS in final_classes
             assert _read_log_output_value(page) == config_module.UI_TEXT["log"]["action_selection_prompt"]
             final_card_wait = _read_media_card_wait_snapshot(page)
                     "checkedValue": "0",
                     "coordsValue": "5, 6",
                     "logValue": expected_reference_log,
+                    "waitClass": config_module.LIVE_OBS_POINT_WAIT_CLASS,
                 },
                 timeout=5000,
             )
             classes_after_reference = _read_elem_classes(page, "live_obs")
             assert classes_after_reference is not None
+            assert config_module.LIVE_OBS_POINT_WAIT_CLASS not in classes_after_reference
             assert _read_coords_box_value(page) == "5, 6"
             assert _read_log_output_value(page) == expected_reference_log
                 }""",
                 arg={
                     "checkedValue": "1",
+                    "coordsValue": config_module.UI_TEXT["coords"]["select_point"],
+                    "logValue": config_module.UI_TEXT["log"]["point_selection_prompt"],
+                    "waitClass": config_module.LIVE_OBS_POINT_WAIT_CLASS,
                 },
                 timeout=5000,
             )
             classes_after_manual_change = _read_elem_classes(page, "live_obs")
             assert classes_after_manual_change is not None
+            assert config_module.LIVE_OBS_POINT_WAIT_CLASS in classes_after_manual_change
+            assert _read_coords_box_value(page) == config_module.UI_TEXT["coords"]["select_point"]
+            assert _read_log_output_value(page) == config_module.UI_TEXT["log"]["point_selection_prompt"]
             browser.close()
     finally:
             gr.update(choices=[("pick", 0)], value=0),  # options_radio
             "goal",  # goal_box
             gr.update(
+                value="please click the point selection image",
                 visible=True,
                 interactive=False,
             ),  # coords_box

gradio-web/test/test_ui_text_config.py CHANGED Viewed

@@ -30,14 +30,14 @@ class _FakeLoadSession:
         return "IMG"
-def test_on_option_select_uses_configured_select_keypoint_and_log_messages(monkeypatch, reload_module):
     reload_module("config")
     callbacks = reload_module("gradio_callbacks")
-    monkeypatch.setitem(callbacks.UI_TEXT["coords"], "select_keypoint", "pick a point from config")
     monkeypatch.setitem(
         callbacks.UI_TEXT["log"],
-        "keypoint_selection_prompt",
         "custom log prompt from config",
     )
     monkeypatch.setattr(callbacks, "update_session_activity", lambda uid: None)
@@ -47,7 +47,7 @@ def test_on_option_select_uses_configured_select_keypoint_and_log_messages(monke
     assert coords_text == "pick a point from config"
     assert img_update.get("interactive") is True
-    assert callbacks.get_live_obs_elem_classes(waiting_for_keypoint=True) == img_update.get("elem_classes")
     assert log_text == "custom log prompt from config"
     assert suppress_flag is False
@@ -56,10 +56,10 @@ def test_precheck_execute_inputs_uses_configured_before_execute_message(monkeypa
     reload_module("config")
     callbacks = reload_module("gradio_callbacks")
-    monkeypatch.setitem(callbacks.UI_TEXT["coords"], "select_keypoint", "pick a point from config")
     monkeypatch.setitem(
         callbacks.UI_TEXT["coords"],
-        "select_keypoint_before_execute",
         "pick a point before execute from config",
     )
     monkeypatch.setattr(callbacks, "update_session_activity", lambda uid: None)
@@ -180,7 +180,7 @@ def test_ui_option_label_uses_routestick_configured_action_text(reload_module):
     assert callbacks._ui_option_label(session, "fallback", 0) == "d. move right counterclockwise↙︎←↖︎ ◜←◝"
-def test_load_status_task_appends_configured_keypoint_suffix_after_mapped_label(monkeypatch, reload_module):
     config = reload_module("config")
     callbacks = reload_module("gradio_callbacks")
     session = _FakeLoadSession(
@@ -204,7 +204,7 @@ def test_load_status_task_appends_configured_keypoint_suffix_after_mapped_label(
     assert result[4]["choices"] == [
         (
-            f"a. move forward↓{config.UI_TEXT['actions']['keypoint_required_suffix']}",
             0,
         )
     ]

         return "IMG"
+def test_on_option_select_uses_configured_select_point_and_log_messages(monkeypatch, reload_module):
     reload_module("config")
     callbacks = reload_module("gradio_callbacks")
+    monkeypatch.setitem(callbacks.UI_TEXT["coords"], "select_point", "pick a point from config")
     monkeypatch.setitem(
         callbacks.UI_TEXT["log"],
+        "point_selection_prompt",
         "custom log prompt from config",
     )
     monkeypatch.setattr(callbacks, "update_session_activity", lambda uid: None)
     assert coords_text == "pick a point from config"
     assert img_update.get("interactive") is True
+    assert callbacks.get_live_obs_elem_classes(waiting_for_point=True) == img_update.get("elem_classes")
     assert log_text == "custom log prompt from config"
     assert suppress_flag is False
     reload_module("config")
     callbacks = reload_module("gradio_callbacks")
+    monkeypatch.setitem(callbacks.UI_TEXT["coords"], "select_point", "pick a point from config")
     monkeypatch.setitem(
         callbacks.UI_TEXT["coords"],
+        "select_point_before_execute",
         "pick a point before execute from config",
     )
     monkeypatch.setattr(callbacks, "update_session_activity", lambda uid: None)
     assert callbacks._ui_option_label(session, "fallback", 0) == "d. move right counterclockwise↙︎←↖︎ ◜←◝"
+def test_load_status_task_appends_configured_point_suffix_after_mapped_label(monkeypatch, reload_module):
     config = reload_module("config")
     callbacks = reload_module("gradio_callbacks")
     session = _FakeLoadSession(
     assert result[4]["choices"] == [
         (
+            f"a. move forward↓{config.UI_TEXT['actions']['point_required_suffix']}",
             0,
         )
     ]

gradio-web/ui_layout.py CHANGED Viewed

@@ -1,7 +1,7 @@
 """
 Native Gradio UI layout.
-Sequential media phases: Demo Video -> Action+Keypoint.
-Two-column layout: Keypoint Selection | Right Panel.
 """
 import ast
@@ -11,9 +11,9 @@ import gradio as gr
 from config import (
     CONTROL_PANEL_SCALE,
     LIVE_OBS_BASE_CLASS,
-    LIVE_OBS_KEYPOINT_WAIT_CLASS,
     LIVE_OBS_REFRESH_HZ,
-    KEYPOINT_SELECTION_SCALE,
     RIGHT_TOP_ACTION_SCALE,
     RIGHT_TOP_LOG_SCALE,
     UI_GLOBAL_FONT_SIZE,
@@ -41,7 +41,7 @@ from user_manager import user_manager
 PHASE_INIT = "init"
 PHASE_DEMO_VIDEO = "demo_video"
-PHASE_ACTION_KEYPOINT = "action_keypoint"
 PHASE_EXECUTION_PLAYBACK = "execution_playback"
 APP_THEME = gr.themes.Default()
@@ -432,7 +432,7 @@ button#watch_demo_video_btn {{
     z-index: 0;
 }}
-@keyframes media-card-keypoint-ring {{
     0% {{
         box-shadow: 0 0 0 0 rgba(225, 29, 72, 0.00);
         border-color: rgba(225, 29, 72, 0.72);
@@ -460,11 +460,11 @@ button#watch_demo_video_btn {{
     }}
 }}
-#media_card:has(#live_obs.{LIVE_OBS_KEYPOINT_WAIT_CLASS})::after {{
     border-color: rgba(225, 29, 72, 0.94);
     box-shadow: 0 0 0 0 rgba(225, 29, 72, 0.00);
     opacity: 1;
-    animation: media-card-keypoint-ring 1.2s ease-in-out infinite;
 }}
 """
@@ -509,7 +509,7 @@ def _phase_from_updates(main_interface_update, video_phase_update):
         return PHASE_INIT
     if isinstance(video_phase_update, dict) and video_phase_update.get("visible") is True:
         return PHASE_DEMO_VIDEO
-    return PHASE_ACTION_KEYPOINT
 def _with_phase_from_load(load_result):
@@ -528,7 +528,7 @@ def _phase_visibility_updates(phase):
             gr.update(visible=False),
             gr.update(visible=False),
         )
-    if phase in {PHASE_ACTION_KEYPOINT, PHASE_EXECUTION_PLAYBACK}:
         return (
             gr.update(visible=False),
             gr.update(visible=True),
@@ -596,7 +596,7 @@ def create_ui_blocks():
         with gr.Column(visible=False, elem_id="main_interface_root") as main_interface:
             with gr.Row(elem_id="main_layout_row"):
-                with gr.Column(scale=KEYPOINT_SELECTION_SCALE):
                     with gr.Column(elem_classes=["native-card"], elem_id="media_card"):
                         with gr.Column(visible=False, elem_id="video_phase_group") as video_phase_group:
                             video_display = gr.Video(
@@ -618,7 +618,7 @@ def create_ui_blocks():
                         with gr.Column(visible=False, elem_id="action_phase_group") as action_phase_group:
                             img_display = gr.Image(
-                                label="Keypoint Selection",
                                 interactive=False,
                                 type="pil",
                                 elem_id="live_obs",
@@ -895,7 +895,7 @@ def create_ui_blocks():
             queue=False,
             show_progress="hidden",
         ).then(
-            fn=lambda: PHASE_ACTION_KEYPOINT,
             outputs=[ui_phase_state],
             queue=False,
             show_progress="hidden",
@@ -913,7 +913,7 @@ def create_ui_blocks():
             queue=False,
             show_progress="hidden",
         ).then(
-            fn=lambda: PHASE_ACTION_KEYPOINT,
             outputs=[ui_phase_state],
             queue=False,
             show_progress="hidden",
@@ -984,7 +984,7 @@ def create_ui_blocks():
             ],
             show_progress="hidden",
         ).then(
-            fn=lambda: PHASE_ACTION_KEYPOINT,
             outputs=[ui_phase_state],
             show_progress="hidden",
         )

 """
 Native Gradio UI layout.
+Sequential media phases: Demo Video -> Action+Point.
+Two-column layout: Point Selection | Right Panel.
 """
 import ast
 from config import (
     CONTROL_PANEL_SCALE,
     LIVE_OBS_BASE_CLASS,
+    LIVE_OBS_POINT_WAIT_CLASS,
     LIVE_OBS_REFRESH_HZ,
+    POINT_SELECTION_SCALE,
     RIGHT_TOP_ACTION_SCALE,
     RIGHT_TOP_LOG_SCALE,
     UI_GLOBAL_FONT_SIZE,
 PHASE_INIT = "init"
 PHASE_DEMO_VIDEO = "demo_video"
+PHASE_ACTION_POINT = "action_point"
 PHASE_EXECUTION_PLAYBACK = "execution_playback"
 APP_THEME = gr.themes.Default()
     z-index: 0;
 }}
+@keyframes media-card-point-ring {{
     0% {{
         box-shadow: 0 0 0 0 rgba(225, 29, 72, 0.00);
         border-color: rgba(225, 29, 72, 0.72);
     }}
 }}
+#media_card:has(#live_obs.{LIVE_OBS_POINT_WAIT_CLASS})::after {{
     border-color: rgba(225, 29, 72, 0.94);
     box-shadow: 0 0 0 0 rgba(225, 29, 72, 0.00);
     opacity: 1;
+    animation: media-card-point-ring 1.2s ease-in-out infinite;
 }}
 """
         return PHASE_INIT
     if isinstance(video_phase_update, dict) and video_phase_update.get("visible") is True:
         return PHASE_DEMO_VIDEO
+    return PHASE_ACTION_POINT
 def _with_phase_from_load(load_result):
             gr.update(visible=False),
             gr.update(visible=False),
         )
+    if phase in {PHASE_ACTION_POINT, PHASE_EXECUTION_PLAYBACK}:
         return (
             gr.update(visible=False),
             gr.update(visible=True),
         with gr.Column(visible=False, elem_id="main_interface_root") as main_interface:
             with gr.Row(elem_id="main_layout_row"):
+                with gr.Column(scale=POINT_SELECTION_SCALE):
                     with gr.Column(elem_classes=["native-card"], elem_id="media_card"):
                         with gr.Column(visible=False, elem_id="video_phase_group") as video_phase_group:
                             video_display = gr.Video(
                         with gr.Column(visible=False, elem_id="action_phase_group") as action_phase_group:
                             img_display = gr.Image(
+                                label="Point Selection",
                                 interactive=False,
                                 type="pil",
                                 elem_id="live_obs",
             queue=False,
             show_progress="hidden",
         ).then(
+            fn=lambda: PHASE_ACTION_POINT,
             outputs=[ui_phase_state],
             queue=False,
             show_progress="hidden",
             queue=False,
             show_progress="hidden",
         ).then(
+            fn=lambda: PHASE_ACTION_POINT,
             outputs=[ui_phase_state],
             queue=False,
             show_progress="hidden",
             ],
             show_progress="hidden",
         ).then(
+            fn=lambda: PHASE_ACTION_POINT,
             outputs=[ui_phase_state],
             show_progress="hidden",
         )