Spaces:

HongzeFu
/

RoboMME

Running on T4

App Files Files Community

HongzeFu committed 1 day ago

Commit

41a86ed

1 Parent(s): 3b27796

unified system log

Browse files

Files changed (6) hide show

gradio-web/config.py +4 -3
gradio-web/gradio_callbacks.py +88 -51
gradio-web/test/test_live_obs_refresh.py +32 -1
gradio-web/test/test_reference_action_callbacks.py +2 -4
gradio-web/test/test_ui_phase_machine_runtime_e2e.py +7 -3
gradio-web/test/test_ui_text_config.py +4 -4

gradio-web/config.py CHANGED Viewed

@@ -66,12 +66,13 @@ UI_TEXT = {
         "action_selection_prompt": "Please select the action.\nActions with 🎯 need to select a point on the image as input",
         "point_selection_prompt": "Current action needs location input, please click on the image to select key pixel",
         "point_selected_message": "Select: {label} | point <{x}, {y}>",
-        "execute_action_prompt": "Executing Action {label}",
         "demo_video_prompt": 'Press "Watch Video Input 🎬" to watch a video\nNote: you can only watch the video once',
         "session_error": "Session expired. Please refresh the page and try again.",
         "reference_action_error": "Ground Truth Action Error: {error}",
-        "reference_action_message": "Ground Truth Action: {option_label}. {option_action}",
-        "reference_action_message_with_coords": "Ground Truth Action: {option_label}. {option_action} | coords: {coords_text}",
         "reference_action_status": "Ground Truth Action: {message}",
         "execute_missing_action": "Error: No action selected",
         "episode_success_banner": "********************************\n****   episode success      ****\n********************************\n  ---please press change episode----   ",

         "action_selection_prompt": "Please select the action.\nActions with 🎯 need to select a point on the image as input",
         "point_selection_prompt": "Current action needs location input, please click on the image to select key pixel",
         "point_selected_message": "Select: {label} | point <{x}, {y}>",
+        "execute_action_prompt": "Executing: {label}",
+        "execute_action_prompt_with_coords": "Executing: {label} | point <{coords_text}>",
         "demo_video_prompt": 'Press "Watch Video Input 🎬" to watch a video\nNote: you can only watch the video once',
         "session_error": "Session expired. Please refresh the page and try again.",
         "reference_action_error": "Ground Truth Action Error: {error}",
+        "reference_action_message": "Ground Truth Action: {option_label}",
+        "reference_action_message_with_coords": "Ground Truth Action: {option_label} | point <{coords_text}>",
         "reference_action_status": "Ground Truth Action: {message}",
         "execute_missing_action": "Error: No action selected",
         "episode_success_banner": "********************************\n****   episode success      ****\n********************************\n  ---please press change episode----   ",

gradio-web/gradio_callbacks.py CHANGED Viewed

@@ -82,10 +82,71 @@ def _get_raw_option_label(session, option_idx):
     return label or None
-def _execution_video_log(session, option_idx, fallback_status=None):
     label = _get_raw_option_label(session, option_idx)
-    if label:
-        return format_log_markdown(_ui_text("log", "execute_action_prompt", label=label))
     if fallback_status is None:
         return None
     return format_log_markdown(fallback_status)
@@ -101,11 +162,11 @@ def _default_post_execute_log_state():
 def _point_selected_log(session, option_value, x, y):
-    label = _get_raw_option_label(session, option_value)
-    if label:
-        display_label = _format_choice_prefix(label)
-    else:
-        display_label = str(option_value).strip() if option_value is not None else "?"
     return format_log_markdown(
         _ui_text("log", "point_selected_message", label=display_label, x=int(x), y=int(y))
     )
@@ -931,24 +992,7 @@ def on_map_click(uid, option_value, evt: gr.SelectData):
 def _is_valid_coords_text(coords_text: str) -> bool:
-    if not isinstance(coords_text, str):
-        return False
-    text = coords_text.strip()
-    if text in {
-        "",
-        _ui_text("coords", "select_point"),
-        _ui_text("coords", "not_needed"),
-    }:
-        return False
-    if "," not in text:
-        return False
-    try:
-        x_raw, y_raw = text.split(",", 1)
-        int(x_raw.strip())
-        int(y_raw.strip())
-    except Exception:
-        return False
-    return True
 def on_option_select(
@@ -1096,33 +1140,29 @@ def on_reference_action(uid, current_option_value=None):
     option_idx = reference.get("option_idx")
     current_option_idx = _parse_option_idx(current_option_value)
-    option_label = str(reference.get("option_label", "")).strip()
-    option_action = str(reference.get("option_action", "")).strip()
-    option_action = get_ui_action_text(getattr(session, "env_id", None), option_action)
     need_coords = bool(reference.get("need_coords", False))
     coords_xy = reference.get("coords_xy")
     suppress_next_option_change = option_idx != current_option_idx
     updated_img = current_img
     coords_text = _ui_text("coords", "not_needed")
-    log_text = _ui_text(
-        "log",
-        "reference_action_message",
-        option_label=option_label,
-        option_action=option_action,
-    ).strip()
     if need_coords and isinstance(coords_xy, (list, tuple)) and len(coords_xy) >= 2:
         x = int(coords_xy[0])
         y = int(coords_xy[1])
         updated_img = draw_marker(current_img, x, y)
-        coords_text = f"{x}, {y}"
-        log_text = _ui_text(
-            "log",
-            "reference_action_message_with_coords",
-            option_label=option_label,
-            option_action=option_action,
-            coords_text=coords_text,
         )
     LOGGER.debug(
         "on_reference_action resolved uid=%s option_idx=%s need_coords=%s coords=%s",
@@ -1136,7 +1176,7 @@ def on_reference_action(uid, current_option_value=None):
         _live_obs_update(value=updated_img, interactive=False),
         gr.update(value=option_idx),
         coords_text,
-        format_log_markdown(log_text),
         suppress_next_option_change,
     )
@@ -1363,13 +1403,7 @@ def execute_step(uid, option_idx, coords_str):
             )
     # Parse coords
-    click_coords = None
-    if coords_str and "," in coords_str:
-        try:
-            parts = coords_str.split(",")
-            click_coords = (int(parts[0].strip()), int(parts[1].strip()))
-        except:
-            pass
     # Execute
     # 如果达到 execute 次数限制，模拟失败状态（使用和任务失败一样的机制）
@@ -1517,7 +1551,10 @@ def execute_step(uid, option_idx, coords_str):
     # 格式化日志消息为 HTML 格式（支持颜色显示）
     formatted_status = format_log_markdown(status)
     if show_execution_video and not done:
-        formatted_status = _execution_video_log(session, option_idx, fallback_status=status) or formatted_status
         post_execute_log_state = {
             "preserve_terminal_log": False,
             "terminal_log_value": None,

     return label or None
+def _choice_label_token(text):
+    if not isinstance(text, str):
+        return None
+    stripped = text.strip()
+    if not stripped:
+        return None
+    prefix, dot, _rest = stripped.partition(".")
+    if dot and prefix.isalpha() and len(prefix) <= 4:
+        return prefix.upper()
+    if stripped.isalpha() and len(stripped) <= 4:
+        return stripped.upper()
+    return stripped
+def _display_choice_label(session, option_idx=None, fallback_label=None):
     label = _get_raw_option_label(session, option_idx)
+    if label is None:
+        label = fallback_label
+    return _choice_label_token(label)
+def _parse_coords_text(coords_text):
+    if not isinstance(coords_text, str):
+        return None
+    text = coords_text.strip()
+    if text in {
+        "",
+        _ui_text("coords", "select_point"),
+        _ui_text("coords", "not_needed"),
+    }:
+        return None
+    if "," not in text:
+        return None
+    try:
+        x_raw, y_raw = text.split(",", 1)
+        return int(x_raw.strip()), int(y_raw.strip())
+    except Exception:
+        return None
+def _coords_text(x, y):
+    return f"{int(x)}, {int(y)}"
+def _format_action_log(message_key, label_key, label, coords=None):
+    display_label = _choice_label_token(label)
+    if not display_label:
+        return None
+    kwargs = {label_key: display_label}
+    if isinstance(coords, (list, tuple)) and len(coords) >= 2:
+        kwargs["coords_text"] = _coords_text(coords[0], coords[1])
+        return format_log_markdown(_ui_text("log", f"{message_key}_with_coords", **kwargs))
+    return format_log_markdown(_ui_text("log", message_key, **kwargs))
+def _execution_video_log(session, option_idx, coords=None, fallback_status=None):
+    label = _display_choice_label(session, option_idx)
+    log_text = _format_action_log("execute_action_prompt", "label", label, coords=coords)
+    if log_text is not None:
+        return log_text
     if fallback_status is None:
         return None
     return format_log_markdown(fallback_status)
 def _point_selected_log(session, option_value, x, y):
+    display_label = _display_choice_label(
+        session,
+        option_value,
+        fallback_label=str(option_value).strip() if option_value is not None else "?",
+    )
     return format_log_markdown(
         _ui_text("log", "point_selected_message", label=display_label, x=int(x), y=int(y))
     )
 def _is_valid_coords_text(coords_text: str) -> bool:
+    return _parse_coords_text(coords_text) is not None
 def on_option_select(
     option_idx = reference.get("option_idx")
     current_option_idx = _parse_option_idx(current_option_value)
+    option_label = _display_choice_label(
+        session,
+        option_idx,
+        fallback_label=str(reference.get("option_label", "")).strip(),
+    )
     need_coords = bool(reference.get("need_coords", False))
     coords_xy = reference.get("coords_xy")
     suppress_next_option_change = option_idx != current_option_idx
     updated_img = current_img
     coords_text = _ui_text("coords", "not_needed")
+    log_text = _format_action_log("reference_action_message", "option_label", option_label)
     if need_coords and isinstance(coords_xy, (list, tuple)) and len(coords_xy) >= 2:
         x = int(coords_xy[0])
         y = int(coords_xy[1])
         updated_img = draw_marker(current_img, x, y)
+        coords_text = _coords_text(x, y)
+        log_text = _format_action_log(
+            "reference_action_message",
+            "option_label",
+            option_label,
+            coords=(x, y),
         )
     LOGGER.debug(
         "on_reference_action resolved uid=%s option_idx=%s need_coords=%s coords=%s",
         _live_obs_update(value=updated_img, interactive=False),
         gr.update(value=option_idx),
         coords_text,
+        log_text,
         suppress_next_option_change,
     )
             )
     # Parse coords
+    click_coords = _parse_coords_text(coords_str)
     # Execute
     # 如果达到 execute 次数限制，模拟失败状态（使用和任务失败一样的机制）
     # 格式化日志消息为 HTML 格式（支持颜色显示）
     formatted_status = format_log_markdown(status)
     if show_execution_video and not done:
+        formatted_status = (
+            _execution_video_log(session, option_idx, coords=click_coords, fallback_status=status)
+            or formatted_status
+        )
         post_execute_log_state = {
             "preserve_terminal_log": False,
             "terminal_log_value": None,

gradio-web/test/test_live_obs_refresh.py CHANGED Viewed

@@ -63,7 +63,7 @@ def test_execute_step_builds_video_from_last_execution_frames(monkeypatch, reloa
     assert result[11]["value"] is None
     assert result[11]["interactive"] is False
     assert result[14]["interactive"] is False
-    expected_log = callbacks.UI_TEXT["log"]["execute_action_prompt"].format(label="a")
     assert result[1] == expected_log
     assert result[15] == {
         "exec_btn_interactive": True,
@@ -78,6 +78,37 @@ def test_execute_step_builds_video_from_last_execution_frames(monkeypatch, reloa
     assert result[17] == "execution_video"
 def test_execute_step_falls_back_to_single_frame_clip_when_no_new_frames(monkeypatch, reload_module):
     callbacks = reload_module("gradio_callbacks")

     assert result[11]["value"] is None
     assert result[11]["interactive"] is False
     assert result[14]["interactive"] is False
+    expected_log = callbacks.UI_TEXT["log"]["execute_action_prompt"].format(label="A")
     assert result[1] == expected_log
     assert result[15] == {
         "exec_btn_interactive": True,
     assert result[17] == "execution_video"
+def test_execute_step_execution_log_includes_point_when_coords_selected(monkeypatch, reload_module):
+    callbacks = reload_module("gradio_callbacks")
+    frame = np.full((8, 8, 3), 44, dtype=np.uint8)
+    session = _FakeSession()
+    session.raw_solve_options = [{"label": "b", "available": [object()]}]
+    session.base_frames = [frame]
+    captured = {}
+    def _execute_action(_option_idx, coords):
+        captured["coords"] = coords
+        session.last_execution_frames = [frame]
+        return "IMG", "Executing: pick", False
+    session.execute_action = _execute_action
+    monkeypatch.setattr(callbacks, "get_session", lambda uid: session)
+    monkeypatch.setattr(callbacks, "increment_execute_count", lambda uid, env_id, episode_idx: 1)
+    monkeypatch.setattr(callbacks, "concatenate_frames_horizontally", lambda frames, env_id=None: list(frames))
+    monkeypatch.setattr(callbacks, "save_video", lambda frames, suffix="": "/tmp/exec-point.mp4")
+    monkeypatch.setattr(callbacks.os.path, "exists", lambda path: True)
+    monkeypatch.setattr(callbacks.os.path, "getsize", lambda path: 10)
+    result = callbacks.execute_step("uid-1", 0, "12, 34")
+    assert captured["coords"] == (12, 34)
+    assert result[1] == "Executing: B | point <12, 34>"
+    assert result[16]["execute_video_log_value"] == "Executing: B | point <12, 34>"
 def test_execute_step_falls_back_to_single_frame_clip_when_no_new_frames(monkeypatch, reload_module):
     callbacks = reload_module("gradio_callbacks")

gradio-web/test/test_reference_action_callbacks.py CHANGED Viewed

@@ -57,8 +57,7 @@ def test_on_reference_action_success_updates_option_and_coords(monkeypatch, relo
     assert coords_text == "5, 6"
     assert suppress_flag is True
     expected_log = config.UI_TEXT["log"]["reference_action_message_with_coords"].format(
-        option_label="c",
-        option_action="press the button",
         coords_text="5, 6",
     )
     assert log_html == expected_log
@@ -189,7 +188,6 @@ def test_on_reference_action_uses_configured_action_text_override(monkeypatch, r
     assert coords_text == config.UI_TEXT["coords"]["not_needed"]
     assert log_html == config.UI_TEXT["log"]["reference_action_message"].format(
-        option_label="a",
-        option_action="move forward↓",
     )
     assert suppress_flag is True

     assert coords_text == "5, 6"
     assert suppress_flag is True
     expected_log = config.UI_TEXT["log"]["reference_action_message_with_coords"].format(
+        option_label="C",
         coords_text="5, 6",
     )
     assert log_html == expected_log
     assert coords_text == config.UI_TEXT["coords"]["not_needed"]
     assert log_html == config.UI_TEXT["log"]["reference_action_message"].format(
+        option_label="A",
     )
     assert suppress_flag is True

gradio-web/test/test_ui_phase_machine_runtime_e2e.py CHANGED Viewed

@@ -1855,8 +1855,7 @@ def test_reference_action_single_click_applies_coords_without_wait_state(monkeyp
             page.wait_for_selector("#reference_action_btn button, button#reference_action_btn", timeout=15000)
             expected_reference_log = config_module.UI_TEXT["log"]["reference_action_message_with_coords"].format(
-                option_label="a",
-                option_action="pick the left cube",
                 coords_text="5, 6",
             )
             page.locator("#reference_action_btn button, button#reference_action_btn").first.click()
@@ -2817,6 +2816,8 @@ def _run_local_execute_video_transition_test(
                     arg={"label": "B"},
                     timeout=5000,
                 )
                 page.locator("#exec_btn button, button#exec_btn").first.click()
                 page.wait_for_selector("#execute_video video", timeout=5000)
                 page.wait_for_function(
@@ -2842,7 +2843,10 @@ def _run_local_execute_video_transition_test(
                 )
                 if not done:
                     execution_log = _read_log_output_value(page)
-                    assert execution_log == config_module.UI_TEXT["log"]["execute_action_prompt"].format(label="b")
                     assert execution_log != config_module.UI_TEXT["log"]["point_selection_prompt"]
                 controls_after_execute = _read_demo_video_controls(page, elem_id="execute_video", button_elem_id=None)
                 assert controls_after_execute["autoplay"] is True

             page.wait_for_selector("#reference_action_btn button, button#reference_action_btn", timeout=15000)
             expected_reference_log = config_module.UI_TEXT["log"]["reference_action_message_with_coords"].format(
+                option_label="A",
                 coords_text="5, 6",
             )
             page.locator("#reference_action_btn button, button#reference_action_btn").first.click()
                     arg={"label": "B"},
                     timeout=5000,
                 )
+                selected_coords = _read_coords_box_value(page)
+                assert selected_coords is not None
                 page.locator("#exec_btn button, button#exec_btn").first.click()
                 page.wait_for_selector("#execute_video video", timeout=5000)
                 page.wait_for_function(
                 )
                 if not done:
                     execution_log = _read_log_output_value(page)
+                    assert execution_log == config_module.UI_TEXT["log"]["execute_action_prompt_with_coords"].format(
+                        label="B",
+                        coords_text=selected_coords,
+                    )
                     assert execution_log != config_module.UI_TEXT["log"]["point_selection_prompt"]
                 controls_after_execute = _read_demo_video_controls(page, elem_id="execute_video", button_elem_id=None)
                 assert controls_after_execute["autoplay"] is True

gradio-web/test/test_ui_text_config.py CHANGED Viewed

@@ -141,7 +141,7 @@ def test_on_execute_video_end_transition_clears_execution_video_log_state(reload
             "preserve_terminal_log": False,
             "terminal_log_value": None,
             "preserve_execute_video_log": True,
-            "execute_video_log_value": "Executing Action b",
         },
     )
@@ -222,19 +222,19 @@ def test_on_option_select_preserves_execution_video_log_state(reload_module):
             "preserve_terminal_log": False,
             "terminal_log_value": None,
             "preserve_execute_video_log": True,
-            "execute_video_log_value": "Executing Action b",
         },
     )
     assert coords_update.get("__type__") == "update"
     assert img_update.get("__type__") == "update"
-    assert log_update["value"] == "Executing Action b"
     assert suppress_flag is False
     assert log_state == {
         "preserve_terminal_log": False,
         "terminal_log_value": None,
         "preserve_execute_video_log": True,
-        "execute_video_log_value": "Executing Action b",
     }

             "preserve_terminal_log": False,
             "terminal_log_value": None,
             "preserve_execute_video_log": True,
+            "execute_video_log_value": "Executing: B",
         },
     )
             "preserve_terminal_log": False,
             "terminal_log_value": None,
             "preserve_execute_video_log": True,
+            "execute_video_log_value": "Executing: B",
         },
     )
     assert coords_update.get("__type__") == "update"
     assert img_update.get("__type__") == "update"
+    assert log_update["value"] == "Executing: B"
     assert suppress_flag is False
     assert log_state == {
         "preserve_terminal_log": False,
         "terminal_log_value": None,
         "preserve_execute_video_log": True,
+        "execute_video_log_value": "Executing: B",
     }