Spaces:

HongzeFu
/

RoboMME

Running on T4

App Files Files Community

HongzeFu committed on 1 day ago

Commit

7f121df

1 Parent(s): a64124d

stopcube remain static vqa override

Browse files

Files changed (4) hide show

gradio-web/oracle_logic.py +1 -1
gradio-web/test/test_stopcube_vqa_override.py +114 -0
gradio-web/test/test_ui_phase_machine_runtime_e2e.py +294 -0
gradio-web/vqa_options_override.py +137 -0

gradio-web/oracle_logic.py CHANGED Viewed

@@ -31,7 +31,7 @@ except Exception as e:
 # --- Project Imports ---
 from robomme.env_record_wrapper import BenchmarkEnvBuilder
 from robomme.robomme_env import *  # noqa: F401,F403; ensure gym envs are registered
-from robomme.robomme_env.utils.vqa_options import get_vqa_options
 from robomme.robomme_env.utils.oracle_action_matcher import (
     find_exact_label_option_index,
     map_action_text_to_option_label,

 # --- Project Imports ---
 from robomme.env_record_wrapper import BenchmarkEnvBuilder
 from robomme.robomme_env import *  # noqa: F401,F403; ensure gym envs are registered
+from vqa_options_override import get_vqa_options
 from robomme.robomme_env.utils.oracle_action_matcher import (
     find_exact_label_option_index,
     map_action_text_to_option_label,

gradio-web/test/test_stopcube_vqa_override.py ADDED Viewed

	@@ -0,0 +1,114 @@

+from __future__ import annotations
+from robomme.robomme_env.utils import vqa_options as upstream_vqa_options
+class _DummyBase:
+    def __init__(self, steps_press, interval=30):
+        self.steps_press = steps_press
+        self.interval = interval
+        self.button = object()
+class _DummyEnv:
+    def __init__(self, base, elapsed_steps=0):
+        self.unwrapped = base
+        self.elapsed_steps = elapsed_steps
+def _get_stopcube_options(module, env):
+    return module.get_vqa_options(env, planner=None, selected_target={"obj": None}, env_id="StopCube")
+def _get_remain_static_solver(options):
+    for option in options:
+        if option.get("action") == "remain static":
+            return option["solve"]
+    raise AssertionError("Missing 'remain static' option")
def _install_hold_spy(monkeypatch, override):
    """Replace ``override.solve_hold_obj_absTimestep`` with a recording spy.

    The spy appends every requested ``absTimestep`` to the returned list and
    sets ``env.elapsed_steps`` to that value, so the fake env appears to
    advance exactly to the requested timestep.

    Returns:
        The list that accumulates the requested timesteps, in call order.
    """
    hold_calls = []

    def _hold_spy(env, planner, absTimestep):
        _ = planner
        hold_calls.append(int(absTimestep))
        env.elapsed_steps = int(absTimestep)
        return None

    monkeypatch.setattr(override, "solve_hold_obj_absTimestep", _hold_spy)
    return hold_calls


def test_stopcube_remain_static_merges_short_tail(monkeypatch, reload_module):
    """A tail shorter than 50 steps is merged into the previous hop.

    steps_press=270 and interval=30 give a final target of 240; the 40-step
    tail after 200 is merged, so the holds go to 100, then 240, then stay
    at 240.
    """
    override = reload_module("vqa_options_override")
    hold_calls = _install_hold_spy(monkeypatch, override)
    env = _DummyEnv(_DummyBase(steps_press=270, interval=30), elapsed_steps=0)

    options = _get_stopcube_options(override, env)
    actions = [option.get("action") for option in options]
    assert actions == [
        "move to the top of the button to prepare",
        "remain static",
        "press button to stop the cube",
    ]

    solve_remain_static = _get_remain_static_solver(options)
    for _ in range(3):
        solve_remain_static()
    assert hold_calls == [100, 240, 240]


def test_stopcube_remain_static_keeps_boundary_tail(monkeypatch, reload_module):
    """A tail of exactly 50 steps is kept as its own checkpoint.

    steps_press=280 and interval=30 give a final target of 250, so the
    holds go to 100, 200 and 250, and a fourth call repeats 250.
    """
    override = reload_module("vqa_options_override")
    hold_calls = _install_hold_spy(monkeypatch, override)
    env = _DummyEnv(_DummyBase(steps_press=280, interval=30), elapsed_steps=0)

    solve_remain_static = _get_remain_static_solver(_get_stopcube_options(override, env))
    for _ in range(4):
        solve_remain_static()
    assert hold_calls == [100, 200, 250, 250]


def test_stopcube_remain_static_resets_after_elapsed_steps_go_back(monkeypatch, reload_module):
    """Moving ``elapsed_steps`` backwards restarts the checkpoint sequence."""
    override = reload_module("vqa_options_override")
    hold_calls = _install_hold_spy(monkeypatch, override)
    env = _DummyEnv(_DummyBase(steps_press=270, interval=30), elapsed_steps=0)

    solve_remain_static = _get_remain_static_solver(_get_stopcube_options(override, env))
    solve_remain_static()
    solve_remain_static()
    # Simulate a rollback of the env; the next hold must start again at 100.
    env.elapsed_steps = 0
    solve_remain_static()
    assert hold_calls == [100, 240, 100]
def test_non_stopcube_builders_passthrough_to_upstream(reload_module):
    """Only the StopCube builder is replaced; BinFill is the upstream builder object."""
    override = reload_module("vqa_options_override")
    local_builders = override.OPTION_BUILDERS
    stopcube_builder = local_builders["StopCube"]
    assert stopcube_builder is override._options_stopcube_override
    upstream_builders = upstream_vqa_options.OPTION_BUILDERS
    assert stopcube_builder is not upstream_builders["StopCube"]
    assert local_builders["BinFill"] is upstream_builders["BinFill"]

gradio-web/test/test_ui_phase_machine_runtime_e2e.py CHANGED Viewed

@@ -2940,3 +2940,297 @@ def test_phase_machine_runtime_local_video_path_end_transition_terminal_failed()
         expect_terminal_buttons_disabled=True,
         expected_terminal_log="episode failed",
     )

         expect_terminal_buttons_disabled=True,
         expected_terminal_log="episode failed",
     )
def test_phase_machine_runtime_stopcube_remain_static_merges_short_tail(monkeypatch):
    """Browser-level check of StopCube "remain static" checkpointing via the UI.

    A minimal Blocks app is wired to the callbacks in ``gradio_callbacks``
    with a fake session whose env has ``steps_press=270`` and ``interval=30``.
    Playwright selects the second radio option and executes it twice; the
    spied ``solve_hold_obj_absTimestep`` must then have been asked to hold to
    timesteps 100 and 240.
    """
    import gradio_callbacks as cb
    import config as config_module
    import vqa_options_override as override

    demo_video_path = gr.get_video("world.mp4")
    fake_obs = np.zeros((24, 24, 3), dtype=np.uint8)
    hold_calls = []

    def _hold_spy(env, planner, absTimestep):
        # Record the requested timestep and pretend the env advanced to it.
        _ = planner
        hold_calls.append(int(absTimestep))
        env.elapsed_steps = int(absTimestep)
        return None

    monkeypatch.setattr(override, "solve_hold_obj_absTimestep", _hold_spy)

    class FakeBase:
        def __init__(self):
            self.steps_press = 270
            self.interval = 30
            self.button = object()

    class FakeEnv:
        def __init__(self):
            self.unwrapped = FakeBase()
            self.elapsed_steps = 0

    class FakeSession:
        def __init__(self):
            self.env_id = "StopCube"
            self.episode_idx = 1
            self.language_goal = "stop the moving cube"
            self.difficulty = "easy"
            self.seed = 123
            self.non_demonstration_task_length = None
            self.demonstration_frames = []
            self.last_execution_frames = []
            self.base_frames = [fake_obs.copy()]
            self.env = FakeEnv()
            self.planner = object()
            self.raw_solve_options = override.get_vqa_options(
                self.env,
                self.planner,
                {"obj": None},
                self.env_id,
            )
            self.available_options = [
                (f"{opt['label']}. {opt['action']}", idx)
                for idx, opt in enumerate(self.raw_solve_options)
            ]

        def get_pil_image(self, use_segmented=False):
            _ = use_segmented
            return fake_obs.copy()

        def update_observation(self, use_segmentation=False):
            _ = use_segmentation
            return None

        def execute_action(self, option_idx, click_coords):
            _ = click_coords
            # Options are rebuilt on every execution; checkpoint progress
            # therefore has to survive on the env's unwrapped base object.
            current_options = override.get_vqa_options(
                self.env,
                self.planner,
                {"obj": None},
                self.env_id,
            )
            current_options[option_idx]["solve"]()
            frame_value = hold_calls[-1] if hold_calls else 0
            frame = np.full((24, 24, 3), frame_value, dtype=np.uint8)
            self.last_execution_frames = [frame.copy(), frame.copy()]
            self.base_frames.extend(self.last_execution_frames)
            return frame.copy(), f"Executing: {current_options[option_idx]['label']}", False

    fake_session = FakeSession()
    monkeypatch.setattr(cb, "get_session", lambda uid: fake_session)
    monkeypatch.setattr(cb, "increment_execute_count", lambda uid, env_id, ep_num: 1)
    monkeypatch.setattr(cb, "save_video", lambda frames, suffix="": demo_video_path)
    monkeypatch.setattr(cb, "concatenate_frames_horizontally", lambda frames, env_id=None: list(frames))
    monkeypatch.setattr(cb.os.path, "exists", lambda path: True)
    monkeypatch.setattr(cb.os.path, "getsize", lambda path: 10)

    # Browser-side predicates; they do not change between loop iterations.
    execute_phase_visible_js = """() => {
        const visible = (id) => {
            const el = document.getElementById(id);
            if (!el) return false;
            const st = getComputedStyle(el);
            return st.display !== 'none' && st.visibility !== 'hidden' && el.getClientRects().length > 0;
        };
        const videoEl = document.querySelector('#execute_video video');
        return (
            visible('execution_video_group') &&
            visible('execute_video') &&
            !visible('action_phase_group') &&
            !!videoEl &&
            videoEl.autoplay === true
        );
    }"""
    action_phase_restored_js = """() => {
        const visible = (id) => {
            const el = document.getElementById(id);
            if (!el) return false;
            const st = getComputedStyle(el);
            return st.display !== 'none' && st.visibility !== 'hidden' && el.getClientRects().length > 0;
        };
        const execBtn = document.querySelector('#exec_btn button') || document.querySelector('button#exec_btn');
        return (
            visible('action_phase_group') &&
            visible('control_panel_group') &&
            !visible('execute_video') &&
            !!execBtn &&
            execBtn.disabled === false
        );
    }"""

    with gr.Blocks(title="Native StopCube merge test") as demo:
        uid_state = gr.State(value="uid-stopcube-merge")
        phase_state = gr.State(value="action_point")
        post_execute_controls_state = gr.State(
            value={
                "exec_btn_interactive": True,
                "reference_action_interactive": True,
            }
        )
        post_execute_log_state = gr.State(
            value={
                "preserve_terminal_log": False,
                "terminal_log_value": None,
            }
        )
        suppress_state = gr.State(value=False)
        with gr.Column(visible=True, elem_id="main_interface") as main_interface:
            with gr.Column(visible=False, elem_id="video_phase_group") as video_phase_group:
                video_display = gr.Video(value=None, elem_id="demo_video", autoplay=False)
                watch_demo_video_btn = gr.Button(
                    "Watch Video Input🎬",
                    elem_id="watch_demo_video_btn",
                    interactive=False,
                    visible=False,
                )
            with gr.Column(visible=False, elem_id="execution_video_group") as execution_video_group:
                execute_video_display = gr.Video(value=None, elem_id="execute_video", autoplay=True)
            with gr.Column(visible=True, elem_id="action_phase_group") as action_phase_group:
                img_display = gr.Image(value=fake_obs.copy(), elem_id="live_obs")
            with gr.Column(visible=True, elem_id="control_panel_group") as control_panel_group:
                options_radio = gr.Radio(
                    choices=fake_session.available_options,
                    value=None,
                    elem_id="action_radio",
                )
                coords_box = gr.Textbox(config_module.UI_TEXT["coords"]["not_needed"], elem_id="coords_box")
                exec_btn = gr.Button("execute", interactive=True, elem_id="exec_btn")
                reference_action_btn = gr.Button("reference", interactive=True, elem_id="reference_action_btn")
                restart_episode_btn = gr.Button("restart", interactive=True, elem_id="restart_episode_btn")
                next_task_btn = gr.Button("next", interactive=True, elem_id="next_task_btn")
                task_hint_display = gr.Textbox("hint", interactive=True, elem_id="task_hint_display")
        log_output = gr.Markdown("", elem_id="log_output")
        task_info_box = gr.Textbox("")
        progress_info_box = gr.Textbox("")

        exec_btn.click(
            fn=cb.precheck_execute_inputs,
            inputs=[uid_state, options_radio, coords_box],
            outputs=[],
            queue=False,
        ).then(
            fn=cb.switch_to_execute_phase,
            inputs=[uid_state],
            outputs=[
                options_radio,
                exec_btn,
                restart_episode_btn,
                next_task_btn,
                img_display,
                reference_action_btn,
                task_hint_display,
            ],
            queue=False,
        ).then(
            fn=cb.execute_step,
            inputs=[uid_state, options_radio, coords_box],
            outputs=[
                img_display,
                log_output,
                task_info_box,
                progress_info_box,
                restart_episode_btn,
                next_task_btn,
                exec_btn,
                execute_video_display,
                action_phase_group,
                control_panel_group,
                execution_video_group,
                options_radio,
                coords_box,
                reference_action_btn,
                task_hint_display,
                post_execute_controls_state,
                post_execute_log_state,
                phase_state,
            ],
            queue=False,
        )
        options_radio.change(
            fn=cb.on_option_select,
            inputs=[uid_state, options_radio, coords_box, suppress_state, post_execute_log_state],
            outputs=[coords_box, img_display, log_output, suppress_state, post_execute_log_state],
            queue=False,
        )

        # The video's "end" and "stop" events share one handler and one wiring.
        transition_inputs = [uid_state, post_execute_controls_state, post_execute_log_state]
        transition_outputs = [
            execution_video_group,
            action_phase_group,
            control_panel_group,
            options_radio,
            exec_btn,
            restart_episode_btn,
            next_task_btn,
            img_display,
            log_output,
            reference_action_btn,
            task_hint_display,
            phase_state,
        ]
        for register_video_event in (execute_video_display.end, execute_video_display.stop):
            register_video_event(
                fn=cb.on_execute_video_end_transition,
                inputs=transition_inputs,
                outputs=transition_outputs,
                queue=False,
            )

    port = _free_port()
    host = "127.0.0.1"
    root_url = f"http://{host}:{port}/"
    app = FastAPI(title="native-stopcube-merge-test")
    app = gr.mount_gradio_app(app, demo, path="/")
    config = uvicorn.Config(app, host=host, port=port, log_level="error")
    server = uvicorn.Server(config)
    thread = threading.Thread(target=server.run, daemon=True)
    thread.start()
    _wait_http_ready(root_url)
    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page(viewport={"width": 1280, "height": 900})
                page.goto(root_url, wait_until="domcontentloaded")
                page.wait_for_selector("#main_interface", state="visible", timeout=20000)
                for _ in range(2):
                    # Radio index 1 is the second option in the list.
                    page.locator("#action_radio input[type='radio']").nth(1).check(force=True)
                    page.locator("#exec_btn button, button#exec_btn").first.click()
                    page.wait_for_selector("#execute_video video", timeout=5000)
                    page.wait_for_function(execute_phase_visible_js, timeout=10000)
                    assert _dispatch_video_event(page, "ended", elem_id="execute_video")
                    page.wait_for_function(action_phase_restored_js, timeout=5000)
                assert hold_calls == [100, 240]
            finally:
                # Close the browser even when a wait or assertion above fails.
                browser.close()
    finally:
        server.should_exit = True
        thread.join(timeout=10)
        demo.close()

gradio-web/vqa_options_override.py ADDED Viewed

	@@ -0,0 +1,137 @@

+from __future__ import annotations
+from typing import Callable, Dict, List
+from robomme.robomme_env.utils import vqa_options as upstream_vqa_options
+solve_button = upstream_vqa_options.solve_button
+solve_button_ready = upstream_vqa_options.solve_button_ready
+solve_hold_obj_absTimestep = upstream_vqa_options.solve_hold_obj_absTimestep
+def _build_stopcube_static_checkpoints(final_target: int) -> List[int]:
+    checkpoints = list(range(100, final_target, 100))
+    if not checkpoints or checkpoints[-1] != final_target:
+        checkpoints.append(final_target)
+    if len(checkpoints) >= 2 and checkpoints[-1] - checkpoints[-2] < 50:
+        del checkpoints[-2]
+    return checkpoints
def _options_stopcube_override(env, planner, require_target, base) -> List[dict]:
    """Build the StopCube options with a checkpointed "remain static" action.

    Args:
        env: Environment handed to the solver helpers; its ``elapsed_steps``
            attribute (0 when absent) is read to track progress.
        planner: Passed through unchanged to the solver helpers.
        require_target: Unused; accepted because ``get_vqa_options`` passes
            it to every builder.
        base: Unwrapped environment. ``button``, ``steps_press`` and
            ``interval`` are read from it, and checkpoint progress is cached
            on it in ``_stopcube_static_*`` attributes.

    Returns:
        A list of option dicts with "label", "action" and "solve" keys.
        Options "a" and "c" are present only when ``base.button`` is set,
        option "b" only when ``base.steps_press`` is set.
    """
    _ = require_target  # None of the StopCube solvers need a clicked target.
    options: List[dict] = []
    button_obj = getattr(base, "button", None)
    if button_obj is not None:
        options.append(
            {
                "label": "a",
                "action": "move to the top of the button to prepare",
                "solve": lambda button_obj=button_obj: solve_button_ready(
                    env, planner, obj=button_obj
                ),
            }
        )
    steps_press = getattr(base, "steps_press", None)
    if steps_press is not None:

        def solve_with_incremental_steps():
            """Hold until the next checkpoint; repeat the last one when exhausted."""
            # Re-read on every call: the value on base may have changed since
            # the option list was built.
            steps_press_value = getattr(base, "steps_press", None)
            if steps_press_value is None:
                return None
            interval = getattr(base, "interval", 30)
            final_target = max(0, int(steps_press_value - interval))
            current_step = int(getattr(env, "elapsed_steps", 0))

            checkpoints_key = "_stopcube_static_checkpoints"
            index_key = "_stopcube_static_index"
            cached_final_target_key = "_stopcube_static_final_target"
            last_elapsed_step_key = "_stopcube_static_last_elapsed_step"
            checkpoints = getattr(base, checkpoints_key, None)
            index = getattr(base, index_key, None)
            cached_final_target = getattr(base, cached_final_target_key, None)
            last_elapsed_step = getattr(base, last_elapsed_step_key, None)

            # Start over when there is no usable cache, the target changed,
            # or the env's step counter moved backwards.
            needs_rebuild = (
                not isinstance(checkpoints, list)
                or len(checkpoints) == 0
                or index is None
                or cached_final_target is None
                or int(cached_final_target) != final_target
                or (
                    last_elapsed_step is not None
                    and current_step < int(last_elapsed_step)
                )
            )
            if needs_rebuild:
                checkpoints = _build_stopcube_static_checkpoints(final_target)
                index = 0
            else:
                # Clamp so calls past the end keep targeting the last checkpoint.
                index = min(max(int(index), 0), len(checkpoints) - 1)

            target = checkpoints[index]
            solve_hold_obj_absTimestep(env, planner, absTimestep=target)
            index += 1

            # Record the step reached *after* the hold. Storing the pre-hold
            # step would leave 0 after the first call, so a later reset to
            # step 0 would not be detected as going backwards.
            reached_step = int(getattr(env, "elapsed_steps", current_step))
            setattr(base, checkpoints_key, checkpoints)
            setattr(base, index_key, index)
            setattr(base, cached_final_target_key, final_target)
            setattr(base, last_elapsed_step_key, reached_step)
            return None

        options.append(
            {
                "label": "b",
                "action": "remain static",
                "solve": solve_with_incremental_steps,
            }
        )
    if button_obj is not None:
        options.append(
            {
                "label": "c",
                "action": "press button to stop the cube",
                "solve": lambda button_obj=button_obj: solve_button(
                    env, planner, obj=button_obj, without_hold=True
                ),
            }
        )
    return options
# Copy of the upstream registry with only the StopCube builder swapped out;
# the upstream dict itself is left untouched.
OPTION_BUILDERS: Dict[str, Callable] = {
    **upstream_vqa_options.OPTION_BUILDERS,
    "StopCube": _options_stopcube_override,
}


def get_vqa_options(env, planner, selected_target, env_id: str) -> List[dict]:
    """Build the solve options for ``env_id`` from the Gradio-side registry.

    Looks up the builder in ``OPTION_BUILDERS`` (falling back to the upstream
    ``_options_default``) and calls it with ``env``, ``planner``, a target
    accessor and ``env.unwrapped``. The upstream module is not modified.

    The target accessor handed to the builder raises ``ValueError`` when
    ``selected_target["obj"]`` is missing or ``None``.
    """

    def _require_target():
        target = selected_target.get("obj")
        if target is not None:
            return target
        raise ValueError(
            "No available target cube found, please click target in segmentation map first."
        )

    unwrapped_env = env.unwrapped
    fallback_builder = upstream_vqa_options._options_default
    build_options = OPTION_BUILDERS.get(env_id, fallback_builder)
    return build_options(env, planner, _require_target, unwrapped_env)