Add files using upload-large-folder tool
Browse files- robot/cam.py +144 -0
- starforce.egg-info/dependency_links.txt +1 -0
- starhelm/starhelm/__init__.py +3 -0
- starhelm/starhelm/image_tools_test.py +37 -0
- test_starhelm.py +88 -0
- tests/alter_lerobot_key.py +53 -0
- tests/async_client.py +602 -0
- tests/install_av_opencv.sh +5 -0
- tests/modality.json +54 -0
- tests/replay_sl.py +628 -0
- tests/save_s1.py +70 -0
- tests/save_s1_7B.py +69 -0
- tests/test_cv.py +22 -0
- tests/test_hf.py +19 -0
- tests/test_pi0.py +58 -0
- tests/test_starhelm.py +0 -0
- tests/test_tensor.py +21 -0
- tests/vis_lerobot_data.py +200 -0
- tests/vis_lerobot_data_v1.py +125 -0
- wandb/debug.log +30 -0
robot/cam.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import threading
|
| 2 |
+
import json_numpy
|
| 3 |
+
import numpy as np
|
| 4 |
+
import requests
|
| 5 |
+
import queue
|
| 6 |
+
import cv2
|
| 7 |
+
import pickle, os
|
| 8 |
+
from matplotlib.pyplot import step
|
| 9 |
+
from typing import Any, cast
|
| 10 |
+
|
| 11 |
+
json_numpy.patch()
|
| 12 |
+
|
| 13 |
+
from starforce.experiment.data_config import DATA_CONFIG_MAP
|
| 14 |
+
from starforce.model.policy import Gr00tPolicy
|
| 15 |
+
|
| 16 |
+
# Import RTCController from separate module
|
| 17 |
+
from loguru import logger
|
| 18 |
+
|
| 19 |
+
try:
|
| 20 |
+
import pyrealsense2 as rs
|
| 21 |
+
except ImportError:
|
| 22 |
+
print("Warning: pyrealsense2 not available. Camera functionality will be limited.")
|
| 23 |
+
rs = None
|
| 24 |
+
|
| 25 |
+
# Help static analyzers: treat rs as dynamic Any when available
|
| 26 |
+
if rs is not None:
|
| 27 |
+
rs = cast(Any, rs)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class CameraWrapper:
    """Manage a mixed set of RealSense and OpenCV cameras as one unit.

    The first ``num_realsense`` entries of ``devices`` are treated as
    RealSense serial numbers; the remaining entries are OpenCV device
    indices.  Cameras that fail to open are skipped with a log message,
    and ``get_images`` falls back to gray placeholder frames, so
    downstream code always receives a predictable list of images.
    """

    def __init__(
        self, devices=None, width=640, height=480, fps=30, num_realsense=0, cv_format="MJPEG"
    ):
        """
        Args:
            devices: camera identifiers — RealSense serials first, then
                OpenCV device indices. ``None`` means no cameras.
            width, height, fps: stream settings requested from every camera.
            num_realsense: number of leading ``devices`` entries that are
                RealSense serials.
            cv_format: FOURCC requested from OpenCV cameras ("MJPEG" or
                "YUYV"); any other value keeps the driver default.
        """
        self.width = width
        self.height = height
        self.fps = fps
        self.num_realsense = max(0, int(num_realsense))
        self.cv_format = cv_format
        # Each entry: {"type": "rs" | "cv", "handle": rs pipeline | cv2 capture}
        self.cameras = []
        self.device_ids = devices if devices is not None else []
        self._open_cameras()
        print(f"successfully opened {len(self.cameras)} cameras!")

    def _dummy_image(self):
        """Return a uniform gray (H, W, 3) uint8 placeholder frame."""
        return np.full((self.height, self.width, 3), 128, dtype=np.uint8)

    def _open_cameras(self):
        """Open every device in self.device_ids, appending successes to self.cameras."""
        if not self.device_ids:
            print("No devices provided for CameraWrapper")
            return

        for idx, dev in enumerate(self.device_ids):
            # Leading entries are RealSense serials; the rest are OpenCV indices.
            if idx < self.num_realsense:
                if rs is None:
                    print(
                        f"pyrealsense2 not available, skipping RealSense device at index {idx} (id: {dev})"
                    )
                    continue
                try:
                    serial = str(dev)
                    pipeline = rs.pipeline()  # type: ignore[attr-defined]
                    config = rs.config()  # type: ignore[attr-defined]
                    config.enable_device(serial)
                    config.enable_stream(rs.stream.color, self.width, self.height, rs.format.bgr8, self.fps)  # type: ignore[attr-defined]
                    pipeline.start(config)
                    self.cameras.append({"type": "rs", "handle": pipeline})
                    print(f"RealSense camera {serial} opened successfully")
                except Exception as e:
                    print(f"Failed to open RealSense camera {dev}: {e}")
            else:
                try:
                    device_index = int(dev)
                    print(f"Ready to read device: {device_index}")  # fixed typo "deive"
                    cap = cv2.VideoCapture(device_index)

                    # Fail early: configuring a capture that never opened is pointless.
                    if not cap.isOpened():
                        raise ValueError(f"Cannot open OpenCV camera {device_index}")

                    if self.cv_format == "MJPEG":
                        cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*"MJPG"))  # type: ignore[attr-defined]
                    elif self.cv_format == "YUYV":
                        cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*"YUYV"))  # type: ignore[attr-defined]

                    cap.set(cv2.CAP_PROP_FRAME_WIDTH, self.width)
                    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, self.height)
                    cap.set(cv2.CAP_PROP_FPS, self.fps)

                    self.cameras.append({"type": "cv", "handle": cap})
                    print(f"OpenCV camera {device_index} opened successfully")
                except Exception as e:
                    print(f"Failed to open OpenCV camera {dev}: {e}")

    def get_images(self):
        """Grab one frame per opened camera.

        Returns:
            list of (H, W, 3) uint8 arrays. Failed grabs — and the
            no-camera case — are replaced by gray placeholder frames.
        """
        if not self.cameras:
            # No cameras: one placeholder per requested device (at least one).
            return [self._dummy_image() for _ in range(max(1, len(self.device_ids)))]

        images = []
        for cam in self.cameras:
            if cam["type"] == "rs":
                try:
                    frames = cam["handle"].wait_for_frames()
                    color_frame = frames.get_color_frame()
                    if color_frame:
                        images.append(np.asanyarray(color_frame.get_data()))
                    else:
                        images.append(self._dummy_image())
                except Exception as e:
                    print(f"Error reading from RealSense: {e}")
                    images.append(self._dummy_image())
            elif cam["type"] == "cv":
                ret, frame = cam["handle"].read()
                images.append(frame if ret and frame is not None else self._dummy_image())
        return images

    def release(self):
        """Stop/release every camera handle, swallowing per-device errors."""
        for cam in self.cameras:
            if cam["type"] == "rs":
                try:
                    cam["handle"].stop()
                except Exception:
                    pass
            elif cam["type"] == "cv":
                try:
                    cam["handle"].release()
                except Exception:
                    pass
        self.cameras = []
|
starforce.egg-info/dependency_links.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
starhelm/starhelm/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""starhelm package root.

Re-exports WebsocketClientPolicy as the package's public entry point.
"""
from .websocket_client_policy import WebsocketClientPolicy

__version__ = "0.1.0"
|
starhelm/starhelm/image_tools_test.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
|
| 3 |
+
import openpi_client.image_tools as image_tools
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def test_resize_with_pad_shapes():
    """resize_with_pad must return the requested (B, H, W, C) shape and
    keep all-zero inputs all-zero for upscale, downscale, identity, and
    odd-padding resizes."""
    cases = [
        # (batch, in_h, in_w, out_h, out_w)
        (2, 10, 10, 20, 20),    # larger target dimensions
        (3, 30, 30, 15, 15),    # smaller target dimensions
        (1, 50, 50, 50, 50),    # identical dimensions
        (1, 256, 320, 60, 80),  # odd-numbered padding
    ]
    for batch, in_h, in_w, out_h, out_w in cases:
        images = np.zeros((batch, in_h, in_w, 3), dtype=np.uint8)
        resized = image_tools.resize_with_pad(images, out_h, out_w)
        assert resized.shape == (batch, out_h, out_w, 3)
        assert np.all(resized == 0)
|
test_starhelm.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
read data from lerobot
|
| 3 |
+
|
| 4 |
+
requesting the server
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from starhelm.websocket_client_policy import WebsocketClientPolicy
|
| 8 |
+
import time
|
| 9 |
+
import torch
|
| 10 |
+
from vlaholo.datasets.lerobot_dataset import LeRobotDataset
|
| 11 |
+
|
| 12 |
+
# from lerobot.datasets.lerobot_dataset import LeRobotDataset
|
| 13 |
+
# from starforce.data.dataset import LeRobotSingleDataset as LeRobotDataset
|
| 14 |
+
import sys
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
import os
|
| 18 |
+
|
| 19 |
+
os.environ.pop("http_proxy", None)
|
| 20 |
+
os.environ.pop("https_proxy", None)
|
| 21 |
+
os.environ.pop("all_proxy", None)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def get_dummy_data():
    """Load one LeRobot episode and return ``(dataset, batch)``.

    Uses the same data loader as vlaholo; the 30th batch (index 29) is
    picked as a representative frame.
    """
    repo_id = "data/qz_zz/0801_task9/pick"
    dataset = LeRobotDataset(repo_id, episodes=[10], video_backend="pyav")

    loader = torch.utils.data.DataLoader(dataset, num_workers=0, batch_size=1)

    target_idx = 29
    for idx, sample in enumerate(loader):
        if idx == target_idx:
            return dataset, sample
    # Mirror next()-on-exhausted-generator behavior from the original.
    raise StopIteration
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def tensor_as_np_image(t):
    """Convert a batched (B, C, H, W) image tensor to a (1, H, W, C) numpy
    array built from the first batch element only."""
    first = t[0]                   # (C, H, W)
    hwc = first.permute(1, 2, 0)   # (H, W, C)
    return hwc.cpu().unsqueeze(0).numpy()
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
if __name__ == "__main__":

    # Benchmark script: read one cached batch from a LeRobot episode and
    # send it repeatedly to the remote VLA policy server.
    # vla_model = WebsocketClientPolicy(host="172.16.0.171", port=9001)
    vla_model = WebsocketClientPolicy(host="172.16.0.111", port=9001)

    ds, batch_data = get_dummy_data()
    print(batch_data.keys())
    t0 = time.time()
    benchmark_iters = 30
    for _ in range(benchmark_iters):
        # print(batch)
        t00 = time.time()

        # hwc 0-1 numpy array
        image_cam_high = tensor_as_np_image(batch_data["observation.images.cam_high"])
        image_cam_left = tensor_as_np_image(batch_data["observation.images.cam_left_wrist"])
        image_cam_right = tensor_as_np_image(batch_data["observation.images.cam_right_wrist"])
        # [1, H, W, 3] 0-1 pixelvalue
        print(f"image_cam_high: {image_cam_high.shape}")
        # obs format expected by the server: three camera views, the robot
        # state vector, the task string, and a debug flag.
        obs = {
            "images": {
                "cam_high": image_cam_high,
                "cam_left_wrist": image_cam_left,
                "cam_right_wrist": image_cam_right,
            },
            # state: [1, 14]
            "state": batch_data["observation.state"].cpu().numpy(),
            # str language
            "prompt": batch_data["task"],
            # for verbose
            "debug": True,
        }
        action = vla_model.infer(obs=obs)

        print(
            "##info, action:",
            action,
            time.time() - t00,
        )
        # NOTE(review): this break exits after ONE iteration, so the "avg"
        # printed below divides a single inference by benchmark_iters —
        # remove the break (or change the divisor) for a real benchmark.
        break
    t1 = time.time()
    print(f"cost: {t1-t0:.3f}, avg: {(t1-t0)/benchmark_iters}")
|
tests/alter_lerobot_key.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
|
| 3 |
+
change a key of an existed lerobot dataset
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import shutil
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from datasets import load_dataset
|
| 10 |
+
from vlaholo.datasets.lerobot_dataset import LeRobotDataset
|
| 11 |
+
|
| 12 |
+
# Paths
|
| 13 |
+
# old_root = Path("data/qz/lerobot_data/airbot_datasets/airbot_data_0724/pick")
|
| 14 |
+
old_root = Path("data/sl/0721pre_data_v3")
|
| 15 |
+
old_root = Path("/pfs/data/yangcheng/.cache/huggingface/lerobot/pick")
|
| 16 |
+
new_root = old_root / 'new'
|
| 17 |
+
|
| 18 |
+
# 1. Load the existing dataset using LeRobotDataset
|
| 19 |
+
old_ds = LeRobotDataset(repo_id=old_root)
|
| 20 |
+
print(f'done')
|
| 21 |
+
|
| 22 |
+
# 2. Create a fresh LeRobotDataset at the new location with the same metadata
|
| 23 |
+
new_ds = LeRobotDataset.create(
|
| 24 |
+
repo_id="converted_dataset",
|
| 25 |
+
fps=old_ds.meta.info["fps"],
|
| 26 |
+
root=new_root,
|
| 27 |
+
features=old_ds.meta.info["features"],
|
| 28 |
+
use_videos=old_ds.meta.info.get("video", True),
|
| 29 |
+
)
|
| 30 |
+
# 3. Copy auxiliary folders (videos, tasks, stats, info)
|
| 31 |
+
for folder in ["videos", "meta"]:
|
| 32 |
+
src = old_root / folder
|
| 33 |
+
dst = new_root / folder
|
| 34 |
+
if src.exists():
|
| 35 |
+
shutil.copytree(src, dst, dirs_exist_ok=True)
|
| 36 |
+
|
| 37 |
+
# 4. Iterate through episodes and rewrite parquet with renamed key
|
| 38 |
+
for ep_idx in range(old_ds.meta.total_episodes):
|
| 39 |
+
# Load episode data via Hugging Face datasets
|
| 40 |
+
data_path = old_root / old_ds.meta.get_data_file_path(ep_idx)
|
| 41 |
+
ds = load_dataset(
|
| 42 |
+
"parquet",
|
| 43 |
+
data_files=[str(data_path)],
|
| 44 |
+
split="train",
|
| 45 |
+
)
|
| 46 |
+
# Rename column
|
| 47 |
+
ds = ds.rename_column("state", "observation.state")
|
| 48 |
+
|
| 49 |
+
# Write back via LeRobotDataset's internal method
|
| 50 |
+
out_path = new_root / new_ds.meta.get_data_file_path(ep_idx)
|
| 51 |
+
ds.to_parquet(str(out_path))
|
| 52 |
+
|
| 53 |
+
print(f"✅ New dataset with renamed key saved to {new_root}")
|
tests/async_client.py
ADDED
|
@@ -0,0 +1,602 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import time
|
| 2 |
+
import threading
|
| 3 |
+
import json_numpy
|
| 4 |
+
import numpy as np
|
| 5 |
+
import requests
|
| 6 |
+
import queue
|
| 7 |
+
import cv2
|
| 8 |
+
import pickle, os
|
| 9 |
+
from matplotlib.pyplot import step
|
| 10 |
+
from typing import Any, cast
|
| 11 |
+
|
| 12 |
+
json_numpy.patch()
|
| 13 |
+
|
| 14 |
+
from airbot_py.arm import AIRBOTPlay, RobotMode, SpeedProfile
|
| 15 |
+
|
| 16 |
+
# Import RTCController from separate module
|
| 17 |
+
from rtc_controller import RTCController
|
| 18 |
+
from ruckig_planner import RuckigPlanner
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
try:
|
| 22 |
+
import pyrealsense2 as rs
|
| 23 |
+
except ImportError:
|
| 24 |
+
print("Warning: pyrealsense2 not available. Camera functionality will be limited.")
|
| 25 |
+
rs = None
|
| 26 |
+
|
| 27 |
+
# Help static analyzers: treat rs as dynamic Any when available
|
| 28 |
+
if rs is not None:
|
| 29 |
+
rs = cast(Any, rs)
|
| 30 |
+
|
| 31 |
+
class CameraWrapper:
    """Manage a mixed set of RealSense and OpenCV cameras as one unit.

    The first ``num_realsense`` entries of ``devices`` are RealSense
    serial numbers; the rest are OpenCV device indices.  Cameras that
    fail to open are skipped with a log message, and ``get_images``
    substitutes gray placeholder frames on failure.
    """

    def __init__(self, devices=None, width=640, height=480, fps=30, num_realsense=0, cv_format="MJPEG"):
        """
        Args:
            devices: camera identifiers — RealSense serials first, then
                OpenCV device indices. ``None`` means no cameras.
            width, height, fps: stream settings requested from every camera.
            num_realsense: number of leading ``devices`` entries that are
                RealSense serials.
            cv_format: FOURCC requested from OpenCV cameras ("MJPEG" or
                "YUYV"); any other value keeps the driver default.
        """
        self.width = width
        self.height = height
        self.fps = fps
        self.num_realsense = max(0, int(num_realsense))
        self.cv_format = cv_format
        # Each entry: {"type": "rs" | "cv", "handle": rs pipeline | cv2 capture}
        self.cameras = []
        self.device_ids = devices if devices is not None else []
        self._open_cameras()
        print(f'successfully opened {len(self.cameras)} cameras!')

    def _dummy_image(self):
        """Return a uniform gray (H, W, 3) uint8 placeholder frame."""
        return np.full((self.height, self.width, 3), 128, dtype=np.uint8)

    def _open_cameras(self):
        """Open every device in self.device_ids, appending successes to self.cameras."""
        if not self.device_ids:
            print("No devices provided for CameraWrapper")
            return

        for idx, dev in enumerate(self.device_ids):
            # Leading entries are RealSense serials; the rest are OpenCV indices.
            if idx < self.num_realsense:
                if rs is None:
                    print(f"pyrealsense2 not available, skipping RealSense device at index {idx} (id: {dev})")
                    continue
                try:
                    serial = str(dev)
                    pipeline = rs.pipeline()  # type: ignore[attr-defined]
                    config = rs.config()  # type: ignore[attr-defined]
                    config.enable_device(serial)
                    config.enable_stream(rs.stream.color, self.width, self.height, rs.format.bgr8, self.fps)  # type: ignore[attr-defined]
                    pipeline.start(config)
                    self.cameras.append({"type": "rs", "handle": pipeline})
                    print(f"RealSense camera {serial} opened successfully")
                except Exception as e:
                    print(f"Failed to open RealSense camera {dev}: {e}")
            else:
                try:
                    device_index = int(dev)
                    cap = cv2.VideoCapture(device_index)

                    # BUGFIX: check isOpened() BEFORE configuring — the original
                    # set FOURCC/size/fps on captures that had failed to open.
                    if not cap.isOpened():
                        print(f"Error: Cannot open OpenCV camera {device_index}")
                        continue

                    if self.cv_format == "MJPEG":
                        cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*"MJPG"))  # type: ignore[attr-defined]
                    elif self.cv_format == "YUYV":
                        cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*"YUYV"))  # type: ignore[attr-defined]

                    cap.set(cv2.CAP_PROP_FRAME_WIDTH, self.width)
                    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, self.height)
                    cap.set(cv2.CAP_PROP_FPS, self.fps)

                    self.cameras.append({"type": "cv", "handle": cap})
                    print(f"OpenCV camera {device_index} opened successfully")
                except Exception as e:
                    print(f"Failed to open OpenCV camera {dev}: {e}")

    def get_images(self):
        """Grab one frame per opened camera.

        Returns:
            list of (H, W, 3) uint8 arrays. Failed grabs — and the
            no-camera case — are replaced by gray placeholder frames.
        """
        if not self.cameras:
            # No cameras: one placeholder per requested device (at least one).
            return [self._dummy_image() for _ in range(max(1, len(self.device_ids)))]

        images = []
        for cam in self.cameras:
            if cam["type"] == "rs":
                try:
                    frames = cam["handle"].wait_for_frames()
                    color_frame = frames.get_color_frame()
                    if color_frame:
                        images.append(np.asanyarray(color_frame.get_data()))
                    else:
                        images.append(self._dummy_image())
                except Exception as e:
                    print(f"Error reading from RealSense: {e}")
                    images.append(self._dummy_image())
            elif cam["type"] == "cv":
                ret, frame = cam["handle"].read()
                images.append(frame if ret and frame is not None else self._dummy_image())
        return images

    def release(self):
        """Stop/release every camera handle, swallowing per-device errors."""
        for cam in self.cameras:
            if cam["type"] == "rs":
                try:
                    cam["handle"].stop()
                except Exception:
                    pass
            elif cam["type"] == "cv":
                try:
                    cam["handle"].release()
                except Exception:
                    pass
        self.cameras = []
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def normalization(state):
    """Normalize a 7-dof robot state for model input.

    Joints 0-5 pass through unchanged; the gripper (index 6) is
    binarized: widths above 0.04 become 1.0, otherwise -1.0.

    Returns:
        np.ndarray of shape (7,), dtype float32.
    """
    vec = np.asarray(state, dtype=np.float32).copy()
    gripper_open = vec[6] > 0.04
    vec[6] = 1.0 if gripper_open else -1.0
    return vec
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
def unnormalization(action):
    """Map a model action back to robot command space.

    Joints 0-5 pass through unchanged; the gripper (index 6) is mapped to
    a physical width: values > 0 -> 0.06850814 (open), else -> 0.025
    (close).  (Docstring previously claimed a 0.5 threshold, but the code
    thresholds at 0 — consistent with ``normalization`` emitting +/-1.)

    Returns:
        np.ndarray of shape (7,), dtype float32.
    """
    arr = np.array(action, dtype=np.float32).copy()
    arr[6] = 0.06850814 if arr[6] > 0 else 0.025
    return arr
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
class RobotWrapper:
    # Ports dict example: {'left_arm': 50051, 'right_arm': None}
    # Only arms with a non-None port will be initialized
    def __init__(self, url='localhost', ports=None, arm_speed='slow', type='move'):
        """Connect to each configured AIRBOT arm, print its product info,
        and move it to a hard-coded neutral pose with the gripper open.

        Args:
            url: host of the AIRBOT service.
            ports: mapping arm name -> port; a None port disables that arm.
            arm_speed: 'slow' | 'default' | 'fast'.
            type: 'move' (blocking planned motion) or 'servo' (streaming
                joint commands).
        """
        if ports is None:
            ports = {'left_arm': None, 'right_arm': None}
        assert any(p is not None for p in ports.values()), "at least one arm port is required"
        # NOTE(review): the message below is missing an opening quote before
        # 'default' — cosmetic only, cannot be fixed in a doc-only pass.
        assert arm_speed in ['slow', 'default', 'fast'], "arm_speed must be in ['slow', default', 'fast']"
        assert type in ['move', 'servo'], "type must be in ['move', 'servo']"
        self.type = type

        # arm name -> connected AIRBOTPlay handle
        self.robots = {}

        for arm_name in ['left_arm', 'right_arm']:
            port = ports.get(arm_name)
            if port is None:
                continue

            robot = AIRBOTPlay(url=url, port=port)
            robot.connect()
            # 'default' and 'fast' both map to FAST here — TODO confirm intended.
            robot.set_speed_profile(SpeedProfile.SLOW if arm_speed == 'slow' else SpeedProfile.FAST)

            product_info = robot.get_product_info()
            print("---------------------------------------------------------")
            print(f"Arm: {arm_name}")
            print(f"Product name: {product_info['product_type']}")
            print(f"Serial number: {product_info['sn']}")
            print(f"Simulation mode: {product_info['is_sim']}")
            print(f"Using interfaces: {product_info['interfaces']}")
            print(f"Installed end effectors: {product_info['eef_types']}")
            print(f"Firmware versions: {product_info['fw_versions']}")
            print("---------------------------------------------------------")

            # Default initial joints per arm (optional; can be customized)
            if arm_name == 'left_arm':
                joints = [0.0, 0.0, 0.15, -1.7, 0.1, 1.7]
            else:
                joints = [0.0, 0.0, 0.15, 1.7, -0.1, -1.7]

            # using move mode to move to initial pose
            robot.switch_mode(RobotMode.PLANNING_POS)
            # Overwrite with a safe neutral pose
            # NOTE(review): this overwrites the per-arm joints chosen above for
            # BOTH arms — confirm the left arm should use this pose as well.
            joints = [0.00019073777366429567, 0.17948424816131592, 0.027656977996230125, 1.4654383659362793, -0.3435187339782715, -1.4288166761398315]
            # joints = [0.03604944050312042, 0.17948424816131592, 0.029564354568719864, 1.6039139032363892, -0.3419928252696991, -1.5939955711364746]
            robot.move_to_joint_pos(joints)
            robot.move_eef_pos([0.06850814])  # gripper open width (see unnormalization)
            print(f'arm: {arm_name}, joints: {joints}')

            # Switch to the runtime control mode requested by the caller.
            if self.type == 'move':
                robot.switch_mode(RobotMode.PLANNING_POS)
            elif self.type == 'servo':
                robot.switch_mode(RobotMode.SERVO_JOINT_POS)

            init_joint_pos = robot.get_joint_pos()
            init_eef_pos = robot.get_eef_pos()
            print(f'[{arm_name}] init_joint_pos: {init_joint_pos}, init_eef_pos: {init_eef_pos}')
            self.robots[arm_name] = robot
            print(f"robot arm {arm_name} (port: {port}) init success!")
            # Give the arm time to settle before the next arm / first command.
            time.sleep(2)

    def move_to_pos(self, pos, arm='right_arm'):
        """Send a 7-dof target (6 joints + gripper width) to one arm,
        blocking in 'move' mode, streaming in 'servo' mode."""
        assert arm in self.robots, f"arm '{arm}' not initialized"
        if self.type == 'move':
            self.robots[arm].move_to_joint_pos(pos[:6], blocking=True)
            self.robots[arm].move_eef_pos([pos[6]], blocking=True)
        elif self.type == 'servo':
            self.robots[arm].servo_joint_pos(pos[:6])
            self.robots[arm].servo_eef_pos([pos[6]])

    def get_joint_pos(self, arm='right_arm'):
        """Return the joint positions of one arm."""
        assert arm in self.robots, f"arm '{arm}' not initialized"
        return self.robots[arm].get_joint_pos()

    def get_eef_pos(self, arm='right_arm'):
        """Return the end-effector (gripper) position of one arm."""
        assert arm in self.robots, f"arm '{arm}' not initialized"
        return self.robots[arm].get_eef_pos()

    def get_state_pos(self, arm='right_arm'):
        """Return joints followed by gripper position, concatenated."""
        assert arm in self.robots, f"arm '{arm}' not initialized"
        pos = self.robots[arm].get_joint_pos()
        eef_pos = self.robots[arm].get_eef_pos()
        result = pos + eef_pos
        return result
| 251 |
+
|
| 252 |
+
class ActionSmoother:
    """Smooth a stream of action vectors to reduce jitter on the robot.

    Methods:
      - 'exponential': EMA with a scalar or per-dimension alpha.
      - 'moving_average' / 'average': mean over the last ``window_size``
        raw actions (the two names were duplicate branches; now merged).
      - 'linear_interpolation': fixed 0.7*raw + 0.3*previous blend.
      - 'identity': pass-through (history is still recorded).

    Only dimensions listed in ``smooth_dims`` are smoothed; ``None``
    smooths every dimension.
    """

    def __init__(self, method='exponential', alpha=0.3, window_size=5, smooth_dims=None):
        self.method = method
        self.window_size = window_size
        self.smooth_dims = smooth_dims

        # alpha may be a scalar or a per-dimension sequence.
        if isinstance(alpha, (list, tuple, np.ndarray)):
            self.alpha = np.array(alpha, dtype=np.float32)
        else:
            self.alpha = alpha

        self.history = []            # recent raw actions, bounded by window_size
        self.smoothed_action = None  # last smoothed output (None until first call)

    def smooth_action(self, raw_action):
        """Record raw_action and return its smoothed version (float32 array).

        The first call returns the raw action unchanged (nothing to blend
        against yet).
        """
        raw_action = np.array(raw_action, dtype=np.float32)

        self.history.append(raw_action.copy())
        if len(self.history) > self.window_size:
            self.history = self.history[-self.window_size:]

        if self.smoothed_action is None:
            self.smoothed_action = raw_action.copy()
            return self.smoothed_action

        result_action = raw_action.copy()

        if self.smooth_dims is None:
            dims_to_smooth = list(range(len(raw_action)))
        else:
            # Ignore configured dims that fall outside this action's length.
            dims_to_smooth = [d for d in self.smooth_dims if d < len(raw_action)]

        if self.method == 'exponential':
            if isinstance(self.alpha, np.ndarray):
                for dim in dims_to_smooth:
                    # Per-dim alpha; reuse the last entry if alpha is shorter.
                    alpha = self.alpha[dim] if dim < len(self.alpha) else self.alpha[-1]
                    result_action[dim] = alpha * raw_action[dim] + (1 - alpha) * self.smoothed_action[dim]
            else:
                for dim in dims_to_smooth:
                    result_action[dim] = self.alpha * raw_action[dim] + (1 - self.alpha) * self.smoothed_action[dim]

        elif self.method in ('moving_average', 'average'):
            # 'average' was a byte-for-byte duplicate of 'moving_average';
            # the branches are merged to remove the duplication.
            history_array = np.array(self.history)
            for dim in dims_to_smooth:
                result_action[dim] = np.mean(history_array[:, dim])

        elif self.method == 'linear_interpolation':
            for dim in dims_to_smooth:
                result_action[dim] = 0.7 * raw_action[dim] + 0.3 * self.smoothed_action[dim]

        elif self.method == 'identity':
            # Intentionally returns before updating smoothed_action,
            # matching the original behavior.
            return result_action

        else:
            raise ValueError(f"Unknown smoothing method: {self.method}")

        self.smoothed_action = result_action.copy()

        return result_action

    def reset(self):
        """Clear history and smoothed state so the next action passes through."""
        self.history = []
        self.smoothed_action = None
| 320 |
+
|
| 321 |
+
|
| 322 |
+
class VLAClient:
    """Thin HTTP client for the remote VLA action server."""

    def __init__(self, server_url):
        # Full endpoint URL, e.g. "http://127.0.0.1:10090/act".
        self.server_url = server_url

    @staticmethod
    def _parse_action_chunk(action_chunk):
        """Convert a server reply into an (N, 7) float32 array.

        Each row is the 6 right-arm dims followed by the scalar gripper value.
        """
        actions = [
            np.asarray(list(arm) + [float(gripper)], dtype=np.float32)
            for arm, gripper in zip(action_chunk['action.right_arm'],
                                    action_chunk['action.right_gripper'])
        ]
        return np.array(actions)

    def predict(self, obs):
        """POST `obs` to the server and return the parsed action chunk.

        Returns:
            np.ndarray of shape (N, 7), or None on any error (callers treat
            None as "no action this cycle").
        """
        try:
            response = requests.post(
                self.server_url,
                json={"observation": obs},
                timeout=30,  # avoid hanging the control loop forever on a dead server
            )
            # Surface HTTP errors explicitly instead of a confusing KeyError below.
            response.raise_for_status()
            return self._parse_action_chunk(response.json())
        except Exception as e:
            print(f"VLA prediction error: {e}")
            return None
|
| 343 |
+
|
| 344 |
+
|
| 345 |
+
def predict_actions(server_url, obs):
    """POST the observation to the VLA server and return the decoded JSON reply."""
    payload = {"observation": obs}
    reply = requests.post(server_url, json=payload)
    return reply.json()
|
| 351 |
+
|
| 352 |
+
|
| 353 |
+
if __name__ == "__main__":
    # Entry point: drive the right arm from a remote VLA action server, either
    # asynchronously (RTCController) or synchronously (blocking chunk requests).

    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--use_rtc", action='store_true', default=False)
    parser.add_argument("--fast", action='store_true', default=False)
    args = parser.parse_args()

    USE_RTC_CONTROLLER = args.use_rtc
    print(f"Use RTC Controller: {USE_RTC_CONTROLLER}")

    # Last assignment wins — earlier prompts are kept for quick manual switching.
    task_description = "Pick up each block one by one and place them all into the bowl."
    task_description = 'Pick up each block one by one and place them all into the blue bowl.'
    task_description = 'Pick up each block one by one and place them all into the right bowl.'

    print("Robot System Init...")
    robots = RobotWrapper(
        url='localhost',
        ports={'left_arm': None, 'right_arm': 50053},  # left arm unused in this script
        arm_speed='default' if args.fast else 'slow',
        type='servo'
    )

    if args.fast:
        # Fast mode: stream targets through a Ruckig online trajectory planner.
        target_queue = queue.Queue(maxsize=30)
        servo_queue = queue.Queue(maxsize=30)
        rp = RuckigPlanner(robots.robots['right_arm'], robots.get_state_pos('right_arm'), DoFs=7, dt=0.02)
        rp.start(servo_queue, target_queue)

    # server_url = "http://106.13.248.32:10090/act"
    server_url = "http://127.0.0.1:10090/act"
    # server_url = "http://114.111.24.161:10090/act"
    print(f"VLA Server URL: {server_url}")

    # Camera System Init
    print("Camera System Init...")
    caps = CameraWrapper(
        devices=["215322074711", "242622070332", 0],  # two RealSense serials + one UVC index
        num_realsense=2,
        width=640,
        height=480,
        fps=30,
        cv_format="MJPEG"
    )

    time.sleep(2)  # give the cameras time to warm up

    print(f'Smoother System Init for Grippers...')
    # Moving-average smoothing applied to dim 6 (the gripper) only.
    action_smoother = ActionSmoother(
        method='average',
        alpha=0.1,
        window_size=10,
        smooth_dims=[6]
    )
    # action_smoother = None

    print("Loop Start!")
    step_count = 0

    if USE_RTC_CONTROLLER:
        print("Using ASync (RTCController) Mode...")

        vla_client = VLAClient(server_url)

        def get_observation():
            """Grab the latest camera frames and arm state, packed for the VLA server."""
            # Read three times — presumably to flush stale buffered frames from the
            # camera wrapper; TODO confirm this is still needed.
            images = caps.get_images()
            images = caps.get_images()
            images = caps.get_images()

            if len(images) >= 3:
                img_right, img_front, img_env = images[:3]
            else:
                # Pad missing cameras with mid-gray frames so shapes stay fixed.
                filler = np.zeros((480, 640, 3), dtype=np.uint8)
                filler[:, :, :] = 128
                imgs = images + [filler] * (3 - len(images))
                img_right, img_front, img_env = imgs[:3]

            arm_state = normalization(robots.get_state_pos(arm='right_arm'))

            obs = {
                "video.cam_head": img_front[np.newaxis, ::],
                "video.cam_env": img_env[np.newaxis, ::],
                "video.cam_right_wrist": img_right[np.newaxis, ::],
                "state.right_arm": np.expand_dims(np.array(arm_state[:6], dtype=np.float32), axis=0),
                "state.right_gripper": np.expand_dims(np.array([arm_state[6]], dtype=np.float32), axis=0),
                "annotation.human.task_description": [task_description]
            }

            return obs

        # RTCController Settings
        H = 16  # action-chunk horizon
        d = 8   # NOTE(review): presumably the inference-delay budget in steps — confirm
        s = 5   # NOTE(review): presumably the chunk-switch step — confirm

        print("RTCController System Init...")
        rtc_controller = RTCController(
            vla_client=vla_client,
            observation_fn=get_observation,
            H=H,
            d=d,
            s=s,
        )

        print("predict action chunk...")
        rtc_controller.reset()

        if not rtc_controller.is_ready():
            print(f'RTCController Configuration Failed!')
            exit(1)

        print(f"RTCController Configuration Completed")
        print(f"RTC Params: H={H}, d={d}, s={s}")

        while True:
            try:
                loop_start_time = time.time()
                step_count += 1

                action = rtc_controller.step()

                if action is None:
                    print("RTCController is not ready...")
                    time.sleep(0.05)
                    continue

                denormalized_action = unnormalization(action)
                denormalized_action = action_smoother.smooth_action(denormalized_action) if action_smoother else denormalized_action

                if args.fast:
                    # Hand the target to the Ruckig planner thread.
                    target_queue.put(denormalized_action.tolist())
                else:
                    robots.move_to_pos(denormalized_action.tolist(), arm='right_arm')

                # if step_count % 10 == 0:
                #     print(f"[RTC] Step {step_count}: executor_index={rtc_controller.executor_index}")
                #     print(f"Action: {action}")
                #     current_state = robots.get_state_pos(arm='right_arm')
                #     print(f"Robot state: {current_state}")

                # Fixed-rate pacing: 10Hz in fast mode, 20Hz otherwise.
                if args.fast:
                    target_period = 0.1
                else:
                    target_period = 0.05
                elapsed = time.time() - loop_start_time
                sleep_time = max(0.0, target_period - elapsed)

                if sleep_time > 0:
                    time.sleep(sleep_time)
                else:
                    print(f"Loop control time cost: {elapsed:.3f}s > {target_period:.3f}s")

            except KeyboardInterrupt:
                print("Received interrupt signal, exiting safely...")
                break
            except Exception as e:
                print(f"ASYNC control mode error: {e}")
                time.sleep(0.05)

    else:
        print("Using Sync Mode...")

        current_actions = []
        current_action_idx = 0

        while True:
            try:
                loop_start_time = time.time()
                step_count += 1

                # Fetch a new chunk once every queued action has been executed.
                if current_action_idx >= len(current_actions):
                    print(f"[Sync Mode] Obtain new action chunk...")

                    # image — triple read flushes stale buffered frames (see get_observation)
                    images = caps.get_images()
                    images = caps.get_images()
                    images = caps.get_images()

                    if len(images) >= 3:
                        img_right, img_front, img_env = images[:3]
                    else:
                        filler = np.zeros((480, 640, 3), dtype=np.uint8)
                        filler[:, :, :] = 128
                        imgs = images + [filler] * (3 - len(images))
                        img_right, img_front, img_env = imgs[:3]

                    # state
                    arm_state = normalization(robots.get_state_pos(arm='right_arm'))

                    # obs
                    obs = {
                        "video.cam_head": img_front[np.newaxis, ::],
                        "video.cam_env": img_env[np.newaxis, ::],
                        "video.cam_right_wrist": img_right[np.newaxis, ::],
                        "state.right_arm": np.expand_dims(np.array(arm_state[:6], dtype=np.float32), axis=0),
                        "state.right_gripper": np.expand_dims(np.array([arm_state[6]], dtype=np.float32), axis=0),
                        "annotation.human.task_description": [task_description]
                    }

                    start_time = time.time()
                    action_chunk = predict_actions(server_url, obs)
                    print(f"inference time cost: {time.time() - start_time:.3f}s")

                    current_actions = []
                    current_action_idx = 0
                    for arm, gripper in zip(action_chunk['action.right_arm'], action_chunk['action.right_gripper']):
                        action = np.asarray(list(arm) + [float(gripper)], dtype=np.float32)
                        current_actions.append(action)

                    print(f"Obtained {len(current_actions)} action steps...")

                if current_action_idx < len(current_actions):
                    raw_action = current_actions[current_action_idx]
                    current_action_idx += 1

                    smoothed_action = action_smoother.smooth_action(raw_action)

                    denormalized_action = unnormalization(smoothed_action)
                    robots.move_to_pos(denormalized_action.tolist(), arm='right_arm')

                    if step_count % 10 == 0:
                        print(f"[Sync Mode] Step {step_count}: action_idx={current_action_idx}/{len(current_actions)}")
                        print(f"  Raw: {raw_action}")
                        print(f"  Smoothed: {smoothed_action}")

                # 20Hz control
                time.sleep(0.05)

            except KeyboardInterrupt:
                print("Received interrupt signal, exiting safely...")
                break
            except Exception as e:
                print(f"SYNC control mode error: {e}")
                time.sleep(0.05)

    print("Clearing resources...")
    try:
        caps.release()
        print("Camera resources released")
    except Exception as e:
        print(f"Camera clearing error: {e}")

    print("Program ended")
|
tests/install_av_opencv.sh
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Rebuild OpenCV's Python bindings from source with FFmpeg enabled so cv2 can
# decode AV streams (the prebuilt wheel may lack the needed codecs).

# Make sure ffmpeg and its codec development headers are installed first.
sudo apt install libavcodec-dev libavformat-dev libavutil-dev libswscale-dev ffmpeg
export CMAKE_ARGS="-D WITH_FFMPEG=ON"
pip install --no-binary opencv-python --no-deps opencv-python -v
|
tests/modality.json
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"state": {
|
| 3 |
+
"left_arm": {
|
| 4 |
+
"start": 0,
|
| 5 |
+
"end": 6
|
| 6 |
+
},
|
| 7 |
+
"left_gripper": {
|
| 8 |
+
"start": 6,
|
| 9 |
+
"end": 7
|
| 10 |
+
},
|
| 11 |
+
"right_arm": {
|
| 12 |
+
"start": 7,
|
| 13 |
+
"end": 13
|
| 14 |
+
},
|
| 15 |
+
"right_gripper": {
|
| 16 |
+
"start": 13,
|
| 17 |
+
"end": 14
|
| 18 |
+
}
|
| 19 |
+
},
|
| 20 |
+
"action": {
|
| 21 |
+
"left_arm": {
|
| 22 |
+
"start": 0,
|
| 23 |
+
"end": 6
|
| 24 |
+
},
|
| 25 |
+
"left_gripper": {
|
| 26 |
+
"start": 6,
|
| 27 |
+
"end": 7
|
| 28 |
+
},
|
| 29 |
+
"right_arm": {
|
| 30 |
+
"start": 7,
|
| 31 |
+
"end": 13
|
| 32 |
+
},
|
| 33 |
+
"right_gripper": {
|
| 34 |
+
"start": 13,
|
| 35 |
+
"end": 14
|
| 36 |
+
}
|
| 37 |
+
},
|
| 38 |
+
"video": {
|
| 39 |
+
"cam_high": {
|
| 40 |
+
"original_key": "observation.images.cam_high"
|
| 41 |
+
},
|
| 42 |
+
"cam_left_wrist": {
|
| 43 |
+
"original_key": "observation.images.cam_left_wrist"
|
| 44 |
+
},
|
| 45 |
+
"cam_right_wrist": {
|
| 46 |
+
"original_key": "observation.images.cam_right_wrist"
|
| 47 |
+
}
|
| 48 |
+
},
|
| 49 |
+
"annotation": {
|
| 50 |
+
"human.action.task_description": {
|
| 51 |
+
"original_key": "task_index"
|
| 52 |
+
}
|
| 53 |
+
}
|
| 54 |
+
}
|
tests/replay_sl.py
ADDED
|
@@ -0,0 +1,628 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
'''
|
| 2 |
+
load lerobot dataset and replay it on REAL ARM
|
| 3 |
+
'''
|
| 4 |
+
#!/usr/bin/env python3
|
| 5 |
+
# -*- coding: utf-8 -*-
|
| 6 |
+
|
| 7 |
+
'''
|
| 8 |
+
test for control Agilex arm.
|
| 9 |
+
'''
|
| 10 |
+
|
| 11 |
+
import os
|
| 12 |
+
import sys
|
| 13 |
+
import time
|
| 14 |
+
import argparse
|
| 15 |
+
import threading
|
| 16 |
+
import json
|
| 17 |
+
import pandas as pd
|
| 18 |
+
import numpy as np
|
| 19 |
+
import json
|
| 20 |
+
import time
|
| 21 |
+
import logging
|
| 22 |
+
import argparse
|
| 23 |
+
import threading
|
| 24 |
+
from pathlib import Path
|
| 25 |
+
from typing import Dict, Any, List, Optional
|
| 26 |
+
|
| 27 |
+
from piper_sdk import C_PiperInterface_V2
|
| 28 |
+
|
| 29 |
+
# Logging setup: mirror all messages to arm_replay.log and to the console.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('arm_replay.log'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)
|
| 39 |
+
|
| 40 |
+
class ArmDataReplayer:
|
| 41 |
+
"""机械臂数据重播器"""
|
| 42 |
+
|
| 43 |
+
    def __init__(self, arms_config: Dict[str, str]):
        """
        Initialize the replayer.

        Args:
            arms_config: arm name -> CAN interface name,
                e.g. {'left': 'can_left', 'right': 'can_right'}
        """
        self.arms_config = arms_config

        # Arm interfaces (name -> C_PiperInterface_V2), populated by connect_arms().
        self.arms = {}
        self.is_connected = False

        # Replay data: list of per-frame dicts plus dataset-level metadata.
        self.replay_data = None
        self.replay_metadata = None

        # Replay control state (worker thread, speed multiplier, frame cursor).
        self.is_replaying = False
        self.replay_thread = None
        self.replay_speed = 1.0
        self.current_frame = 0
        self.total_frames = 0

        # Replay statistics.
        self.frames_replayed = 0
        self.start_time = None

        logger.info("数据重播器初始化完成")
|
| 72 |
+
|
| 73 |
+
    def connect_arms(self) -> bool:
        """Connect to and enable every configured arm.

        Returns:
            True if all arms were connected; False (after disconnecting any
            partially-connected arms) on the first failure.
        """
        try:
            for arm_name, can_name in self.arms_config.items():
                logger.info(f"正在连接{arm_name}臂 ({can_name})...")

                arm = C_PiperInterface_V2(
                    can_name=can_name,
                    judge_flag=False,
                    can_auto_init=True,
                    dh_is_offset=1,
                    start_sdk_joint_limit=False,   # SDK joint limits disabled for replay
                    start_sdk_gripper_limit=False
                )

                arm.ConnectPort()
                self.arms[arm_name] = arm

                # Wait for the connection to stabilize before enabling.
                time.sleep(1.0)

                arm.EnableArm()
                time.sleep(0.5)

                # Verify the connection by checking the message rate.
                test_msg = arm.GetArmJointMsgs()
                if test_msg and test_msg.Hz > 0:
                    logger.info(f"✅ {arm_name}臂连接成功")
                else:
                    logger.warning(f"⚠️ {arm_name}臂连接可能不稳定")

            self.is_connected = True
            logger.info("所有机械臂连接完成")
            return True

        except Exception as e:
            logger.error(f"连接机械臂失败: {e}")
            self.disconnect_arms()
            return False
|
| 112 |
+
|
| 113 |
+
def disconnect_arms(self):
|
| 114 |
+
"""断开机械臂连接"""
|
| 115 |
+
try:
|
| 116 |
+
for arm_name, arm in self.arms.items():
|
| 117 |
+
if arm:
|
| 118 |
+
arm.DisconnectPort()
|
| 119 |
+
logger.info(f"{arm_name}臂已断开连接")
|
| 120 |
+
|
| 121 |
+
self.arms.clear()
|
| 122 |
+
self.is_connected = False
|
| 123 |
+
logger.info("所有机械臂已断开连接")
|
| 124 |
+
|
| 125 |
+
except Exception as e:
|
| 126 |
+
logger.error(f"断开连接失败: {e}")
|
| 127 |
+
|
| 128 |
+
    def load_hdf5_data(self, filename: str) -> bool:
        """Load replay frames from an HDF5 recording into self.replay_data.

        Args:
            filename: path to the .h5/.hdf5 recording.

        Returns:
            True on success, False on any error.
        """
        # NOTE(review): `h5py` is referenced here but never imported in this
        # module — add `import h5py` at the top of the file or this call raises
        # NameError at runtime.
        try:
            with h5py.File(filename, 'r') as f:
                # Dataset-level metadata stored as attributes on /metadata.
                self.replay_metadata = {
                    'collection_frequency': f['metadata'].attrs.get('collection_frequency', 30),
                    'total_samples': f['metadata'].attrs.get('total_samples', 0),
                    'arms': f['metadata'].attrs.get('arms', []),
                    'start_time': f['metadata'].attrs.get('start_time', 0),
                    'creation_time': f['metadata'].attrs.get('creation_time', '')
                }

                # Timing arrays (one entry per recorded frame).
                timestamps = f['timestamps'][:]
                relative_times = f['relative_times'][:]

                # Per-arm datasets.
                arms_data = {}
                for arm_name in self.replay_metadata['arms']:
                    if f'arms/{arm_name}' in f:
                        arm_group = f[f'arms/{arm_name}']
                        arms_data[arm_name] = {
                            'joint_positions': arm_group['joint_positions'][:],
                            'gripper_angles': arm_group['gripper_angles'][:],
                            'gripper_efforts': arm_group['gripper_efforts'][:]
                        }

                        # Compatibility: older recordings may lack gripper_positions.
                        if 'gripper_positions' in arm_group:
                            arms_data[arm_name]['gripper_positions'] = arm_group['gripper_positions'][:]
                        else:
                            # Fall back to zeros when position data is missing.
                            arms_data[arm_name]['gripper_positions'] = np.zeros_like(arms_data[arm_name]['gripper_angles'])

                # Rebuild a frame-oriented layout: one dict per time step.
                self.replay_data = []
                for i in range(len(timestamps)):
                    frame = {
                        'timestamp': timestamps[i],
                        'relative_time': relative_times[i],
                        'frame_index': i,
                        'arms': {}
                    }

                    for arm_name, data in arms_data.items():
                        if i < len(data['joint_positions']):
                            frame['arms'][arm_name] = {
                                'joint_positions': data['joint_positions'][i],
                                'gripper_angle': data['gripper_angles'][i],
                                'gripper_position': data['gripper_positions'][i],
                                'gripper_effort': data['gripper_efforts'][i]
                            }

                    self.replay_data.append(frame)

            self.total_frames = len(self.replay_data)
            logger.info(f"HDF5数据加载完成: {self.total_frames}帧")
            logger.info(f"包含机械臂: {self.replay_metadata['arms']}")
            logger.info(f"原始采集频率: {self.replay_metadata['collection_frequency']}Hz")

            return True

        except Exception as e:
            logger.error(f"加载HDF5数据失败: {e}")
            return False
|
| 194 |
+
|
| 195 |
+
    def load_lerobot_data(self, data_dir: str, episode_idx: int = 0) -> bool:
        """
        Load one episode of a LeRobot-format dataset directory into self.replay_data.

        Args:
            data_dir: root directory of the LeRobot dataset.
            episode_idx: index of the episode to load.

        Returns:
            True on success, False on any error.
        """
        try:
            logger.info(f"开始加载LeRobot数据集: {data_dir}, Episode: {episode_idx}")
            root_path = Path(data_dir)

            info_path = root_path / 'meta' / 'info.json'
            data_path = root_path / 'data'

            # A valid dataset root must have meta/info.json and a data/ directory.
            if not (root_path.is_dir() and info_path.exists() and data_path.exists()):
                logger.error(f"无效的LeRobot数据集目录结构。缺少 meta/info.json 或 data 目录。")
                return False

            with open(info_path, 'r', encoding='utf-8') as f:
                info = json.load(f)

            self.replay_metadata = {
                'collection_frequency': info.get('fps', 30),
                'arms': ['left', 'right'],
                'lerobot_info': info
            }
            logger.info(f"数据集元信息加载完成。采集频率: {self.replay_metadata['collection_frequency']}Hz")

            # Locate the parquet file for this episode.
            # Assumes the chunk is always 000 — TODO confirm for multi-chunk datasets.
            parquet_file = data_path / 'chunk-000' / f'episode_{episode_idx:06d}.parquet'
            if not parquet_file.exists():
                logger.error(f"未找到Episode {episode_idx} 的数据文件: {parquet_file}")
                return False

            logger.info(f"正在读取数据文件: {parquet_file}")
            df = pd.read_parquet(parquet_file)

            self.replay_data = []

            # LeRobot's 'action' is the puppet arm's next state, so it is used
            # directly as the control target. 'names' is a nested list.
            action_names = info['features']['action']['names'][0]

            # Map action-vector positions to per-arm joint/gripper slots.
            joint_indices = {'left': [None]*6, 'right': [None]*6}
            gripper_indices = {'left': -1, 'right': -1}

            for i, name in enumerate(action_names):
                if 'masterLeft' in name:
                    if 'joint6' in name:
                        # joint6 is the gripper channel in this naming scheme.
                        gripper_indices['left'] = i
                    else:
                        # Extract the digit N from '...jointN'.
                        joint_num = int(name.split('joint')[-1])
                        if 0 <= joint_num < 6:
                            joint_indices['left'][joint_num] = i
                elif 'masterRight' in name:
                    if 'joint6' in name:
                        gripper_indices['right'] = i
                    else:
                        joint_num = int(name.split('joint')[-1])
                        if 0 <= joint_num < 6:
                            joint_indices['right'][joint_num] = i

            logger.info(f"解析出的左臂关节索引: {joint_indices['left']}")
            logger.info(f"解析出的左臂夹爪索引: {gripper_indices['left']}")
            logger.info(f"解析出的右臂关节索引: {joint_indices['right']}")
            logger.info(f"解析出的右臂夹爪索引: {gripper_indices['right']}")


            for i, row in df.iterrows():
                frame = {
                    'timestamp': row.get('timestamp', time.time()),
                    'relative_time': row.get('timestamp', 0) - df['timestamp'].iloc[0] if 'timestamp' in df else i / self.replay_metadata['collection_frequency'],
                    'frame_index': i,
                    'arms': {}
                }

                action_values = row['action']

                for arm_name in ['left', 'right']:
                    # LeRobot action is typically the next state — use it as the target.
                    joint_positions = np.array([action_values[j] for j in joint_indices[arm_name]])

                    # Gripper values in this aloha_arm dataset are in [-1, 1]:
                    # -1 = closed, 1 = open; map to a 0-60 degree command.
                    gripper_action = action_values[gripper_indices[arm_name]]
                    # Mapping: 1 -> 0 deg (open), -1 -> 60 deg (closed).
                    gripper_angle = (1 - gripper_action) / 2 * 60

                    frame['arms'][arm_name] = {
                        'joint_positions': joint_positions,
                        'gripper_angle': gripper_angle,
                    }

                self.replay_data.append(frame)

            self.total_frames = len(self.replay_data)
            logger.info(f"LeRobot数据加载完成: {self.total_frames}帧")
            logger.info(f"包含机械臂: {list(self.replay_data[0]['arms'].keys())}")

            return True

        except Exception as e:
            logger.error(f"加载LeRobot数据失败: {e}", exc_info=True)
            return False
|
| 303 |
+
|
| 304 |
+
def load_data(self, path: str, episode_idx: int = 0) -> bool:
|
| 305 |
+
"""自动检测并加载数据"""
|
| 306 |
+
filepath = Path(path)
|
| 307 |
+
if not filepath.exists():
|
| 308 |
+
logger.error(f"数据文件或目录不存在: {path}")
|
| 309 |
+
return False
|
| 310 |
+
|
| 311 |
+
if filepath.is_dir():
|
| 312 |
+
# 认为是LeRobot数据集目录
|
| 313 |
+
return self.load_lerobot_data(path, episode_idx=episode_idx)
|
| 314 |
+
elif filepath.suffix.lower() == '.h5' or filepath.suffix.lower() == '.hdf5':
|
| 315 |
+
return self.load_hdf5_data(path)
|
| 316 |
+
# elif filepath.suffix.lower() == '.json': # 旧的逻辑,暂时禁用
|
| 317 |
+
else:
|
| 318 |
+
logger.error(f"不支持的文件格式或路径类型: {path}")
|
| 319 |
+
return False
|
| 320 |
+
|
| 321 |
+
def back_to_zero_position(self) -> bool:
|
| 322 |
+
|
| 323 |
+
for k, piper in self.arms:
|
| 324 |
+
logger.info(f'==> processing {k}')
|
| 325 |
+
# piper = self.arms.get('right', None)
|
| 326 |
+
# piper.JointConfig(joint_num=7, set_zero=0xAE)
|
| 327 |
+
# piper.GripperCtrl(set_zero=0xAE)
|
| 328 |
+
|
| 329 |
+
piper.JointCtrl(
|
| 330 |
+
joint_1=0, # 0度
|
| 331 |
+
joint_2=0, # 0度
|
| 332 |
+
joint_3=0, # 0度
|
| 333 |
+
joint_4=0, # 0度
|
| 334 |
+
joint_5=0, # 0度
|
| 335 |
+
joint_6=0 # 0度
|
| 336 |
+
)
|
| 337 |
+
|
| 338 |
+
joint_msgs = piper.GetArmJointMsgs()
|
| 339 |
+
print(f"关节状态: {joint_msgs}")
|
| 340 |
+
joint1_angle = joint_msgs.joint_state.joint_1
|
| 341 |
+
print(f"关节1角度: {joint1_angle/1000.0} 度")
|
| 342 |
+
joint2_angle = joint_msgs.joint_state.joint_2
|
| 343 |
+
print(f"关节2角度: {joint2_angle/1000.0} 度")
|
| 344 |
+
joint3_angle = joint_msgs.joint_state.joint_3
|
| 345 |
+
print(f"关节3角度: {joint3_angle/1000.0} 度")
|
| 346 |
+
joint4_angle = joint_msgs.joint_state.joint_4
|
| 347 |
+
print(f"关节4角度: {joint4_angle/1000.0} 度")
|
| 348 |
+
joint5_angle = joint_msgs.joint_state.joint_5
|
| 349 |
+
print(f"关节5角度: {joint5_angle/1000.0} 度")
|
| 350 |
+
joint6_angle = joint_msgs.joint_state.joint_6
|
| 351 |
+
print(f"关节6角度: {joint6_angle/1000.0} 度")
|
| 352 |
+
joint7_angle = joint_msgs.joint_state.joint_7
|
| 353 |
+
print(f"关节7角度: {joint7_angle/1000.0} 度")
|
| 354 |
+
|
| 355 |
+
# 获取夹爪状态
|
| 356 |
+
gripper_msgs = piper.GetArmGripperMsgs()
|
| 357 |
+
print(f"夹爪状态: {gripper_msgs}")
|
| 358 |
+
return True
|
| 359 |
+
|
| 360 |
+
    def move_to_start_position(self) -> bool:
        """Move all connected arms to the first frame's pose before replay begins.

        Returns:
            True on success, False if not connected, no data is loaded, or a
            command fails.
        """
        if not self.is_connected or not self.replay_data:
            logger.error("机械臂未连接或数据未加载")
            return False

        try:
            start_frame = self.replay_data[0]
            logger.info("正在移动到起始位置...")
            print(start_frame)
            self.back_to_zero_position()

            for arm_name, arm in self.arms.items():
                if arm_name in start_frame['arms']:
                    target_joint_positions = start_frame['arms'][arm_name]['joint_positions']
                    target_gripper_angle = start_frame['arms'][arm_name]['gripper_angle']

                    # Convert radians to the SDK's 0.001-degree integer units.
                    target_joint_positions_deg = (target_joint_positions * 180.0 * 1000.0 / np.pi).astype(int)

                    # Set control mode and a slow (20%) speed for the approach move.
                    arm.MotionCtrl_2(ctrl_mode=0x01, move_mode=0x01, move_spd_rate_ctrl=20)

                    # Send the joint command.
                    arm.JointCtrl(
                        int(target_joint_positions_deg[0]), int(target_joint_positions_deg[1]),
                        int(target_joint_positions_deg[2]), int(target_joint_positions_deg[3]),
                        int(target_joint_positions_deg[4]), int(target_joint_positions_deg[5])
                    )
                    # Gripper command (angle also in 0.001-degree units).
                    arm.GripperCtrl(int(target_gripper_angle * 1000.0), 0, 0x01, 0)

                    logger.info(f"{arm_name}臂目标关节位置 (0.001度): {target_joint_positions_deg.tolist()}")
                    logger.info(f"{arm_name}臂目标夹爪位置 (度): {target_gripper_angle:.2f}")

            # Wait for the motion to finish — fixed delay, no feedback polling.
            logger.info("等待机械臂移动到起始位置...")
            time.sleep(3.0)
            logger.info("==> 移动到起始位置完成")
            return True

        except Exception as e:
            logger.error(f"移动到起始位置失败: {e}")
            return False
|
| 404 |
+
|
| 405 |
+
def reset_to_zero(self):
|
| 406 |
+
"""重置到零位置"""
|
| 407 |
+
for arm_name, arm in self.arms.items():
|
| 408 |
+
arm.GripperCtrl(0, 0, 0x01, 0)
|
| 409 |
+
arm.JointCtrl(0, 0, 0, 0, 0, 0)
|
| 410 |
+
time.sleep(0.5)
|
| 411 |
+
logger.info(f"{arm_name}臂重置到零位置完成")
|
| 412 |
+
return True
|
| 413 |
+
|
| 414 |
+
    def start_replay(self, speed: float = 1.0, start_frame: int = 0, end_frame: Optional[int] = None):
        """Start replaying the loaded frames on a background thread.

        Args:
            speed: playback speed multiplier (1.0 = original recorded rate).
            start_frame: first frame index to replay.
            end_frame: end frame index; None means replay through the last frame.

        Returns:
            True if the replay thread was started, False otherwise.
        """
        if not self.is_connected or not self.replay_data:
            logger.error("机械臂未连接或数据未加载")
            return False

        if self.is_replaying:
            logger.warning("重播已在进行中")
            return False

        self.replay_speed = speed
        self.current_frame = start_frame
        self.frames_replayed = 0
        self.start_time = time.time()

        if end_frame is None:
            end_frame = self.total_frames

        # Launch the replay worker (daemon thread so it dies with the process).
        self.is_replaying = True
        self.replay_thread = threading.Thread(
            target=self._replay_loop,
            args=(start_frame, end_frame),
            daemon=True
        )
        self.replay_thread.start()

        logger.info(f"开始重播数据")
        logger.info(f"重播速度: {speed}x")
        logger.info(f"帧范围: {start_frame} - {end_frame}")

        return True
|
| 446 |
+
|
| 447 |
+
def stop_replay(self):
    """Request the playback thread to stop and wait briefly for it to exit.

    No-op when nothing is playing. The join uses a 2 s timeout so a stuck
    worker cannot hang the caller.
    """
    if not self.is_replaying:
        return

    # The worker polls this flag once per frame and exits when cleared.
    self.is_replaying = False

    worker = self.replay_thread
    if worker:
        worker.join(timeout=2.0)

    logger.info("重播已停止")
    logger.info(f"总重播帧数: {self.frames_replayed}")
|
| 459 |
+
|
| 460 |
+
def _replay_loop(self, start_frame: int, end_frame: int):
    """Worker loop (runs on the playback thread): stream recorded frames to the arms.

    Replays frames [start_frame, min(end_frame, total_frames)) at the
    recording's collection frequency divided by ``self.replay_speed``.
    Any per-frame error aborts playback; the arms are always returned to
    the zero pose when the loop ends.

    Args:
        start_frame: index of the first frame to send.
        end_frame: exclusive end index (clamped to total_frames).
    """
    # Pace the loop from the frequency stored in the recording's metadata
    # (defaults to 30 Hz), scaled by the requested playback speed.
    original_frequency = self.replay_metadata.get('collection_frequency', 30)
    base_dt = 1.0 / original_frequency
    adjusted_dt = base_dt / self.replay_speed

    logger.info(f"原始频率: {original_frequency}Hz, 调整后间隔: {adjusted_dt:.4f}s")

    for frame_idx in range(start_frame, min(end_frame, self.total_frames)):
        # stop_replay() clears this flag to request an early exit.
        if not self.is_replaying:
            break

        frame_start_time = time.time()

        try:
            frame = self.replay_data[frame_idx]
            self.current_frame = frame_idx

            # Send joint commands to each connected arm that has data in this frame.
            for arm_name, arm in self.arms.items():
                if arm_name in frame['arms']:
                    arm_data = frame['arms'][arm_name]
                    target_joint_positions = arm_data['joint_positions']
                    target_gripper_angle = arm_data['gripper_angle']

                    # Convert radians to the SDK's 0.001-degree integer units.
                    target_joint_positions_deg = (target_joint_positions * 180.0 * 1000.0 / np.pi).astype(int)

                    # Issue the 6-joint position command.
                    arm.JointCtrl(
                        int(target_joint_positions_deg[0]), int(target_joint_positions_deg[1]),
                        int(target_joint_positions_deg[2]), int(target_joint_positions_deg[3]),
                        int(target_joint_positions_deg[4]), int(target_joint_positions_deg[5])
                    )
                    # Gripper command; angle is likewise scaled by 1000.
                    arm.GripperCtrl(int(target_gripper_angle * 1000.0), 0, 0x01, 0)

            self.frames_replayed += 1

            # Sleep off whatever remains of this frame's time budget so the
            # loop tracks the adjusted frame interval.
            elapsed_time = time.time() - frame_start_time
            sleep_time = adjusted_dt - elapsed_time
            if sleep_time > 0:
                time.sleep(sleep_time)

        except Exception as e:
            logger.error(f"重播帧 {frame_idx} 时发生错误: {e}")
            self.is_replaying = False
            break

    self.is_replaying = False
    logger.info("重播循环结束")
    # Always park the arms at zero once playback ends (normally or on error).
    self.reset_to_zero()
|
| 513 |
+
|
| 514 |
+
|
| 515 |
+
def get_replay_info(self) -> Dict[str, Any]:
    """Return a snapshot of the current playback state.

    Returns:
        Dict with total/current frame counters, replayed-frame count,
        running flag, speed multiplier, and the recording's metadata.
    """
    return dict(
        total_frames=self.total_frames,
        current_frame=self.current_frame,
        frames_replayed=self.frames_replayed,
        is_replaying=self.is_replaying,
        replay_speed=self.replay_speed,
        metadata=self.replay_metadata,
    )
|
| 525 |
+
|
| 526 |
+
def get_frame_data(self, frame_index: int) -> Optional[Dict[str, Any]]:
    """Return the recorded frame at ``frame_index``, or None if unavailable.

    Fix: negative indices previously fell through to Python's negative
    list indexing and silently returned a frame from the end of the
    recording; they now return None, consistent with the range check in
    ``seek_to_frame``.

    Args:
        frame_index: zero-based frame index into the loaded recording.

    Returns:
        The frame dict, or None when no data is loaded or the index is
        out of range.
    """
    if not self.replay_data:
        return None
    if not 0 <= frame_index < len(self.replay_data):
        return None
    return self.replay_data[frame_index]
|
| 531 |
+
|
| 532 |
+
def seek_to_frame(self, frame_index: int):
    """Move the playback cursor to ``frame_index``.

    Args:
        frame_index: zero-based target frame.

    Returns:
        True on success, False when no data is loaded or the index is
        out of range.
    """
    if not self.replay_data:
        logger.error("没有数据可跳转")
        return False

    # Reject anything outside [0, total_frames).
    if not 0 <= frame_index < self.total_frames:
        logger.error(f"帧索引超出范围: {frame_index}")
        return False

    self.current_frame = frame_index
    logger.info(f"跳转到帧: {frame_index}")
    return True
|
| 545 |
+
|
| 546 |
+
|
| 547 |
+
def main():
    """CLI entry point: connect both arms, load a recording, and prepare replay."""
    parser = argparse.ArgumentParser(description="机械臂数据重播工具")
    parser.add_argument(
        '--path', type=str, required=True,
        help="要重播的数据文件路径(.h5)或LeRobot数据集目录",
    )
    parser.add_argument(
        '--episode', type=int, default=0,
        help="当路径为LeRobot目录时,指定要重播的episode索引",
    )
    parser.add_argument('--speed', type=float, default=1.0, help="重播速度倍率")
    parser.add_argument('--left-can', type=str, default='can_left', help="左臂CAN接口名称")
    parser.add_argument('--right-can', type=str, default='can_right', help="右臂CAN接口名称")
    args = parser.parse_args()

    # One CAN interface per arm.
    replayer = ArmDataReplayer({'left': args.left_can, 'right': args.right_can})

    # Connect to the hardware first; nothing else makes sense without it.
    if not replayer.connect_arms():
        logger.error("无法连接到机械臂,程序退出。")
        return

    # Load the recording (single .h5 file or a LeRobot episode).
    if not replayer.load_data(args.path, episode_idx=args.episode):
        logger.error("数据加载失败,程序退出。")
        replayer.disconnect_arms()
        return

    try:
        # Pre-position the arms at the recording's first pose.
        if not replayer.move_to_start_position():
            logger.error("移动到起始位置失败,程序退出。")
            return

        logger.info("准备开始重播...")
        # NOTE: the actual playback is currently disabled; re-enable with:
        # replayer.start_replay(speed=args.speed)
        # while replayer.is_replaying:
        #     info = replayer.get_replay_info()
        #     print(f"\r重播中... 帧: {info['current_frame']}/{info['total_frames']} ({(info['current_frame']+1)*100/info['total_frames']:.1f}%)", end="")
        #     time.sleep(0.5)
        # print("\n重播完成。")

    except KeyboardInterrupt:
        logger.info("接收到中断信号,正在停止...")

    finally:
        # Always stop any running playback and release the hardware.
        replayer.stop_replay()
        replayer.disconnect_arms()
        logger.info("程序已清理并退出。")


if __name__ == "__main__":
    main()
|
tests/save_s1.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Saving a s1 pretrained model for training
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
from starforce.model.starforce_s1 import Starforce_S1, Starforce_S1_Config
|
| 8 |
+
from starforce.model.action_head.flow_matching_action_head import FlowmatchingActionHeadConfig
|
| 9 |
+
|
| 10 |
+
config = Starforce_S1_Config()
|
| 11 |
+
config.backbone_cfg = {
|
| 12 |
+
"tune_llm": False,
|
| 13 |
+
# "vllm_base_model_path": "Qwen/Qwen2.5-VL-7B-Instruct",
|
| 14 |
+
"vllm_base_model_path": "/pfs/pfs-ahGxdf/data/wujingyi/huggingface/Qwen2.5-VL-3B-Instruct",
|
| 15 |
+
"select_layer": 12,
|
| 16 |
+
"feature_dim": 2048,
|
| 17 |
+
"project_to_dim": 2048,
|
| 18 |
+
}
|
| 19 |
+
config.action_horizon = 16
|
| 20 |
+
config.action_dim = 32
|
| 21 |
+
config.action_head_cfg = {
|
| 22 |
+
"action_dim": 32,
|
| 23 |
+
"action_horizon": 16,
|
| 24 |
+
"add_pos_embed": True,
|
| 25 |
+
"backbone_embedding_dim": 2048,
|
| 26 |
+
"diffusion_model_cfg": {
|
| 27 |
+
"attention_head_dim": 48,
|
| 28 |
+
"cross_attention_dim": 2048,
|
| 29 |
+
"dropout": 0.2,
|
| 30 |
+
"final_dropout": True,
|
| 31 |
+
"interleave_self_attention": True,
|
| 32 |
+
"norm_type": "ada_norm",
|
| 33 |
+
"num_attention_heads": 32,
|
| 34 |
+
"num_layers": 16,
|
| 35 |
+
"output_dim": 1024,
|
| 36 |
+
"positional_embeddings": None,
|
| 37 |
+
},
|
| 38 |
+
"hidden_size": 1024,
|
| 39 |
+
"input_embedding_dim": 1536,
|
| 40 |
+
"max_action_dim": 32,
|
| 41 |
+
"max_state_dim": 64,
|
| 42 |
+
"model_dtype": "float32",
|
| 43 |
+
"noise_beta_alpha": 1.5,
|
| 44 |
+
"noise_beta_beta": 1.0,
|
| 45 |
+
"noise_s": 0.999,
|
| 46 |
+
"num_inference_timesteps": 4,
|
| 47 |
+
"num_target_vision_tokens": 32,
|
| 48 |
+
"num_timestep_buckets": 1000,
|
| 49 |
+
"tune_diffusion_model": True,
|
| 50 |
+
"tune_projector": True,
|
| 51 |
+
"use_vlln": True,
|
| 52 |
+
"vl_self_attention_cfg": {
|
| 53 |
+
"attention_head_dim": 64,
|
| 54 |
+
"dropout": 0.2,
|
| 55 |
+
"final_dropout": True,
|
| 56 |
+
"num_attention_heads": 32,
|
| 57 |
+
"num_layers": 4,
|
| 58 |
+
"positional_embeddings": None,
|
| 59 |
+
},
|
| 60 |
+
}
|
| 61 |
+
model = Starforce_S1(config=config, local_model_path=None)
|
| 62 |
+
|
| 63 |
+
# action_head_state_dict = torch.load("checkpoints/GR00T-N1.5-3B-action-expert.pth")
|
| 64 |
+
action_head_state_dict = torch.load("checkpoints/qz-action-expert.pth")
|
| 65 |
+
model.action_head.load_state_dict(action_head_state_dict)
|
| 66 |
+
|
| 67 |
+
model.save_pretrained("checkpoints/Starforce-S1-3B")
|
| 68 |
+
|
| 69 |
+
print(model.)
|
| 70 |
+
print("done!")
|
tests/save_s1_7B.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Saving a s1 pretrained model for training
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
from starforce.model.starforce_s1 import Starforce_S1, Starforce_S1_Config
|
| 8 |
+
from starforce.model.action_head.flow_matching_action_head import FlowmatchingActionHeadConfig
|
| 9 |
+
|
| 10 |
+
config = Starforce_S1_Config()
|
| 11 |
+
config.backbone_cfg = {
|
| 12 |
+
"tune_llm": False,
|
| 13 |
+
# "vllm_base_model_path": "Qwen/Qwen2.5-VL-7B-Instruct",
|
| 14 |
+
"vllm_base_model_path": "/pfs/pfs-ahGxdf/data/wujingyi/huggingface/Qwen2.5-VL-7B-Instruct",
|
| 15 |
+
"select_layer": 12,
|
| 16 |
+
"feature_dim": 3584,
|
| 17 |
+
"project_to_dim": 2048,
|
| 18 |
+
}
|
| 19 |
+
config.action_horizon = 16
|
| 20 |
+
config.action_dim = 32
|
| 21 |
+
config.action_head_cfg = {
|
| 22 |
+
"action_dim": 32,
|
| 23 |
+
"action_horizon": 16,
|
| 24 |
+
"add_pos_embed": True,
|
| 25 |
+
"backbone_embedding_dim": 2048,
|
| 26 |
+
"diffusion_model_cfg": {
|
| 27 |
+
"attention_head_dim": 48,
|
| 28 |
+
"cross_attention_dim": 2048,
|
| 29 |
+
"dropout": 0.2,
|
| 30 |
+
"final_dropout": True,
|
| 31 |
+
"interleave_self_attention": True,
|
| 32 |
+
"norm_type": "ada_norm",
|
| 33 |
+
"num_attention_heads": 32,
|
| 34 |
+
"num_layers": 16,
|
| 35 |
+
"output_dim": 1024,
|
| 36 |
+
"positional_embeddings": None,
|
| 37 |
+
},
|
| 38 |
+
"hidden_size": 1024,
|
| 39 |
+
"input_embedding_dim": 1536,
|
| 40 |
+
"max_action_dim": 32,
|
| 41 |
+
"max_state_dim": 64,
|
| 42 |
+
"model_dtype": "float32",
|
| 43 |
+
"noise_beta_alpha": 1.5,
|
| 44 |
+
"noise_beta_beta": 1.0,
|
| 45 |
+
"noise_s": 0.999,
|
| 46 |
+
"num_inference_timesteps": 4,
|
| 47 |
+
"num_target_vision_tokens": 32,
|
| 48 |
+
"num_timestep_buckets": 1000,
|
| 49 |
+
"tune_diffusion_model": True,
|
| 50 |
+
"tune_projector": True,
|
| 51 |
+
"use_vlln": True,
|
| 52 |
+
"vl_self_attention_cfg": {
|
| 53 |
+
"attention_head_dim": 64,
|
| 54 |
+
"dropout": 0.2,
|
| 55 |
+
"final_dropout": True,
|
| 56 |
+
"num_attention_heads": 32,
|
| 57 |
+
"num_layers": 4,
|
| 58 |
+
"positional_embeddings": None,
|
| 59 |
+
},
|
| 60 |
+
}
|
| 61 |
+
model = Starforce_S1(config=config, local_model_path=None)
|
| 62 |
+
|
| 63 |
+
# action_head_state_dict = torch.load("checkpoints/GR00T-N1.5-3B-action-expert.pth")
|
| 64 |
+
action_head_state_dict = torch.load("checkpoints/qz-action-expert.pth")
|
| 65 |
+
model.action_head.load_state_dict(action_head_state_dict)
|
| 66 |
+
|
| 67 |
+
model.save_pretrained("checkpoints/Starforce-S1-7B")
|
| 68 |
+
|
| 69 |
+
print("done!")
|
tests/test_cv.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Smoke test: verify OpenCV can open a LeRobot episode video and decode one frame."""
import cv2
import sys

# FIX: the first assignment was dead code — it was immediately shadowed by the
# second. Kept here only as a reference to another sample video.
# video_path = "data/sl/0723pre_data_v1/videos/chunk-000/observation.images.cam_high/episode_000045.mp4"
video_path = 'data/test_aloha_singlearm/videos/chunk-000/observation.images.cam_high/episode_000000.mp4'

# Dump the build configuration so codec support (ffmpeg/av) can be checked.
print("OpenCV build info:")
print(cv2.getBuildInformation())

cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
    print(f"Error: Failed to open video file: {video_path}")
    sys.exit(1)

ret, frame = cap.read()
if not ret:
    print("Error: Unable to read the first frame of the video.")
    cap.release()
    sys.exit(1)

print("Success: Video file opened and first frame read successfully.")
cap.release()
|
tests/test_hf.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Smoke test: instantiate a randomly-initialized Qwen2.5-VL model from its
# pretrained config (weights are NOT loaded) and print its module tree.
import os

# Route Hugging Face Hub traffic through the mirror. This assignment MUST
# happen before `transformers` is imported, because the endpoint is read at
# import time — do not reorder these statements.
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"

from transformers import AutoConfig
from transformers.models.qwen2_5_vl import Qwen2_5_VLConfig
from transformers import AutoModelForCausalLM, AutoModel
from transformers.models.qwen2_5_vl import Qwen2_5_VLForConditionalGeneration
import torch


vllm_base_model_path = "Qwen/Qwen2.5-VL-3B-Instruct"

# Only the config is fetched; constructing the model from it gives random weights.
config = Qwen2_5_VLConfig.from_pretrained(vllm_base_model_path, trust_remote_code=True)
vllm_model = Qwen2_5_VLForConditionalGeneration(
    config=config,
)

print(vllm_model)
|
tests/test_pi0.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from starforce.datasets.lerobot_dataset import LeRobotDataset
|
| 2 |
+
import torch
|
| 3 |
+
from starforce.models.pretrained import PreTrainedConfig
|
| 4 |
+
from starforce.models.build_model import make_policy
|
| 5 |
+
from loguru import logger
|
| 6 |
+
import time
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def main():
    """Benchmark pi0 policy inference latency on one LeRobot batch.

    Loads a single episode, moves one batch to the target device, restores a
    policy checkpoint, and times `select_action` over 30 iterations under
    autocast, printing total and average latency.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # FIX: torch.cuda.is_bf16_supported() raises on CUDA-less hosts, even
    # though the CPU fallback above explicitly supports them — guard it.
    dtype = (
        torch.bfloat16
        if torch.cuda.is_available() and torch.cuda.is_bf16_supported()
        else torch.float32
    )
    # paligemma doesn't support bf16?
    # dtype = torch.float32
    logger.info(f"##info, device: {device}, dtype: {dtype}")

    # dataset_repo_id = "danaaubakirova/koch_test"
    # dataset_repo_id = "data/robotwin2lerobot/block_hammer_beat"
    dataset_repo_id = "/pfs/data/xiongxiao/lerobot_fps30/open_laptop"
    # Previously benchmarked checkpoints, kept for reference:
    # ckpt_torch_dir = "/pfs/data/fgang/vla_holo/checkpoints/pi0"
    # ckpt_torch_dir = "/pfs/data/fgang/outputs_models/pi0-1-20000/pretrained_model"
    # ckpt_torch_dir = "/pfs/data/fgang/outputs_models/pi0-robotwin-30fps-tasks3"
    # ckpt_torch_dir = "/pfs/data/fgang/vla_holo/outputs/pi0-fixed-20ksteps"
    ckpt_torch_dir = "/pfs/data/fgang/outputs_models/pi0-robotwin-30fps-tasks5"

    dataset = LeRobotDataset(dataset_repo_id, episodes=[0])
    dataloader = torch.utils.data.DataLoader(
        dataset,
        num_workers=0,
        batch_size=1,
    )
    batch = next(iter(dataloader))
    # Move every tensor in the batch to the benchmark device/dtype.
    for k in batch:
        if isinstance(batch[k], torch.Tensor):
            batch[k] = batch[k].to(device=device, dtype=dtype)
    print(f'dataset.meta: {dataset.meta}')

    cfg = PreTrainedConfig.from_pretrained(ckpt_torch_dir, device=device)
    cfg.pretrained_path = ckpt_torch_dir
    policy = make_policy(cfg, ds_meta=dataset.meta)
    # policy.to(dtype)
    # print(policy)

    t0 = time.time()
    with torch.amp.autocast(device_type=device):
        benchmark_iters = 30
        for _ in range(benchmark_iters):
            t00 = time.time()
            action = policy.select_action(batch, n_steps_out=50)
            # FIX: torch.cuda.synchronize() raises on CPU-only hosts; only
            # synchronize when actually running on CUDA.
            if device == "cuda":
                torch.cuda.synchronize()
            # print("##info, action:", action.shape, action.dtype, action.device, action, time.time() - t00)
    t1 = time.time()
    print(f'cost: {t1-t0:.3f}, avg: {(t1-t0)/benchmark_iters}')


if __name__ == "__main__":
    main()
|
tests/test_starhelm.py
ADDED
|
File without changes
|
tests/test_tensor.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
'''
Read model weights from
checkpoints/models/pi0-sl-b1/model.safetensors
and print every normalization-related key.
'''
# FIX: the module docstring above was duplicated verbatim twice; the second
# copy was dead text and has been removed.

from safetensors.torch import load_file

# Load the full state dict from the safetensors checkpoint.
weights = load_file("checkpoints/models/pi0-sl-b1/model.safetensors")

# Only the normalization buffers are of interest here; print shape, dtype
# and the raw values for each of them.
for key, value in weights.items():
    if 'normalize' in key:
        print(f"Key: {key}, Shape: {value.shape}, Type: {value.dtype} {value}")
|
tests/vis_lerobot_data.py
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import matplotlib.pyplot as plt
|
| 3 |
+
from vlaholo.datasets.lerobot_dataset import LeRobotDataset
|
| 4 |
+
import os
|
| 5 |
+
import cv2
|
| 6 |
+
from matplotlib.animation import FuncAnimation
|
| 7 |
+
|
| 8 |
+
"""
|
| 9 |
+
TODO:
|
| 10 |
+
|
| 11 |
+
support datasets == 4.0
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def plot_episode_joint_states(dataset_path: str, episode_index: int):
    """Render one episode of a LeRobot dataset as an MP4: camera streams on top,
    one animated joint-position subplot per joint below, with a moving time cursor.

    The result is written to ``outputs/episode_<index>_animation.mp4``.

    Args:
        dataset_path: path or HF repo ID of the LeRobot dataset.
        episode_index: episode to render; clamped to the last episode if
            out of range.
    """
    dataset = LeRobotDataset(dataset_path)

    # Clamp an out-of-range episode index to the last available episode.
    if episode_index >= dataset.num_episodes:
        print(
            f"episode index {episode_index} is out of range, total episodes: {dataset.num_episodes}"
        )
        episode_index = dataset.num_episodes - 1
        print(f"force set to max episode index: {episode_index}")

    hf_dataset = dataset.hf_dataset
    episode_ds = hf_dataset.filter(lambda x: x["episode_index"] == episode_index)
    video_paths = dataset.encode_episode_videos(episode_index=episode_index)

    # Open one capture per camera stream; fail loudly if any is unreadable.
    caps = {}
    for key, path in video_paths.items():
        cap = cv2.VideoCapture(path)
        if not cap.isOpened():
            raise ValueError(f"Could not open video: {path}")
        caps[key] = cap

    # Assume all camera streams share the first stream's fps / frame count.
    fps = caps[next(iter(caps))].get(cv2.CAP_PROP_FPS)
    total_frames = int(caps[next(iter(caps))].get(cv2.CAP_PROP_FRAME_COUNT))

    df = episode_ds.to_pandas()
    joint_states = np.vstack(df["observation.state"].values)
    timestamps = df["timestamp"].values
    duration_sec = timestamps[-1] - timestamps[0]

    # Joint names may be stored as a single nested list; unwrap it, and fall
    # back to generic "Joint i" labels when no usable names are present.
    joint_names = dataset.features["observation.state"]["names"]
    if isinstance(joint_names, list) and len(joint_names) == 1 and isinstance(joint_names[0], list):
        joint_names = joint_names[0]
    if len(joint_names) <= 1:
        joint_names = [f"Joint {i}" for i in range(joint_states.shape[1])]

    n_joints = joint_states.shape[1]
    # Three joint plots per row.
    n_joint_rows = (n_joints + 2) // 3

    # Create the figure with symmetric left/right margins and headroom for titles.
    fig = plt.figure(figsize=(18, 4 + 4 * n_joint_rows))
    fig.subplots_adjust(top=0.92, bottom=0.05, left=0.05, right=0.95, hspace=0.4, wspace=0.3)

    # Top-level title.
    fig.suptitle(
        "Starforce Data Inspect System", x=0.5, y=0.98, fontsize=35, fontweight="bold", ha="center"
    )
    # Second-level stats line: fps, frame count, episode index, duration.
    stats_text = (
        f"FPS: {fps:.2f}    Total frames: {total_frames}    "
        f"Episode: {episode_index}    Duration: {duration_sec:.2f}s"
    )
    fig.text(0.5, 0.92, stats_text, ha="center", fontsize=21, fontweight="bold")

    plt.rcParams.update(
        {
            "font.family": "sans-serif",
            "font.sans-serif": ["Arial", "DejaVu Sans"],
            "font.size": 12,
            "axes.titlesize": 14,
            "axes.labelsize": 13,
            "axes.spines.top": False,
            "axes.spines.right": False,
        }
    )

    # Equal-width grid: one tall row of videos, then the joint-plot rows.
    gs = fig.add_gridspec(
        n_joint_rows + 1, 3, width_ratios=[1, 1, 1], height_ratios=[2] + [1] * n_joint_rows
    )

    # Video panels (top row), one per camera stream.
    video_axes, video_imgs = {}, {}
    for idx, key in enumerate(video_paths.keys()):
        ax = fig.add_subplot(gs[0, idx])
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_title(key)
        img = ax.imshow(np.zeros((480, 640, 3)), aspect="auto")
        ax.set_box_aspect(480 / 640)
        video_axes[key] = ax
        video_imgs[key] = img

    # Joint trajectory subplots.
    joint_axes, lines, time_lines = [], [], []
    base_colors = [
        "#1f77b4",
        "#ff7f0e",
        "#2ca02c",
        "#d62728",
        "#9467bd",
        "#8c564b",
        "#e377c2",
        "#7f7f7f",
        "#bcbd22",
        "#17becf",
    ]
    # Repeat the palette as needed so every joint gets a color.
    colors = (base_colors * ((n_joints // len(base_colors)) + 1))[:n_joints]

    for i in range(n_joints):
        row, col = 1 + i // 3, i % 3
        ax = fig.add_subplot(gs[row, col])

        # Soft gradient background behind each trajectory.
        gradient = np.linspace(0, 1, 256).reshape(256, 1)
        extent = [timestamps[0], timestamps[-1], joint_states[:, i].min(), joint_states[:, i].max()]
        ax.imshow(
            np.repeat(gradient, 256, axis=1),
            aspect="auto",
            cmap="Blues",
            alpha=0.1,
            extent=extent,
            origin="lower",
            zorder=0,
        )

        # Trajectory line (filled in incrementally by the animation).
        (line,) = ax.plot([], [], label=joint_names[i], color=colors[i], linewidth=2.5, zorder=1)
        lines.append(line)

        # Axis cosmetics.
        ax.set_xlabel("Time (s)")
        ax.set_ylabel("pos")
        ax.spines["left"].set_visible(False)

        ax.set_title(joint_names[i], fontweight="bold")
        ax.set_xlim(timestamps[0], timestamps[-1])
        # 10% vertical margin around the joint's value range.
        y0, y1 = joint_states[:, i].min(), joint_states[:, i].max()
        m = (y1 - y0) * 0.1
        ax.set_ylim(y0 - m, y1 + m)

        # Vertical cursor marking the current playback time.
        tl = ax.axvline(x=timestamps[0], color="crimson", alpha=0.7, linewidth=1.2, zorder=2)
        time_lines.append(tl)
        joint_axes.append(ax)

    # Animation callbacks: init clears the lines, animate advances one frame.
    def init():
        for ln in lines:
            ln.set_data([], [])
        return lines + time_lines + list(video_imgs.values())

    def animate(frame_idx):
        # Video may have more frames than state rows; clamp the data index.
        idx = min(frame_idx, len(timestamps) - 1)
        t = timestamps[idx]
        print(
            f"\rProcessing frames: {frame_idx + 1}/{total_frames} ({(frame_idx+1)/total_frames*100:.1f}%)",
            end="",
            flush=True,
        )

        # Advance each video stream by one frame (captures are read sequentially).
        for key, cap in caps.items():
            ret, frame = cap.read()
            if ret:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                video_imgs[key].set_array(frame)
        # Extend each trajectory up to the current sample and move the cursor.
        for j, ln in enumerate(lines):
            ln.set_data(timestamps[: idx + 1], joint_states[: idx + 1, j])
        for tl in time_lines:
            tl.set_xdata([t, t])
        return lines + time_lines + list(video_imgs.values())

    anim = FuncAnimation(
        fig, animate, init_func=init, frames=total_frames, interval=1000 / fps, blit=True
    )
    print()

    # Rendering happens inside anim.save(); frames are pulled lazily.
    save_dir = "outputs/"
    os.makedirs(save_dir, exist_ok=True)
    out_path = os.path.join(save_dir, f"episode_{episode_index}_animation.mp4")
    anim.save(out_path, writer="ffmpeg", fps=fps)

    plt.close()
    for cap in caps.values():
        cap.release()
    print(f"Animation saved to: {out_path}")
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
if __name__ == "__main__":
|
| 192 |
+
import argparse
|
| 193 |
+
|
| 194 |
+
parser = argparse.ArgumentParser(
|
| 195 |
+
description="Visualize joint states of a LeRobot dataset episode"
|
| 196 |
+
)
|
| 197 |
+
parser.add_argument("dataset_path", type=str, help="Path or HF repo ID of the LeRobot dataset")
|
| 198 |
+
parser.add_argument("-i", type=int, default=89, help="Episode index to visualize")
|
| 199 |
+
args = parser.parse_args()
|
| 200 |
+
plot_episode_joint_states(args.dataset_path, args.i)
|
tests/vis_lerobot_data_v1.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import matplotlib.pyplot as plt
|
| 3 |
+
from vlaholo.datasets.lerobot_dataset import LeRobotDataset
|
| 4 |
+
import os
|
| 5 |
+
import cv2
|
| 6 |
+
from matplotlib.animation import FuncAnimation
|
| 7 |
+
from vlaholo.utils.dataset_utils import DEFAULT_VIDEO_PATH
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def plot_episode_joint_states(dataset_path: str, episode_index: int):
    """Render one episode as an MP4: three camera panels plus a combined
    joint-state plot with a moving time cursor.

    The result is written to ``outputs/episode_<index>_animation.mp4``.

    Args:
        dataset_path: path or HF repo ID of the LeRobot dataset.
        episode_index: episode to render; clamped to the last episode if
            out of range.
    """
    dataset = LeRobotDataset(dataset_path)

    # FIX: was `episode_index > dataset.num_episodes` — an off-by-one that let
    # episode_index == num_episodes through and crash downstream. Episodes are
    # zero-indexed, so any index >= num_episodes is out of range (this matches
    # the check in tests/vis_lerobot_data.py).
    if episode_index >= dataset.num_episodes:
        print(
            f"episode index {episode_index} is out of range, total episodes: {dataset.num_episodes}"
        )
        episode_index = dataset.num_episodes - 1
        print(f"force set to max episode index: {episode_index}")

    hf_dataset = dataset.hf_dataset
    episode_ds = hf_dataset.filter(lambda x: x["episode_index"] == episode_index)
    video_paths = dataset.encode_episode_videos(episode_index=episode_index)

    # Open one capture per camera stream; fail loudly if any is unreadable.
    caps = {}
    for key, path in video_paths.items():
        cap = cv2.VideoCapture(path)
        if not cap.isOpened():
            raise ValueError(f"Could not open video: {path}")
        caps[key] = cap

    # Assume all camera streams share the first stream's fps / frame count.
    fps = caps[list(caps.keys())[0]].get(cv2.CAP_PROP_FPS)
    total_frames = int(caps[list(caps.keys())[0]].get(cv2.CAP_PROP_FRAME_COUNT))

    df = episode_ds.to_pandas()
    joint_states = np.vstack(df["observation.state"].values)
    timestamps = df["timestamp"].values

    fig = plt.figure(figsize=(15, 10))
    gs = fig.add_gridspec(3, 2)

    video_names_map = {
        "observation.images.cam_high": "High Camera",
        "observation.images.cam_left_wrist": "Left Wrist Camera",
        "observation.images.cam_right_wrist": "Right Wrist Camera",
    }

    # Fixed layout: two cameras on the top row, one spanning the middle row.
    video_axes = {
        "observation.images.cam_high": fig.add_subplot(gs[0, 0]),
        "observation.images.cam_left_wrist": fig.add_subplot(gs[0, 1]),
        "observation.images.cam_right_wrist": fig.add_subplot(gs[1, :]),
    }

    # Bottom row: all joint trajectories share a single axis.
    joint_ax = fig.add_subplot(gs[2, :])

    video_imgs = {}
    for key, ax in video_axes.items():
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_title(video_names_map[key])
        img = ax.imshow(np.zeros((480, 640, 3)))
        video_imgs[key] = img

    n_joints = joint_states.shape[1]
    lines = []
    for i in range(n_joints):
        (line,) = joint_ax.plot([], [], label=f"Joint {i}")
        lines.append(line)

    joint_ax.set_xlim(timestamps[0], timestamps[-1])
    joint_ax.set_ylim(joint_states.min(), joint_states.max())
    joint_ax.grid(True)
    joint_ax.legend(loc="upper right")
    joint_ax.set_xlabel("Time (s)")

    # Vertical cursor marking the current playback time.
    time_line = joint_ax.axvline(x=timestamps[0], color="r")

    def init():
        for line in lines:
            line.set_data([], [])
        return lines + [time_line] + list(video_imgs.values())

    def animate(frame_idx):
        # Video may have more frames than state rows; clamp the data index.
        data_idx = min(frame_idx, len(timestamps) - 1)
        current_time = timestamps[data_idx]

        # Advance each video stream by one frame (captures are read sequentially).
        for key, cap in caps.items():
            ret, frame = cap.read()
            if ret:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                video_imgs[key].set_array(frame)

        # Extend each trajectory up to the current sample and move the cursor.
        for i, line in enumerate(lines):
            line.set_data(timestamps[: data_idx + 1], joint_states[: data_idx + 1, i])

        time_line.set_xdata([current_time, current_time])
        return lines + [time_line] + list(video_imgs.values())

    anim = FuncAnimation(
        fig, animate, init_func=init, frames=total_frames, interval=1000 / fps, blit=True
    )

    save_dir = "outputs/"
    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f"episode_{episode_index}_animation.mp4")
    anim.save(output_path, writer="ffmpeg", fps=fps)

    plt.close()
    for cap in caps.values():
        cap.release()

    print(f"Animation saved to: {output_path}")
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
if __name__ == "__main__":
|
| 116 |
+
import argparse
|
| 117 |
+
|
| 118 |
+
parser = argparse.ArgumentParser(
|
| 119 |
+
description="Visualize joint states of a LeRobot dataset episode"
|
| 120 |
+
)
|
| 121 |
+
parser.add_argument("dataset_path", type=str, help="Path or HF repo ID of the LeRobot dataset")
|
| 122 |
+
parser.add_argument("-i", type=int, default=89, help="Episode index to visualize")
|
| 123 |
+
args = parser.parse_args()
|
| 124 |
+
|
| 125 |
+
plot_episode_joint_states(args.dataset_path, args.i)
|
wandb/debug.log
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-12-20 21:53:42,814 INFO MainThread:3365891 [wandb_setup.py:_flush():77] Current SDK version is 0.18.0
|
| 2 |
+
2025-12-20 21:53:42,814 INFO MainThread:3365891 [wandb_setup.py:_flush():77] Configure stats pid to 3365891
|
| 3 |
+
2025-12-20 21:53:42,815 INFO MainThread:3365891 [wandb_setup.py:_flush():77] Loading settings from /home/lumos6/.config/wandb/settings
|
| 4 |
+
2025-12-20 21:53:42,815 INFO MainThread:3365891 [wandb_setup.py:_flush():77] Loading settings from /home/lumos6/work/starforce2/wandb/settings
|
| 5 |
+
2025-12-20 21:53:42,815 INFO MainThread:3365891 [wandb_setup.py:_flush():77] Loading settings from environment variables: {}
|
| 6 |
+
2025-12-20 21:53:42,815 INFO MainThread:3365891 [wandb_setup.py:_flush():77] Applying setup settings: {'_disable_service': False}
|
| 7 |
+
2025-12-20 21:53:42,815 INFO MainThread:3365891 [wandb_setup.py:_flush():77] Inferring run settings from compute environment: {'program_relpath': 'finetune.py', 'program_abspath': '/home/lumos6/work/starforce2/finetune.py', 'program': '/home/lumos6/work/starforce2/finetune.py'}
|
| 8 |
+
2025-12-20 21:53:42,815 INFO MainThread:3365891 [wandb_setup.py:_flush():77] Applying login settings: {}
|
| 9 |
+
2025-12-20 21:53:42,815 INFO MainThread:3365891 [wandb_setup.py:_flush():77] Applying login settings: {'mode': 'offline'}
|
| 10 |
+
2025-12-20 21:53:42,815 INFO MainThread:3365891 [wandb_init.py:_log_setup():525] Logging user logs to /home/lumos6/work/starforce2/wandb/offline-run-20251220_215342-tsf926l3/logs/debug.log
|
| 11 |
+
2025-12-20 21:53:42,815 INFO MainThread:3365891 [wandb_init.py:_log_setup():526] Logging internal logs to /home/lumos6/work/starforce2/wandb/offline-run-20251220_215342-tsf926l3/logs/debug-internal.log
|
| 12 |
+
2025-12-20 21:53:42,815 INFO MainThread:3365891 [wandb_init.py:init():609] calling init triggers
|
| 13 |
+
2025-12-20 21:53:42,815 INFO MainThread:3365891 [wandb_init.py:init():616] wandb.init called with sweep_config: {}
|
| 14 |
+
config: {}
|
| 15 |
+
2025-12-20 21:53:42,815 INFO MainThread:3365891 [wandb_init.py:init():659] starting backend
|
| 16 |
+
2025-12-20 21:53:42,815 INFO MainThread:3365891 [wandb_init.py:init():663] setting up manager
|
| 17 |
+
2025-12-20 21:53:42,817 INFO MainThread:3365891 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 18 |
+
2025-12-20 21:53:42,817 INFO MainThread:3365891 [wandb_init.py:init():671] backend started and connected
|
| 19 |
+
2025-12-20 21:53:42,818 INFO MainThread:3365891 [wandb_init.py:init():766] updated telemetry
|
| 20 |
+
2025-12-20 21:53:42,823 INFO MainThread:3365891 [wandb_init.py:init():799] communicating run to backend with 90.0 second timeout
|
| 21 |
+
2025-12-20 21:53:42,835 INFO MainThread:3365891 [wandb_init.py:init():850] starting run threads in backend
|
| 22 |
+
2025-12-20 21:53:42,973 INFO MainThread:3365891 [wandb_run.py:_console_start():2466] atexit reg
|
| 23 |
+
2025-12-20 21:53:42,973 INFO MainThread:3365891 [wandb_run.py:_redirect():2312] redirect: wrap_raw
|
| 24 |
+
2025-12-20 21:53:42,973 INFO MainThread:3365891 [wandb_run.py:_redirect():2377] Wrapping output streams.
|
| 25 |
+
2025-12-20 21:53:42,973 INFO MainThread:3365891 [wandb_run.py:_redirect():2402] Redirects installed.
|
| 26 |
+
2025-12-20 21:53:42,974 INFO MainThread:3365891 [wandb_init.py:init():893] run started, returning control to user process
|
| 27 |
+
2025-12-20 21:53:42,974 INFO MainThread:3365891 [wandb_run.py:_config_callback():1393] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['GR00T_N1_5'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': None, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'outputs/gr00t-3b-piper-task-pickup-bs8-1gpu-step60k/final_model', 'transformers_version': '4.52.2', 'action_dim': 32, 'action_head_cfg': {'action_dim': 32, 'action_horizon': 16, 'add_pos_embed': True, 'backbone_embedding_dim': 2048, 'diffusion_model_cfg': {'attention_head_dim': 48, 'cross_attention_dim': 2048, 'dropout': 0.2, 'final_dropout': True, 'interleave_self_attention': True, 'norm_type': 'ada_norm', 'num_attention_heads': 32, 'num_layers': 16, 'output_dim': 1024, 
'positional_embeddings': None}, 'hidden_size': 1024, 'input_embedding_dim': 1536, 'max_action_dim': 32, 'max_state_dim': 64, 'model_dtype': 'float32', 'noise_beta_alpha': 1.5, 'noise_beta_beta': 1.0, 'noise_s': 0.999, 'num_inference_timesteps': 4, 'num_target_vision_tokens': 32, 'num_timestep_buckets': 1000, 'tune_diffusion_model': True, 'tune_projector': True, 'use_vlln': True, 'vl_self_attention_cfg': {'attention_head_dim': 64, 'dropout': 0.2, 'final_dropout': True, 'num_attention_heads': 32, 'num_layers': 4, 'positional_embeddings': None}}, 'action_horizon': 16, 'backbone_cfg': {'eagle_path': 'NVEagle/eagle_er-qwen3_1_7B-Siglip2_400M_stage1_5_128gpu_er_v7_1mlp_nops', 'load_bf16': False, 'project_to_dim': None, 'reproject_vision': False, 'select_layer': 12, 'tune_llm': False, 'tune_visual': True, 'use_flash_attention': True}, 'compute_dtype': 'bfloat16', 'hidden_size': 2048, 'model_dtype': 'float32', 'model_type': 'gr00t_n1_5', 'attn_implementation': None, 'output_dir': 'outputs/gr00t-3b-piper-task-pickup02-bs8-1gpu-step60k', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'eval_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 2.5e-05, 'weight_decay': 1e-05, 'adam_beta1': 0.95, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 300, 'max_steps': 60000, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.05, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'outputs/gr00t-3b-piper-task-pickup02-bs8-1gpu-step60k/runs/Dec20_21-53-38_lumos6', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 
'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 5000, 'save_total_limit': 3, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': True, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 8, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'outputs/gr00t-3b-piper-task-pickup02-bs8-1gpu-step60k', 'disable_tqdm': False, 'remove_unused_columns': False, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': '', 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard'], 'ddp_find_unused_parameters': False, 'ddp_bucket_cap_mb': 100, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': False, 'dataloader_persistent_workers': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': 
False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False}
|
| 28 |
+
2025-12-20 21:53:42,976 INFO MainThread:3365891 [wandb_config.py:__setitem__():154] config set model/num_parameters = 2724163520 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x75c8c8147460>>
|
| 29 |
+
2025-12-20 21:53:42,976 INFO MainThread:3365891 [wandb_run.py:_config_callback():1393] config_cb model/num_parameters 2724163520 None
|
| 30 |
+
2025-12-21 08:08:04,099 WARNING MsgRouterThr:3365891 [router.py:message_loop():77] message_loop has been closed
|