Upload folder using huggingface_hub
Browse files- README.md +19 -9
- assets/SO-ARM100/Simulation/SO101/scene_with_cube.xml +6 -3
- assets/SO-ARM100/Simulation/SO101/so101_new_calib.xml +4 -6
- assets/media/output.gif +2 -2
- env.py +4 -128
- src/__init__.py +0 -0
- src/base_so101_env.py +129 -0
- src/scripts/00_run_demo.py +45 -0
README.md
CHANGED
|
@@ -1,22 +1,23 @@
|
|
| 1 |
-
# Basic
|
| 2 |
|
| 3 |
-
|
|
|
|
|
|
|
| 4 |
|
| 5 |
-
Implements a
|
| 6 |
|
| 7 |
-
- Observation is a `np.ndarray.shape = 640, 480`.
|
| 8 |
- Action is a `np.ndarray.shape = 6` where each element represents the joint control.
|
| 9 |
- Reward is the euclidian distance between the gripper and the red block, which it needs to minimize.
|
| 10 |
|
| 11 |
|
| 12 |
-
|
| 13 |
## Basic usage
|
| 14 |
|
| 15 |
```python
|
| 16 |
SO101Env(
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
env = gym.make(
|
| 21 |
"base-sO101-env-v0",
|
| 22 |
)
|
|
@@ -32,7 +33,15 @@ finally:
|
|
| 32 |
|
| 33 |
```
|
| 34 |
|
| 35 |
-
##
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
All robot files are from [SO-ARM100](https://github.com/TheRobotStudio/SO-ARM100)
|
| 37 |
```bib
|
| 38 |
@software{Knight_Standard_Open_SO-100,
|
|
@@ -41,6 +50,7 @@ All robot files are from [SO-ARM100](https://github.com/TheRobotStudio/SO-ARM100
|
|
| 41 |
url = {https://github.com/TheRobotStudio/SO-ARM100}
|
| 42 |
}
|
| 43 |
```
|
|
|
|
| 44 |
|
| 45 |
[MuJoCo library used](https://github.com/google-deepmind/mujoco)
|
| 46 |
```bib
|
|
|
|
| 1 |
+
# <div style="text-align:center; border-radius:30px 30px; padding:7px; color:white; margin:0; font-size:150%; font-family:Arial; background-color:#636363; overflow:hidden"><b> Basic so-101 simulation environment</b></div>
|
| 2 |
|
| 3 |
+
<p align="center">
|
| 4 |
+
<img src="assets/media/output.gif" alt="Basic policy" width="75%"/>
|
| 5 |
+
</p>
|
| 6 |
|
| 7 |
+
Implements a so-101 robotic arm simulation environment for the [EnvHub](https://huggingface.co/docs/lerobot/envhub).
|
| 8 |
|
| 9 |
+
- Observation is a `np.ndarray.shape = (640, 480, 3)`.
|
| 10 |
- Action is a `np.ndarray.shape = 6` where each element represents the joint control.
|
| 11 |
- Reward is the Euclidean distance between the gripper and the red block, which it needs to minimize.
|
| 12 |
|
| 13 |
|
|
|
|
| 14 |
## Basic usage
|
| 15 |
|
| 16 |
```python
|
| 17 |
SO101Env(
|
| 18 |
+
xml_pth=Path("assets/SO-ARM100/Simulation/SO101/scene_with_cube.xml"),
|
| 19 |
+
obs_w=640,
|
| 20 |
+
obs_h=480)
|
| 21 |
env = gym.make(
|
| 22 |
"base-sO101-env-v0",
|
| 23 |
)
|
|
|
|
| 33 |
|
| 34 |
```
|
| 35 |
|
| 36 |
+
## ToDo
|
| 37 |
+
Things I want to do
|
| 38 |
+
|
| 39 |
+
- Make a hardcoded policy that can pick up the block.
|
| 40 |
+
- First attempt of training a model.
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
# <div style="text-align:center; border-radius:30px 30px; padding:7px; color:white; margin:0; font-size:150%; font-family:Arial; background-color:#636363; overflow:hidden"><b> References</b></div>
|
| 44 |
+
## Assets
|
| 45 |
All robot files are from [SO-ARM100](https://github.com/TheRobotStudio/SO-ARM100)
|
| 46 |
```bib
|
| 47 |
@software{Knight_Standard_Open_SO-100,
|
|
|
|
| 50 |
url = {https://github.com/TheRobotStudio/SO-ARM100}
|
| 51 |
}
|
| 52 |
```
|
| 53 |
+
## MuJoCo
|
| 54 |
|
| 55 |
[MuJoCo library used](https://github.com/google-deepmind/mujoco)
|
| 56 |
```bib
|
assets/SO-ARM100/Simulation/SO101/scene_with_cube.xml
CHANGED
|
@@ -18,10 +18,13 @@
|
|
| 18 |
</asset>
|
| 19 |
<worldbody>
|
| 20 |
<light pos="0 0 3.5" dir="0 0 -1" directional="true" />
|
| 21 |
-
<geom name="floor" size="0 0 0.05" pos="0 0 0" type="plane" material="groundplane" />
|
| 22 |
<!-- Add a cube -->
|
| 23 |
-
<body name="cube" pos="0.25 0.01 0.
|
| 24 |
-
<
|
|
|
|
|
|
|
|
|
|
| 25 |
</body>
|
| 26 |
</worldbody>
|
| 27 |
</mujoco>
|
|
|
|
| 18 |
</asset>
|
| 19 |
<worldbody>
|
| 20 |
<light pos="0 0 3.5" dir="0 0 -1" directional="true" />
|
| 21 |
+
<geom name="floor" size="0 0 0.05" pos="0 0 0" type="plane" material="groundplane" friction="0.0 0.0 0.0"/>
|
| 22 |
<!-- Add a cube -->
|
| 23 |
+
<body name="cube" pos="0.25 0.01 0.015">
|
| 24 |
+
<joint name="cube_joint" type="free" frictionloss="0.01" />
|
| 25 |
+
<!-- <inertial pos="0 0 0" mass="0.025" fullinertia="6.61427e-06 1.89032e-06 5.28738e-06 -3.19807e-07 -5.90717e-09 -1.09945e-07"/> -->
|
| 26 |
+
<inertial pos="0 0 0" mass="0.0125" diaginertia="1e-5 1e-5 1e-5" />
|
| 27 |
+
<geom name="cube_geom" type="box" condim="4" solimp="2 1 0.01" solref="0.01 1" friction="5.0 4 0.01" size="0.05 0.01 0.015" material="cube_material" mass="0.0125"/>
|
| 28 |
</body>
|
| 29 |
</worldbody>
|
| 30 |
</mujoco>
|
assets/SO-ARM100/Simulation/SO101/so101_new_calib.xml
CHANGED
|
@@ -20,11 +20,9 @@
|
|
| 20 |
<default class="sts3215">
|
| 21 |
<geom contype="0" conaffinity="0"/>
|
| 22 |
<joint damping="0.60" frictionloss="0.052" armature="0.028"/>
|
| 23 |
-
<!-- For lerobot these are not exactly the motor params, as Kp and Kd do not map 1-to-1; motor identification with the lerobot Kp=16 and Kd=32 should actually be done -->
|
| 24 |
<position kp="17.8"/>
|
| 25 |
</default>
|
| 26 |
<default class="backlash">
|
| 27 |
-
<!-- +/- 0.5° of backlash -->
|
| 28 |
<joint damping="0.01" frictionloss="0" armature="0.01" limited="true" range="-0.008726646259971648 0.008726646259971648"/>
|
| 29 |
</default>
|
| 30 |
</default>
|
|
@@ -102,8 +100,8 @@
|
|
| 102 |
<geom type="mesh" class="visual" pos="0.0077 0.0001 -0.0234" quat="0.707107 -0.707107 1.66015e-15 6.45094e-15" mesh="sts3215_03a_v1" material="sts3215_03a_v1_material"/>
|
| 103 |
<geom type="mesh" class="collision" pos="0.0077 0.0001 -0.0234" quat="0.707107 -0.707107 1.66015e-15 6.45094e-15" mesh="sts3215_03a_v1" material="sts3215_03a_v1_material"/>
|
| 104 |
<!-- Part wrist_roll_follower_so101_v1 -->
|
| 105 |
-
<geom type="mesh" class="visual" pos="8.32667e-17 -0.000218214 0.000949706" quat="0 1 0 0" mesh="wrist_roll_follower_so101_v1" material="wrist_roll_follower_so101_v1_material"/>
|
| 106 |
-
<geom type="mesh" class="collision" pos="8.32667e-17 -0.000218214 0.000949706" quat="0 1 0 0" mesh="wrist_roll_follower_so101_v1" material="wrist_roll_follower_so101_v1_material"/>
|
| 107 |
<!-- Frame gripperframe -->
|
| 108 |
<site group="3" name="gripperframe" pos="-0.0079 -0.000218121 -0.0981274" quat="0.707107 -0 0.707107 -2.37788e-17"/>
|
| 109 |
<!-- Link moving_jaw_so101_v1 -->
|
|
@@ -112,8 +110,8 @@
|
|
| 112 |
<joint axis="0 0 1" name="gripper" type="hinge" range="-0.17453297762778586 1.7453291995659765" class="sts3215"/>
|
| 113 |
<inertial pos="-0.00157495 -0.0300244 0.0192755" mass="0.012" fullinertia="6.61427e-06 1.89032e-06 5.28738e-06 -3.19807e-07 -5.90717e-09 -1.09945e-07"/>
|
| 114 |
<!-- Part moving_jaw_so101_v1 -->
|
| 115 |
-
<geom type="mesh" class="visual" pos="-5.55112e-17 -5.55112e-17 0.0189" quat="1 -0 3.00524e-16 -2.00834e-17" mesh="moving_jaw_so101_v1" material="moving_jaw_so101_v1_material"/>
|
| 116 |
-
<geom type="mesh" class="collision" pos="-5.55112e-17 -5.55112e-17 0.0189" quat="1 -0 3.00524e-16 -2.00834e-17" mesh="moving_jaw_so101_v1" material="moving_jaw_so101_v1_material"/>
|
| 117 |
</body>
|
| 118 |
</body>
|
| 119 |
</body>
|
|
|
|
| 20 |
<default class="sts3215">
|
| 21 |
<geom contype="0" conaffinity="0"/>
|
| 22 |
<joint damping="0.60" frictionloss="0.052" armature="0.028"/>
|
|
|
|
| 23 |
<position kp="17.8"/>
|
| 24 |
</default>
|
| 25 |
<default class="backlash">
|
|
|
|
| 26 |
<joint damping="0.01" frictionloss="0" armature="0.01" limited="true" range="-0.008726646259971648 0.008726646259971648"/>
|
| 27 |
</default>
|
| 28 |
</default>
|
|
|
|
| 100 |
<geom type="mesh" class="visual" pos="0.0077 0.0001 -0.0234" quat="0.707107 -0.707107 1.66015e-15 6.45094e-15" mesh="sts3215_03a_v1" material="sts3215_03a_v1_material"/>
|
| 101 |
<geom type="mesh" class="collision" pos="0.0077 0.0001 -0.0234" quat="0.707107 -0.707107 1.66015e-15 6.45094e-15" mesh="sts3215_03a_v1" material="sts3215_03a_v1_material"/>
|
| 102 |
<!-- Part wrist_roll_follower_so101_v1 -->
|
| 103 |
+
<geom type="mesh" class="visual" pos="8.32667e-17 -0.000218214 0.000949706" quat="0 1 0 0" mesh="wrist_roll_follower_so101_v1" material="wrist_roll_follower_so101_v1_material" friction="5.0 1.0 1.0"/>
|
| 104 |
+
<geom type="mesh" class="collision" pos="8.32667e-17 -0.000218214 0.000949706" quat="0 1 0 0" mesh="wrist_roll_follower_so101_v1" material="wrist_roll_follower_so101_v1_material" friction="5.0 1.0 1.0"/>
|
| 105 |
<!-- Frame gripperframe -->
|
| 106 |
<site group="3" name="gripperframe" pos="-0.0079 -0.000218121 -0.0981274" quat="0.707107 -0 0.707107 -2.37788e-17"/>
|
| 107 |
<!-- Link moving_jaw_so101_v1 -->
|
|
|
|
| 110 |
<joint axis="0 0 1" name="gripper" type="hinge" range="-0.17453297762778586 1.7453291995659765" class="sts3215"/>
|
| 111 |
<inertial pos="-0.00157495 -0.0300244 0.0192755" mass="0.012" fullinertia="6.61427e-06 1.89032e-06 5.28738e-06 -3.19807e-07 -5.90717e-09 -1.09945e-07"/>
|
| 112 |
<!-- Part moving_jaw_so101_v1 -->
|
| 113 |
+
<geom type="mesh" class="visual" pos="-5.55112e-17 -5.55112e-17 0.0189" quat="1 -0 3.00524e-16 -2.00834e-17" mesh="moving_jaw_so101_v1" material="moving_jaw_so101_v1_material" friction="5.0 0.1 0.1"/>
|
| 114 |
+
<geom type="mesh" class="collision" pos="-5.55112e-17 -5.55112e-17 0.0189" quat="1 -0 3.00524e-16 -2.00834e-17" mesh="moving_jaw_so101_v1" material="moving_jaw_so101_v1_material" friction="5.0 0.1 0.1"/>
|
| 115 |
</body>
|
| 116 |
</body>
|
| 117 |
</body>
|
assets/media/output.gif
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
env.py
CHANGED
|
@@ -1,131 +1,5 @@
|
|
| 1 |
-
from pathlib import Path
|
| 2 |
-
from typing import Any
|
| 3 |
-
|
| 4 |
-
import cv2
|
| 5 |
import gymnasium as gym
|
| 6 |
-
import
|
| 7 |
-
import numpy as np
|
| 8 |
-
from gymnasium import spaces
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
class SO101Env(gym.Env):
|
| 12 |
-
metadata = {"render_modes": ["rgb_array"], "render_fps": 4}
|
| 13 |
-
|
| 14 |
-
def __init__(
|
| 15 |
-
self,
|
| 16 |
-
xml_pth: Path = Path("assets/SO-ARM100/Simulation/SO101/scene_with_cube.xml"),
|
| 17 |
-
width: int = 640,
|
| 18 |
-
height: int = 480,
|
| 19 |
-
max_episode_steps: int = 1_000,
|
| 20 |
-
camera_distance: float = 1.0,
|
| 21 |
-
camera_azimuth: int = 100,
|
| 22 |
-
) -> None:
|
| 23 |
-
"""Most simple S0101 environment. Reinforcement learning environment where reward is
|
| 24 |
-
defined by the euclidian distance between the gripper and a red block that it needs to pick up.
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
Args:
|
| 29 |
-
xml_pth (Path, optional): Path to the scene .xml file that containing the robot and the cube it needs to pickup. Defaults to Path("assets/SO-ARM100/Simulation/SO101/scene_with_cube.xml").
|
| 30 |
-
width (int, optional): Render width. Defaults to 640.
|
| 31 |
-
height (int, optional): _description_. Defaults to 480.
|
| 32 |
-
max_episode_steps (int, optional): Size of on Episode. Defaults to 200.
|
| 33 |
-
camera_distance (float, optional): Distance of the render camera to the robot. Defaults to 1.0.
|
| 34 |
-
camera_azimuth (int, optional): Azimuth of the render camera. Defaults to 100.
|
| 35 |
-
"""
|
| 36 |
-
self.mj_model = mujoco.MjModel.from_xml_path(str(xml_pth))
|
| 37 |
-
self.mj_data = mujoco.MjData(self.mj_model)
|
| 38 |
-
self.width = width
|
| 39 |
-
self.height = height
|
| 40 |
-
self.mj_renderer = mujoco.Renderer(
|
| 41 |
-
self.mj_model, height=self.height, width=self.width
|
| 42 |
-
)
|
| 43 |
-
|
| 44 |
-
self.max_episode_steps = max_episode_steps
|
| 45 |
-
self.current_step = 0
|
| 46 |
-
self.action_space = gym.spaces.Box(
|
| 47 |
-
low=np.array([-1.91986, -1.74533, -1.69, -1.65806, -2.74385, -0.17453]),
|
| 48 |
-
high=np.array([1.91986, 1.74533, 1.69, 1.65806, 2.84121, 1.74533]),
|
| 49 |
-
shape=(self.mj_model.nu,), # Number of actuators
|
| 50 |
-
dtype=np.float32,
|
| 51 |
-
)
|
| 52 |
-
|
| 53 |
-
self.observation_space = gym.spaces.Box(
|
| 54 |
-
low=0, high=255, shape=(self.height, self.width, 3), dtype=np.uint8
|
| 55 |
-
)
|
| 56 |
-
self.gripper_geom_id = mujoco.mj_name2id(
|
| 57 |
-
self.mj_model, mujoco.mjtObj.mjOBJ_BODY, "moving_jaw_so101_v1"
|
| 58 |
-
)
|
| 59 |
-
self.cube_geom_id = mujoco.mj_name2id(
|
| 60 |
-
self.mj_model, mujoco.mjtObj.mjOBJ_GEOM, "cube_geom"
|
| 61 |
-
)
|
| 62 |
-
self.camera_distance = camera_distance
|
| 63 |
-
self.camera_azimuth = camera_azimuth
|
| 64 |
-
|
| 65 |
-
def step(
|
| 66 |
-
self, action: np.ndarray
|
| 67 |
-
) -> tuple[np.ndarray, float, bool, bool, dict[str, Any]]:
|
| 68 |
-
# Apply the action and update the scene
|
| 69 |
-
self.mj_data.ctrl = action
|
| 70 |
-
mujoco.mj_step(self.mj_model, self.mj_data)
|
| 71 |
-
self.mj_renderer.update_scene(self.mj_data)
|
| 72 |
-
|
| 73 |
-
# Get a rendered observation
|
| 74 |
-
obs = self._get_obs()
|
| 75 |
-
|
| 76 |
-
# Compute the reward
|
| 77 |
-
reward = self._compute_reward()
|
| 78 |
-
|
| 79 |
-
# Check if the episode is terminated or truncated
|
| 80 |
-
terminated = self.current_step >= self.max_episode_steps
|
| 81 |
-
truncated = False
|
| 82 |
-
info = {}
|
| 83 |
-
|
| 84 |
-
self.current_step += 1
|
| 85 |
-
|
| 86 |
-
return obs, reward, terminated, truncated, info
|
| 87 |
-
|
| 88 |
-
def reset(
|
| 89 |
-
self, seed: int | None = None, options: dict[str, Any] | None = None
|
| 90 |
-
) -> tuple[np.ndarray, dict[str, Any]]:
|
| 91 |
-
"""Reset the environment to the initial state.
|
| 92 |
-
Returns:
|
| 93 |
-
np.ndarray: Initial observation
|
| 94 |
-
dict: Additional information
|
| 95 |
-
"""
|
| 96 |
-
mujoco.mj_resetData(self.mj_model, self.mj_data)
|
| 97 |
-
self.current_step = 0
|
| 98 |
-
obs = self._get_obs()
|
| 99 |
-
|
| 100 |
-
return obs, {}
|
| 101 |
-
|
| 102 |
-
def _compute_reward(self) -> float:
|
| 103 |
-
"""Compute the reward as the negative Euclidean distance between the gripper and the cube."""
|
| 104 |
-
# Get the positions of the gripper and cube geoms
|
| 105 |
-
gripper_pos = self.mj_data.geom_xpos[self.gripper_geom_id]
|
| 106 |
-
cube_pos = self.mj_data.geom_xpos[self.cube_geom_id]
|
| 107 |
-
|
| 108 |
-
# Return the negative distance as the reward
|
| 109 |
-
return -np.linalg.norm(gripper_pos - cube_pos)
|
| 110 |
-
|
| 111 |
-
def _get_obs(self) -> np.ndarray:
|
| 112 |
-
"""Render observation
|
| 113 |
-
|
| 114 |
-
Returns:
|
| 115 |
-
np.ndarray: Obervation, rendered image
|
| 116 |
-
"""
|
| 117 |
-
self.mj_renderer.update_scene(self.mj_data)
|
| 118 |
-
camera = mujoco.MjvCamera()
|
| 119 |
-
camera.distance = self.camera_distance # Decrease this value to zoom in
|
| 120 |
-
camera.azimuth = self.camera_azimuth # Camera azimuth angle (degrees)
|
| 121 |
-
self.mj_renderer.update_scene(self.mj_data, camera=camera)
|
| 122 |
-
return self.mj_renderer.render().copy()
|
| 123 |
-
|
| 124 |
-
def close(self) -> None:
|
| 125 |
-
# del self.mj_renderer
|
| 126 |
-
del self.mj_data
|
| 127 |
-
del self.mj_model
|
| 128 |
-
|
| 129 |
|
| 130 |
gym.register(
|
| 131 |
id="base-sO101-env-v0",
|
|
@@ -133,7 +7,9 @@ gym.register(
|
|
| 133 |
)
|
| 134 |
|
| 135 |
|
| 136 |
-
def make_env(
|
|
|
|
|
|
|
| 137 |
"""
|
| 138 |
Create vectorized environments for your custom task.
|
| 139 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gymnasium as gym
|
| 2 |
+
from src.base_so101_env import SO101Env
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
gym.register(
|
| 5 |
id="base-sO101-env-v0",
|
|
|
|
| 7 |
)
|
| 8 |
|
| 9 |
|
| 10 |
+
def make_env(
|
| 11 |
+
n_envs: int = 1, use_async_envs: bool = False
|
| 12 |
+
) -> gym.vector.AsyncVectorEnv | gym.vector.SyncVectorEnv:
|
| 13 |
"""
|
| 14 |
Create vectorized environments for your custom task.
|
| 15 |
|
src/__init__.py
ADDED
|
File without changes
|
src/base_so101_env.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from pathlib import Path
from typing import Any

import cv2  # noqa: F401  -- unused here; kept because sibling modules may rely on it
import gymnasium as gym
import mujoco
import numpy as np


class SO101Env(gym.Env):
    """Minimal SO-101 robotic-arm environment.

    Reinforcement-learning environment where the reward is the negative
    Euclidean distance between the gripper body and a red cube the arm
    must pick up. Observations are rendered RGB images.
    """

    metadata = {"render_modes": ["rgb_array"], "render_fps": 4}

    def __init__(
        self,
        xml_pth: Path = Path("assets/SO-ARM100/Simulation/SO101/scene_with_cube.xml"),
        obs_h: int = 480,
        obs_w: int = 640,
        n_max_epi_steps: int = 1_000,
        cam_dis: float = 1.0,
        cam_azi: int = 100,
        n_sim_steps: int = 10,
    ) -> None:
        """Build the MuJoCo model, renderer, and Gym spaces.

        Args:
            xml_pth (Path, optional): Path to the scene .xml file containing the robot and
                the cube it needs to pick up. Defaults to
                Path("assets/SO-ARM100/Simulation/SO101/scene_with_cube.xml").
            obs_h (int, optional): Height in pixels of the rendered observation. Defaults to 480.
            obs_w (int, optional): Width in pixels of the rendered observation. Defaults to 640.
            n_max_epi_steps (int, optional): Episode length before truncation. Defaults to 1_000.
            cam_dis (float, optional): Distance of the render camera to the robot. Defaults to 1.0.
            cam_azi (int, optional): Azimuth of the render camera in degrees. Defaults to 100.
            n_sim_steps (int, optional): Number of MuJoCo physics sub-steps per env step.
                Defaults to 10.
        """
        self.mj_model = mujoco.MjModel.from_xml_path(str(xml_pth))
        self.mj_data = mujoco.MjData(self.mj_model)
        self.obs_h = obs_h
        self.obs_w = obs_w
        self.n_sim_steps = n_sim_steps
        self.mj_renderer = mujoco.Renderer(
            self.mj_model, height=self.obs_h, width=self.obs_w
        )

        self.n_max_epi_steps = n_max_epi_steps
        self.current_step = 0
        # Per-actuator joint limits (radians) for the 6 controls.
        self.action_space = gym.spaces.Box(
            low=np.array([-1.91986, -1.74533, -1.69, -1.65806, -2.74385, -0.17453]),
            high=np.array([1.91986, 1.74533, 1.69, 1.65806, 2.84121, 1.74533]),
            shape=(self.mj_model.nu,),  # number of actuators
            dtype=np.float32,
        )

        self.observation_space = gym.spaces.Box(
            low=0, high=255, shape=(self.obs_h, self.obs_w, 3), dtype=np.uint8
        )
        # NOTE: this is a *body* id (mjOBJ_BODY), so it must index body
        # arrays such as mj_data.xpos -- never geom arrays like geom_xpos.
        self.gripper_geom_id = mujoco.mj_name2id(
            self.mj_model, mujoco.mjtObj.mjOBJ_BODY, "moving_jaw_so101_v1"
        )
        self.cube_geom_id = mujoco.mj_name2id(
            self.mj_model, mujoco.mjtObj.mjOBJ_GEOM, "cube_geom"
        )
        self.cam_dis = cam_dis
        self.cam_azi = cam_azi

    def step(
        self, action: np.ndarray
    ) -> tuple[np.ndarray, float, bool, bool, dict[str, Any]]:
        """Apply ``action`` for ``n_sim_steps`` physics steps and return the transition."""
        self.mj_data.ctrl = action
        for _ in range(self.n_sim_steps):
            mujoco.mj_step(self.mj_model, self.mj_data)
        # (scene update happens inside _get_obs; no need to do it twice)

        # Get a rendered observation
        obs = self._get_obs()

        # Compute the reward
        reward = self._compute_reward()

        self.current_step += 1
        # Hitting the step limit is a time-limit *truncation* in Gymnasium
        # semantics, not task termination.
        terminated = False
        truncated = self.current_step >= self.n_max_epi_steps
        info: dict[str, Any] = {}

        return obs, reward, terminated, truncated, info

    def reset(
        self, seed: int | None = None, options: dict[str, Any] | None = None
    ) -> tuple[np.ndarray, dict[str, Any]]:
        """Reset the environment to the initial state.

        Args:
            seed: Seed forwarded to gym.Env to initialise self.np_random
                (was previously accepted but ignored).
            options: Unused; present for Gymnasium API compatibility.

        Returns:
            np.ndarray: Initial observation.
            dict: Additional information (empty).
        """
        super().reset(seed=seed)  # seeds self.np_random per Gymnasium API
        mujoco.mj_resetData(self.mj_model, self.mj_data)
        self.current_step = 0
        obs = self._get_obs()

        return obs, {}

    def _compute_reward(self) -> float:
        """Compute the reward as the negative Euclidean distance between the gripper and the cube."""
        # gripper_geom_id is a BODY id, so read the body position array
        # (indexing geom_xpos with a body id would return an unrelated geom).
        gripper_pos = self.mj_data.xpos[self.gripper_geom_id]
        cube_pos = self.mj_data.geom_xpos[self.cube_geom_id]

        # Return the negative distance as the reward
        return float(-np.linalg.norm(gripper_pos - cube_pos))

    def _get_obs(self) -> np.ndarray:
        """Render an observation.

        Returns:
            np.ndarray: Observation, rendered RGB image of shape (obs_h, obs_w, 3).
        """
        mj_cam = mujoco.MjvCamera()
        mj_cam.distance = self.cam_dis  # decrease to zoom in
        mj_cam.azimuth = self.cam_azi  # camera azimuth angle (degrees)
        self.mj_renderer.update_scene(self.mj_data, camera=mj_cam)
        return self.mj_renderer.render().copy()

    def close(self) -> None:
        """Release the renderer's GL context and drop MuJoCo handles."""
        self.mj_renderer.close()  # was leaked before (cleanup was commented out)
        del self.mj_data
        del self.mj_model
|
src/scripts/00_run_demo.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Demo: drive the SO-101 arm through a hard-coded pick sequence and save a GIF."""
from pathlib import Path

import cv2  # noqa: F401  -- unused here; kept from the original demo
import numpy as np
import imageio
import gymnasium as gym  # noqa: F401  -- unused here; kept from the original demo
from src.base_so101_env import SO101Env

# Joint value at which the jaw is closed enough to grasp the cube.
gripper_close = 0.05
env = SO101Env(
    xml_pth=Path("assets/SO-ARM100/Simulation/SO101/scene_with_cube.xml"),
    obs_w=640,
    obs_h=480,
    n_sim_steps=10,
    cam_azi=270,
)

frames = []  # rendered observations collected for the output GIF

try:
    obs, _ = env.reset()
    action = np.array([0.0, 0.0, 0.0, 1, 1.5, 2])

    # Warm-up steps: let the scene settle before recording.
    for _ in range(10):
        obs, reward, terminated, truncated, info = env.step(action)

    def run_and_capture(action: np.ndarray, steps: int) -> None:
        """Step the env with a fixed action for ``steps`` steps, recording every frame."""
        for _ in range(steps):
            obs, reward, terminated, truncated, info = env.step(action)
            # Append before checking for episode end so the terminal
            # observation is not dropped (the original broke out first).
            frames.append(obs)
            if terminated or truncated:
                break

    # Hard-coded policy: reach toward the cube, close the gripper, then lift in stages.
    run_and_capture(np.array([0.1, 0.2, 0.2, 1, 1.5, 2]), 10)
    run_and_capture(np.array([0.0, 0.2, 0.2, 1, 1.5, gripper_close]), 20)
    run_and_capture(np.array([0, -0.6, 0.2, 1, 1.5, gripper_close]), 20)
    run_and_capture(np.array([0, -0.6, 0.1, 1, 1.5, gripper_close]), 20)
    run_and_capture(np.array([0, -0.6, -0.0, 1, 1.5, gripper_close]), 20)
    run_and_capture(np.array([0, -0.6, -0.2, 1, 1.5, gripper_close]), 20)
    run_and_capture(np.array([0, -0.6, -0.4, 1, 1.5, gripper_close]), 20)

finally:
    env.close()

imageio.mimsave("assets/media/output3.gif", frames, fps=20)