| import gymnasium as gym |
| from gymnasium import spaces |
| import numpy as np |
| import mujoco |
| import os |
|
|
| from controllers import RLPolicyController |
|
|
|
|
class G1Env(gym.Env):
    """
    Gymnasium environment for the Unitree G1 humanoid robot in MuJoCo.

    The model has a free-floating base (7 qpos / 6 qvel entries) plus 29
    actuated joints, listed in ``JOINT_NAMES``.

    Observation (shape ``(71,)`` for the stock model)::

        [base_pos(3), base_quat(4), base_lin_vel(3), base_ang_vel(3),
         joint_pos(nq - 7), joint_vel(nv - 6)]

    Action: shape ``(nu,)`` in ``[-1, 1]``, affinely mapped onto each
    actuator's ``ctrlrange``.
    """

    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 30}

    # Actuated joints in actuator order: legs (6 each), waist (3), arms (7 each).
    JOINT_NAMES = [
        "left_hip_pitch_joint", "left_hip_roll_joint", "left_hip_yaw_joint",
        "left_knee_joint", "left_ankle_pitch_joint", "left_ankle_roll_joint",

        "right_hip_pitch_joint", "right_hip_roll_joint", "right_hip_yaw_joint",
        "right_knee_joint", "right_ankle_pitch_joint", "right_ankle_roll_joint",

        "waist_yaw_joint", "waist_roll_joint", "waist_pitch_joint",

        "left_shoulder_pitch_joint", "left_shoulder_roll_joint", "left_shoulder_yaw_joint",
        "left_elbow_joint", "left_wrist_roll_joint", "left_wrist_pitch_joint", "left_wrist_yaw_joint",

        "right_shoulder_pitch_joint", "right_shoulder_roll_joint", "right_shoulder_yaw_joint",
        "right_elbow_joint", "right_wrist_roll_joint", "right_wrist_pitch_joint", "right_wrist_yaw_joint",
    ]

    # Nominal standing pose for the 29 actuated joints, in JOINT_NAMES order:
    # slightly bent knees/hips for the legs, neutral waist, relaxed arms.
    DEFAULT_STANDING_POSE = np.array([
        # left leg
        -0.1, 0.0, 0.0, 0.3, -0.2, 0.0,
        # right leg
        -0.1, 0.0, 0.0, 0.3, -0.2, 0.0,
        # waist
        0.0, 0.0, 0.0,
        # left arm
        0.0, 0.3, 0.0, 0.3, 0.0, 0.0, 0.0,
        # right arm
        0.0, -0.3, 0.0, 0.3, 0.0, 0.0, 0.0,
    ])

    def __init__(self, render_mode=None, width=1280, height=720):
        """Load the MuJoCo model and set up spaces.

        Args:
            render_mode: One of ``metadata["render_modes"]`` or ``None``.
            width: Offscreen render width in pixels.
            height: Offscreen render height in pixels.
        """
        super().__init__()

        # scene.xml is expected next to this file.
        model_path = os.path.join(os.path.dirname(__file__), "scene.xml")
        self.model = mujoco.MjModel.from_xml_path(model_path)
        self.data = mujoco.MjData(self.model)

        self.num_actuators = self.model.nu

        # Normalized actions; mapped to each actuator's ctrlrange in step().
        self.action_space = spaces.Box(
            low=-1.0, high=1.0,
            shape=(self.num_actuators,),
            dtype=np.float32,
        )

        # Derived from the model instead of hard-coded: base pos (3) +
        # quat (4) + lin vel (3) + ang vel (3) + joint pos + joint vel.
        # Evaluates to 71 for the stock 29-DoF G1 model.
        obs_dim = 13 + (self.model.nq - 7) + (self.model.nv - 6)
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(obs_dim,),
            dtype=np.float32,
        )

        self.render_mode = render_mode
        self.width = width
        self.height = height
        self.renderer = None  # created lazily in render()

        self.steps = 0
        self.max_steps = 1000

        # Per-actuator (low, high) control limits, shape (nu, 2).
        self.ctrl_ranges = self.model.actuator_ctrlrange.copy()

        self.controller = RLPolicyController(self.num_actuators)

    def set_command(self, vx: float = 0.0, vy: float = 0.0, vyaw: float = 0.0):
        """Set velocity command for RL policy controller."""
        self.controller.set_command(vx, vy, vyaw)

    def get_command(self):
        """Get current velocity command."""
        return self.controller.get_command()

    def _get_obs(self):
        """Build the flat float32 observation vector from simulation state.

        Layout: base position, base quaternion (wxyz), base linear velocity,
        base angular velocity, actuated joint positions, joint velocities.
        """
        base_pos = self.data.qpos[:3].copy()
        base_quat = self.data.qpos[3:7].copy()

        base_lin_vel = self.data.qvel[:3].copy()
        base_ang_vel = self.data.qvel[3:6].copy()

        joint_pos = self.data.qpos[7:].copy()
        joint_vel = self.data.qvel[6:].copy()

        return np.concatenate([
            base_pos, base_quat,
            base_lin_vel, base_ang_vel,
            joint_pos, joint_vel,
        ]).astype(np.float32)

    def reset(self, seed=None, options=None):
        """Reset the simulation to a slightly perturbed standing pose.

        Returns:
            (observation, info) per the Gymnasium API.
        """
        super().reset(seed=seed)

        mujoco.mj_resetData(self.model, self.data)

        # Start from the nominal standing pose with small uniform noise so
        # episodes are not bit-identical.
        self.data.qpos[7:] = self.DEFAULT_STANDING_POSE.copy()
        noise = self.np_random.uniform(-0.01, 0.01, size=self.data.qpos[7:].shape)
        self.data.qpos[7:] += noise

        # Recompute derived quantities for the new state.
        mujoco.mj_forward(self.model, self.data)

        self.steps = 0

        if self.controller:
            self.controller.reset()

        observation = self._get_obs()
        return observation, {}

    def step(self, action):
        """Step with an explicit normalized action (for RL training).

        Args:
            action: Array-like of shape (nu,), nominally in [-1, 1].

        Returns:
            (observation, reward, terminated, truncated, info).
        """
        # Clip BEFORE scaling so out-of-range policy outputs cannot command
        # controls outside the actuator ctrlrange (previously unclipped).
        action = np.clip(np.asarray(action, dtype=np.float64), -1.0, 1.0)

        # Affine map [-1, 1] -> [low, high] per actuator (vectorized).
        low = self.ctrl_ranges[:, 0]
        high = self.ctrl_ranges[:, 1]
        scaled_action = low + (action + 1.0) * 0.5 * (high - low)

        self.data.ctrl[:] = scaled_action

        mujoco.mj_step(self.model, self.data)
        self.steps += 1

        observation = self._get_obs()

        base_height = float(self.data.qpos[2])

        base_quat = self.data.qpos[3:7]
        # w^2 of the base quaternion: 1 at identity orientation.
        # NOTE(review): this also penalizes pure yaw rotation (w = cos(yaw/2));
        # a projected-gravity term may be intended — confirm.
        upright_reward = float(base_quat[0] ** 2)

        # Height reward saturates at 0.8 m.
        height_reward = min(base_height, 0.8) / 0.8

        # Small L1 penalty on commanded controls.
        energy_penalty = -0.001 * float(np.sum(np.abs(scaled_action)))

        reward = height_reward + upright_reward + energy_penalty

        # Plain bools, as the Gymnasium API expects (not numpy.bool_).
        terminated = bool(base_height < 0.3)
        truncated = self.steps >= self.max_steps

        info = {
            "base_height": base_height,
            "upright": upright_reward,
        }

        return observation, reward, terminated, truncated, info

    def step_with_controller(self, dt: float = 0.01):
        """Step using the active controller (for visualization/demo).

        Args:
            dt: Controller time increment passed to ``controller.step``.

        Returns:
            The observation after stepping the simulation.
        """
        observation = self._get_obs()

        if self.controller:
            torques = self.controller.compute_action(observation, self.data)
            self.controller.step(dt)
        else:
            torques = np.zeros(self.num_actuators)

        # Clip to actuator limits WITHOUT mutating the controller's array
        # (the old in-place loop aliased controller-owned memory).
        torques = np.clip(torques, self.ctrl_ranges[:, 0], self.ctrl_ranges[:, 1])

        self.data.ctrl[:] = torques

        mujoco.mj_step(self.model, self.data)
        self.steps += 1

        return self._get_obs()

    def render(self):
        """Render an RGB frame when render_mode == "rgb_array".

        NOTE(review): metadata advertises a "human" mode, but it is not
        implemented here (returns None) — confirm whether an interactive
        viewer is handled elsewhere.
        """
        if self.render_mode == "rgb_array":
            if self.renderer is None:
                self.renderer = mujoco.Renderer(self.model, height=self.height, width=self.width)
            self.renderer.update_scene(self.data)
            return self.renderer.render()
        return None

    def close(self):
        """Release the offscreen renderer, if one was created."""
        if self.renderer:
            self.renderer.close()
            self.renderer = None
|
|