# nova-sim/robots/g1/g1_env.py
# Initial commit: Nova Sim unified robot simulation platform (ef118cf)
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import mujoco
import os
from controllers import RLPolicyController
class G1Env(gym.Env):
    """
    Gymnasium environment for the Unitree G1 humanoid robot in MuJoCo.

    The G1 has a floating base plus 29 actuated joints. Actions are
    normalized torques in [-1, 1] that are rescaled to each actuator's
    control range; observations concatenate the base pose/velocity with
    all joint positions and velocities.
    """

    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 30}

    # Joint names for the G1 robot (29 actuated joints)
    JOINT_NAMES = [
        # Left leg (6 joints)
        "left_hip_pitch_joint", "left_hip_roll_joint", "left_hip_yaw_joint",
        "left_knee_joint", "left_ankle_pitch_joint", "left_ankle_roll_joint",
        # Right leg (6 joints)
        "right_hip_pitch_joint", "right_hip_roll_joint", "right_hip_yaw_joint",
        "right_knee_joint", "right_ankle_pitch_joint", "right_ankle_roll_joint",
        # Torso (3 joints)
        "waist_yaw_joint", "waist_roll_joint", "waist_pitch_joint",
        # Left arm (7 joints)
        "left_shoulder_pitch_joint", "left_shoulder_roll_joint", "left_shoulder_yaw_joint",
        "left_elbow_joint", "left_wrist_roll_joint", "left_wrist_pitch_joint", "left_wrist_yaw_joint",
        # Right arm (7 joints)
        "right_shoulder_pitch_joint", "right_shoulder_roll_joint", "right_shoulder_yaw_joint",
        "right_elbow_joint", "right_wrist_roll_joint", "right_wrist_pitch_joint", "right_wrist_yaw_joint",
    ]

    # Default standing pose from the RL policy (g1.yaml).
    # This bent-knee stance is stable and matches what the RL policy expects.
    # Order follows JOINT_NAMES (29 entries).
    DEFAULT_STANDING_POSE = np.array([
        # Left leg: hip_pitch, hip_roll, hip_yaw, knee, ankle_pitch, ankle_roll
        -0.1, 0.0, 0.0, 0.3, -0.2, 0.0,
        # Right leg: same
        -0.1, 0.0, 0.0, 0.3, -0.2, 0.0,
        # Waist: yaw, roll, pitch
        0.0, 0.0, 0.0,
        # Left arm: shoulder_pitch, shoulder_roll, shoulder_yaw, elbow, wrist x3
        0.0, 0.3, 0.0, 0.3, 0.0, 0.0, 0.0,
        # Right arm
        0.0, -0.3, 0.0, 0.3, 0.0, 0.0, 0.0,
    ])

    def __init__(self, render_mode=None, width=1280, height=720):
        """
        Build the environment.

        Args:
            render_mode: one of ``metadata["render_modes"]`` or None.
            width: off-screen render width in pixels.
            height: off-screen render height in pixels.
        """
        super().__init__()

        # Load the MuJoCo scene that lives next to this file.
        model_path = os.path.join(os.path.dirname(__file__), "scene.xml")
        self.model = mujoco.MjModel.from_xml_path(model_path)
        self.data = mujoco.MjData(self.model)

        # Number of actuators (29 motors for the stock G1).
        self.num_actuators = self.model.nu

        # Action space: normalized torques in [-1, 1] that get rescaled to
        # each actuator's ctrl range in step().
        self.action_space = spaces.Box(
            low=-1.0, high=1.0,
            shape=(self.num_actuators,),
            dtype=np.float32,
        )

        # Observation layout (see _get_obs):
        #   base position (3) + base orientation quaternion (4)
        #   base linear velocity (3) + base angular velocity (3)
        #   joint positions (nq - 7) + joint velocities (nv - 6)
        # Derived from the model instead of hard-coding 71 so the space stays
        # consistent with _get_obs if the XML ever changes (71 for the 29-DoF G1).
        obs_dim = 13 + (self.model.nq - 7) + (self.model.nv - 6)
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(obs_dim,),
            dtype=np.float32,
        )

        self.render_mode = render_mode
        self.width = width
        self.height = height
        self.renderer = None  # lazily created in render()

        self.steps = 0
        self.max_steps = 1000  # episode truncation horizon

        # Per-actuator (low, high) control limits used for action scaling/clipping.
        self.ctrl_ranges = self.model.actuator_ctrlrange.copy()

        # RL Policy controller (only controller)
        self.controller = RLPolicyController(self.num_actuators)

    def set_command(self, vx: float = 0.0, vy: float = 0.0, vyaw: float = 0.0):
        """Set velocity command for RL policy controller."""
        self.controller.set_command(vx, vy, vyaw)

    def get_command(self):
        """Get current velocity command."""
        return self.controller.get_command()

    def _get_obs(self):
        """Assemble the flat float32 observation vector from the sim state."""
        # Base position and orientation (first 7 qpos entries of the free joint).
        base_pos = self.data.qpos[:3].copy()
        base_quat = self.data.qpos[3:7].copy()
        # Base linear/angular velocity (first 6 qvel entries of the free joint).
        base_lin_vel = self.data.qvel[:3].copy()
        base_ang_vel = self.data.qvel[3:6].copy()
        # Actuated joints: skip the floating base (7 qpos, 6 qvel).
        joint_pos = self.data.qpos[7:].copy()
        joint_vel = self.data.qvel[6:].copy()
        return np.concatenate([
            base_pos, base_quat,
            base_lin_vel, base_ang_vel,
            joint_pos, joint_vel,
        ]).astype(np.float32)

    def reset(self, seed=None, options=None):
        """Reset the simulation to a lightly-perturbed standing pose.

        Returns:
            (observation, info) per the Gymnasium API.
        """
        super().reset(seed=seed)
        mujoco.mj_resetData(self.model, self.data)

        # Initialize joints to the stable standing pose (RL policy defaults),
        # plus a small uniform perturbation for episode variety.
        self.data.qpos[7:] = self.DEFAULT_STANDING_POSE.copy()
        noise = self.np_random.uniform(-0.01, 0.01, size=self.data.qpos[7:].shape)
        self.data.qpos[7:] += noise
        mujoco.mj_forward(self.model, self.data)

        self.steps = 0
        if self.controller:
            self.controller.reset()

        return self._get_obs(), {}

    def step(self, action):
        """Step with an explicit normalized action (for RL training).

        Args:
            action: array-like of shape (num_actuators,) in [-1, 1].

        Returns:
            (observation, reward, terminated, truncated, info).
        """
        # Clip to the declared action space first so out-of-range policy
        # outputs can never produce controls beyond the actuator limits,
        # then rescale [-1, 1] -> [low, high] per actuator (vectorized).
        action = np.clip(np.asarray(action, dtype=np.float64), -1.0, 1.0)
        low = self.ctrl_ranges[:, 0]
        high = self.ctrl_ranges[:, 1]
        scaled_action = low + (action + 1.0) * 0.5 * (high - low)

        # Apply and advance the physics by one model timestep.
        self.data.ctrl[:] = scaled_action
        mujoco.mj_step(self.model, self.data)
        self.steps += 1

        observation = self._get_obs()

        # Simple reward: stay upright and alive.
        # Base height (pelvis should be around 0.8 m when standing).
        base_height = self.data.qpos[2]
        base_quat = self.data.qpos[3:7]
        # Upright reward: w component of the quaternion close to +/-1 means
        # little tilt away from the reference orientation.
        upright_reward = base_quat[0] ** 2
        # Height reward saturates at the nominal 0.8 m standing height.
        height_reward = min(base_height, 0.8) / 0.8
        # Small energy penalty on total commanded torque magnitude.
        energy_penalty = -0.001 * np.sum(np.abs(scaled_action))
        reward = height_reward + upright_reward + energy_penalty

        # Terminate when the robot has fallen; truncate at the horizon.
        terminated = base_height < 0.3
        truncated = self.steps >= self.max_steps

        info = {
            "base_height": base_height,
            "upright": upright_reward,
        }
        return observation, reward, terminated, truncated, info

    def step_with_controller(self, dt: float = 0.01):
        """Step using the active controller (for visualization/demo).

        Args:
            dt: controller update interval in seconds, forwarded to the
                controller's internal clock.

        Returns:
            The observation after the physics step.
        """
        observation = self._get_obs()

        # Ask the controller for raw torques; fall back to zeros if absent.
        if self.controller:
            torques = self.controller.compute_action(observation, self.data)
            self.controller.step(dt)
        else:
            torques = np.zeros(self.num_actuators)

        # Clamp to the per-actuator control limits (vectorized) and apply.
        torques = np.clip(torques, self.ctrl_ranges[:, 0], self.ctrl_ranges[:, 1])
        self.data.ctrl[:] = torques

        mujoco.mj_step(self.model, self.data)
        self.steps += 1
        return self._get_obs()

    def render(self):
        """Return an RGB frame in "rgb_array" mode; otherwise None.

        NOTE(review): metadata advertises a "human" mode, but no on-screen
        viewer is implemented here — presumably handled elsewhere; confirm.
        """
        if self.render_mode == "rgb_array":
            if self.renderer is None:
                # Lazily create the off-screen renderer at the requested size.
                self.renderer = mujoco.Renderer(self.model, height=self.height, width=self.width)
            self.renderer.update_scene(self.data)
            return self.renderer.render()
        return None

    def close(self):
        """Release the off-screen renderer, if one was created."""
        if self.renderer:
            self.renderer.close()
            self.renderer = None