| import gymnasium as gym |
| from gymnasium import spaces |
| import numpy as np |
| import mujoco |
| import os |
|
|
| from controllers import RLPolicyController |
|
|
|
|
class G1Env(gym.Env):
    """
    Gymnasium environment for the Unitree G1 humanoid robot in MuJoCo.

    The model has a free-floating base (7 qpos / 6 qvel entries) plus 29
    actuated joints, listed in ``JOINT_NAMES``.

    Observation (shape ``(71,)`` for the stock model)::

        [base_pos(3), base_quat(4), base_lin_vel(3), base_ang_vel(3),
         joint_pos(nq - 7), joint_vel(nv - 6)]

    Action: shape ``(nu,)`` in ``[-1, 1]``, affinely mapped onto each
    actuator's ``ctrlrange``.
    """

    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 30}

    # Actuated joints in actuator order: legs (6 each), waist (3), arms (7 each).
    JOINT_NAMES = [
        "left_hip_pitch_joint", "left_hip_roll_joint", "left_hip_yaw_joint",
        "left_knee_joint", "left_ankle_pitch_joint", "left_ankle_roll_joint",

        "right_hip_pitch_joint", "right_hip_roll_joint", "right_hip_yaw_joint",
        "right_knee_joint", "right_ankle_pitch_joint", "right_ankle_roll_joint",

        "waist_yaw_joint", "waist_roll_joint", "waist_pitch_joint",

        "left_shoulder_pitch_joint", "left_shoulder_roll_joint", "left_shoulder_yaw_joint",
        "left_elbow_joint", "left_wrist_roll_joint", "left_wrist_pitch_joint", "left_wrist_yaw_joint",

        "right_shoulder_pitch_joint", "right_shoulder_roll_joint", "right_shoulder_yaw_joint",
        "right_elbow_joint", "right_wrist_roll_joint", "right_wrist_pitch_joint", "right_wrist_yaw_joint",
    ]

    # Nominal standing pose for the 29 actuated joints, in JOINT_NAMES order:
    # slightly bent knees/hips for the legs, neutral waist, relaxed arms.
    DEFAULT_STANDING_POSE = np.array([
        # left leg
        -0.1, 0.0, 0.0, 0.3, -0.2, 0.0,
        # right leg
        -0.1, 0.0, 0.0, 0.3, -0.2, 0.0,
        # waist
        0.0, 0.0, 0.0,
        # left arm
        0.0, 0.3, 0.0, 0.3, 0.0, 0.0, 0.0,
        # right arm
        0.0, -0.3, 0.0, 0.3, 0.0, 0.0, 0.0,
    ])

    def __init__(self, render_mode=None, width=1280, height=720):
        """Load the MuJoCo model and set up spaces.

        Args:
            render_mode: One of ``metadata["render_modes"]`` or ``None``.
            width: Offscreen render width in pixels.
            height: Offscreen render height in pixels.
        """
        super().__init__()

        # scene.xml is expected next to this file.
        model_path = os.path.join(os.path.dirname(__file__), "scene.xml")
        self.model = mujoco.MjModel.from_xml_path(model_path)
        self.data = mujoco.MjData(self.model)

        self.num_actuators = self.model.nu

        # Normalized actions; mapped to each actuator's ctrlrange in step().
        self.action_space = spaces.Box(
            low=-1.0, high=1.0,
            shape=(self.num_actuators,),
            dtype=np.float32,
        )

        # Derived from the model instead of hard-coded: base pos (3) +
        # quat (4) + lin vel (3) + ang vel (3) + joint pos + joint vel.
        # Evaluates to 71 for the stock 29-DoF G1 model.
        obs_dim = 13 + (self.model.nq - 7) + (self.model.nv - 6)
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(obs_dim,),
            dtype=np.float32,
        )

        self.render_mode = render_mode
        self.width = width
        self.height = height
        self.renderer = None  # created lazily in render()

        self.steps = 0
        self.max_steps = 1000

        # Per-actuator (low, high) control limits, shape (nu, 2).
        self.ctrl_ranges = self.model.actuator_ctrlrange.copy()

        self.controller = RLPolicyController(self.num_actuators)

    def set_command(self, vx: float = 0.0, vy: float = 0.0, vyaw: float = 0.0):
        """Set velocity command for RL policy controller."""
        self.controller.set_command(vx, vy, vyaw)

    def get_command(self):
        """Get current velocity command."""
        return self.controller.get_command()

    def _get_obs(self):
        """Build the flat float32 observation vector from simulation state.

        Layout: base position, base quaternion (wxyz), base linear velocity,
        base angular velocity, actuated joint positions, joint velocities.
        """
        base_pos = self.data.qpos[:3].copy()
        base_quat = self.data.qpos[3:7].copy()

        base_lin_vel = self.data.qvel[:3].copy()
        base_ang_vel = self.data.qvel[3:6].copy()

        joint_pos = self.data.qpos[7:].copy()
        joint_vel = self.data.qvel[6:].copy()

        return np.concatenate([
            base_pos, base_quat,
            base_lin_vel, base_ang_vel,
            joint_pos, joint_vel,
        ]).astype(np.float32)

    def reset(self, seed=None, options=None):
        """Reset the simulation to a slightly perturbed standing pose.

        Returns:
            (observation, info) per the Gymnasium API.
        """
        super().reset(seed=seed)

        mujoco.mj_resetData(self.model, self.data)

        # Start from the nominal standing pose with small uniform noise so
        # episodes are not bit-identical.
        self.data.qpos[7:] = self.DEFAULT_STANDING_POSE.copy()
        noise = self.np_random.uniform(-0.01, 0.01, size=self.data.qpos[7:].shape)
        self.data.qpos[7:] += noise

        # Recompute derived quantities for the new state.
        mujoco.mj_forward(self.model, self.data)

        self.steps = 0

        if self.controller:
            self.controller.reset()

        observation = self._get_obs()
        return observation, {}

    def step(self, action):
        """Step with an explicit normalized action (for RL training).

        Args:
            action: Array-like of shape (nu,), nominally in [-1, 1].

        Returns:
            (observation, reward, terminated, truncated, info).
        """
        # Clip BEFORE scaling so out-of-range policy outputs cannot command
        # controls outside the actuator ctrlrange (previously unclipped).
        action = np.clip(np.asarray(action, dtype=np.float64), -1.0, 1.0)

        # Affine map [-1, 1] -> [low, high] per actuator (vectorized).
        low = self.ctrl_ranges[:, 0]
        high = self.ctrl_ranges[:, 1]
        scaled_action = low + (action + 1.0) * 0.5 * (high - low)

        self.data.ctrl[:] = scaled_action

        mujoco.mj_step(self.model, self.data)
        self.steps += 1

        observation = self._get_obs()

        base_height = float(self.data.qpos[2])

        base_quat = self.data.qpos[3:7]
        # w^2 of the base quaternion: 1 at identity orientation.
        # NOTE(review): this also penalizes pure yaw rotation (w = cos(yaw/2));
        # a projected-gravity term may be intended — confirm.
        upright_reward = float(base_quat[0] ** 2)

        # Height reward saturates at 0.8 m.
        height_reward = min(base_height, 0.8) / 0.8

        # Small L1 penalty on commanded controls.
        energy_penalty = -0.001 * float(np.sum(np.abs(scaled_action)))

        reward = height_reward + upright_reward + energy_penalty

        # Plain bools, as the Gymnasium API expects (not numpy.bool_).
        terminated = bool(base_height < 0.3)
        truncated = self.steps >= self.max_steps

        info = {
            "base_height": base_height,
            "upright": upright_reward,
        }

        return observation, reward, terminated, truncated, info

    def step_with_controller(self, dt: float = 0.01):
        """Step using the active controller (for visualization/demo).

        Args:
            dt: Controller time increment passed to ``controller.step``.

        Returns:
            The observation after stepping the simulation.
        """
        observation = self._get_obs()

        if self.controller:
            torques = self.controller.compute_action(observation, self.data)
            self.controller.step(dt)
        else:
            torques = np.zeros(self.num_actuators)

        # Clip to actuator limits WITHOUT mutating the controller's array
        # (the old in-place loop aliased controller-owned memory).
        torques = np.clip(torques, self.ctrl_ranges[:, 0], self.ctrl_ranges[:, 1])

        self.data.ctrl[:] = torques

        mujoco.mj_step(self.model, self.data)
        self.steps += 1

        return self._get_obs()

    def render(self):
        """Render an RGB frame when render_mode == "rgb_array".

        NOTE(review): metadata advertises a "human" mode, but it is not
        implemented here (returns None) — confirm whether an interactive
        viewer is handled elsewhere.
        """
        if self.render_mode == "rgb_array":
            if self.renderer is None:
                self.renderer = mujoco.Renderer(self.model, height=self.height, width=self.width)
            self.renderer.update_scene(self.data)
            return self.renderer.render()
        return None

    def close(self):
        """Release the offscreen renderer, if one was created."""
        if self.renderer:
            self.renderer.close()
            self.renderer = None
|
|