Buckets:
| # SPDX-License-Identifier: BSD-3-Clause | |
| # | |
| # Redistribution and use in source and binary forms, with or without | |
| # modification, are permitted provided that the following conditions are met: | |
| # | |
| # 1. Redistributions of source code must retain the above copyright notice, this | |
| # list of conditions and the following disclaimer. | |
| # | |
| # 2. Redistributions in binary form must reproduce the above copyright notice, | |
| # this list of conditions and the following disclaimer in the documentation | |
| # and/or other materials provided with the distribution. | |
| # | |
| # 3. Neither the name of the copyright holder nor the names of its | |
| # contributors may be used to endorse or promote products derived from | |
| # this software without specific prior written permission. | |
| # | |
| # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
| # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |
| # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
| # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
| # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| # | |
| # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. | |
| from legged_gym.envs.base.legged_robot_config import LeggedRobotCfg | |
| from .h1_task_button_config import H1TaskButtonCfg | |
| from isaacgym.torch_utils import * | |
| from isaacgym import gymtorch, gymapi | |
| import torch | |
| from legged_gym.envs.base.legged_robot import LeggedRobot, get_euler_xyz_tensor, quat_rotate_inverse, LEGGED_GYM_ROOT_DIR | |
| from legged_gym.utils.terrain import XBotTerrain | |
| from legged_gym.utils.human import load_target_jt, sample_int_from_float | |
| # from collections import deque | |
| import math | |
| import os | |
| from collections import deque | |
| class H1TaskButton(LeggedRobot): | |
| ''' | |
| Args: | |
| cfg (LeggedRobotCfg): Configuration object for the legged robot. | |
| sim_params: Parameters for the simulation. | |
| physics_engine: Physics engine used in the simulation. | |
| sim_device: Device used for the simulation. | |
| headless: Flag indicating whether the simulation should be run in headless mode. | |
| Attributes: | |
| last_feet_z (float): The z-coordinate of the last feet position. | |
| feet_height (torch.Tensor): Tensor representing the height of the feet. | |
| sim (gymtorch.GymSim): The simulation object. | |
| terrain (HumanoidTerrain): The terrain object. | |
| up_axis_idx (int): The index representing the up axis. | |
| command_input (torch.Tensor): Tensor representing the command input. | |
| privileged_obs_buf (torch.Tensor): Tensor representing the privileged observations buffer. | |
| obs_buf (torch.Tensor): Tensor representing the observations buffer. | |
| obs_history (collections.deque): Deque containing the history of observations. | |
| critic_history (collections.deque): Deque containing the history of critic observations. | |
| Methods: | |
| _push_robots(): Randomly pushes the robots by setting a randomized base velocity. | |
| _get_phase(): Calculates the phase of the gait cycle. | |
| _get_gait_phase(): Calculates the gait phase. | |
| create_sim(): Creates the simulation, terrain, and environments. | |
| _get_noise_scale_vec(cfg): Sets a vector used to scale the noise added to the observations. | |
| step(actions): Performs a simulation step with the given actions. | |
| compute_observations(): Computes the observations. | |
| reset_idx(env_ids): Resets the environment for the specified environment IDs. | |
| ''' | |
| def __init__(self, cfg: H1TaskButtonCfg, sim_params, physics_engine, sim_device, headless): | |
| super().__init__(cfg, sim_params, physics_engine, sim_device, headless) | |
| self.cfg: H1TaskButtonCfg | |
| self.last_feet_z = 0.05 | |
| self.feet_height = torch.zeros((self.num_envs, 2), device=self.device) | |
| self.button_goal_pos = torch.zeros(self.num_envs, 3, device=self.device) | |
| self.reset_idx(torch.tensor(range(self.num_envs), device=self.device)) | |
| self.gym.simulate(self.sim) | |
| self.gym.refresh_rigid_body_state_tensor(self.sim) | |
| self.compute_observations() | |
| # import pdb; pdb.set_trace() | |
| def _push_robots(self): | |
| """ Random pushes the robots. Emulates an impulse by setting a randomized base velocity. | |
| """ | |
| max_vel = self.cfg.domain_rand.max_push_vel_xy | |
| max_push_angular = self.cfg.domain_rand.max_push_ang_vel | |
| self.rand_push_force[:, :2] = torch_rand_float( | |
| -max_vel, max_vel, (self.num_envs, 2), device=self.device) # lin vel x/y | |
| self.humanoid_root_states[:, 7:9] = self.rand_push_force[:, :2] | |
| self.rand_push_torque = torch_rand_float( | |
| -max_push_angular, max_push_angular, (self.num_envs, 3), device=self.device) | |
| self.humanoid_root_states[:, 10:13] = self.rand_push_torque | |
| self.gym.set_actor_root_state_tensor( | |
| self.sim, gymtorch.unwrap_tensor(self.root_states)) | |
| def _get_phase(self): | |
| cycle_time = self.cfg.rewards.cycle_time | |
| phase = self.episode_length_buf * self.dt / cycle_time | |
| return phase | |
| def _get_gait_phase(self): | |
| # return float mask 1 is stance, 0 is swing | |
| phase = self._get_phase() | |
| sin_pos = torch.sin(2 * torch.pi * phase) | |
| # Add double support phase | |
| stance_mask = torch.zeros((self.num_envs, 2), device=self.device) | |
| # left foot stance | |
| stance_mask[:, 0] = sin_pos >= 0 | |
| # right foot stance | |
| stance_mask[:, 1] = sin_pos < 0 | |
| # Double support phase | |
| stance_mask[torch.abs(sin_pos) < 0.1] = 1 | |
| return stance_mask | |
| def create_sim(self): | |
| """ Creates simulation, terrain and evironments | |
| """ | |
| self.up_axis_idx = 2 # 2 for z, 1 for y -> adapt gravity accordingly | |
| self.sim = self.gym.create_sim( | |
| self.sim_device_id, self.graphics_device_id, self.physics_engine, self.sim_params) | |
| mesh_type = self.cfg.terrain.mesh_type | |
| if mesh_type in ['heightfield', 'trimesh']: | |
| self.terrain = XBotTerrain(self.cfg.terrain, self.num_envs) | |
| if mesh_type == 'plane': | |
| self._create_ground_plane() | |
| elif mesh_type == 'heightfield': | |
| self._create_heightfield() | |
| elif mesh_type == 'trimesh': | |
| self._create_trimesh() | |
| elif mesh_type is not None: | |
| raise ValueError( | |
| "Terrain mesh type not recognised. Allowed types are [None, plane, heightfield, trimesh]") | |
| self._create_envs() | |
| def _create_envs(self): | |
| """ Creates environments: | |
| 1. loads the robot URDF/MJCF asset, | |
| 2. For each environment | |
| 2.1 creates the environment, | |
| 2.2 calls DOF and Rigid shape properties callbacks, | |
| 2.3 create actor with these properties and add them to the env | |
| 3. Store indices of different bodies of the robot | |
| """ | |
| asset_path = self.cfg.asset.file.format(LEGGED_GYM_ROOT_DIR=LEGGED_GYM_ROOT_DIR) | |
| asset_root = os.path.dirname(asset_path) | |
| asset_file = os.path.basename(asset_path) | |
| asset_options = gymapi.AssetOptions() | |
| asset_options.default_dof_drive_mode = self.cfg.asset.default_dof_drive_mode | |
| asset_options.collapse_fixed_joints = self.cfg.asset.collapse_fixed_joints | |
| asset_options.replace_cylinder_with_capsule = self.cfg.asset.replace_cylinder_with_capsule | |
| asset_options.flip_visual_attachments = self.cfg.asset.flip_visual_attachments | |
| asset_options.fix_base_link = self.cfg.asset.fix_base_link | |
| asset_options.density = self.cfg.asset.density | |
| asset_options.angular_damping = self.cfg.asset.angular_damping | |
| asset_options.linear_damping = self.cfg.asset.linear_damping | |
| asset_options.max_angular_velocity = self.cfg.asset.max_angular_velocity | |
| asset_options.max_linear_velocity = self.cfg.asset.max_linear_velocity | |
| asset_options.armature = self.cfg.asset.armature | |
| asset_options.thickness = self.cfg.asset.thickness | |
| asset_options.disable_gravity = self.cfg.asset.disable_gravity | |
| robot_asset = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options) | |
| self.num_dof = self.gym.get_asset_dof_count(robot_asset) | |
| self.num_bodies = self.gym.get_asset_rigid_body_count(robot_asset) | |
| dof_props_asset = self.gym.get_asset_dof_properties(robot_asset) | |
| rigid_shape_props_asset = self.gym.get_asset_rigid_shape_properties(robot_asset) | |
| # save body names from the asset | |
| self.body_names = self.gym.get_asset_rigid_body_names(robot_asset) | |
| self.dof_names = self.gym.get_asset_dof_names(robot_asset) | |
| self.num_bodies = len(self.body_names) | |
| self.num_dofs = len(self.dof_names) | |
| feet_names = [s for s in self.body_names if self.cfg.asset.foot_name in s] | |
| knee_names = [s for s in self.body_names if self.cfg.asset.knee_name in s] | |
| penalized_contact_names = [] | |
| for name in self.cfg.asset.penalize_contacts_on: | |
| penalized_contact_names.extend([s for s in self.body_names if name in s]) | |
| termination_contact_names = [] | |
| for name in self.cfg.asset.terminate_after_contacts_on: | |
| termination_contact_names.extend([s for s in self.body_names if name in s]) | |
| base_init_state_list = self.cfg.init_state.pos + self.cfg.init_state.rot + self.cfg.init_state.lin_vel + self.cfg.init_state.ang_vel | |
| self.base_init_state = to_torch(base_init_state_list, device=self.device, requires_grad=False) | |
| start_pose = gymapi.Transform() | |
| start_pose.p = gymapi.Vec3(*self.base_init_state[:3]) | |
| self._get_env_origins() | |
| env_lower = gymapi.Vec3(0., 0., 0.) | |
| env_upper = gymapi.Vec3(0., 0., 0.) | |
| self.actor_handles = [] | |
| self.envs = [] | |
| self.env_frictions = torch.zeros(self.num_envs, 1, dtype=torch.float32, device=self.device) | |
| self.body_mass = torch.zeros(self.num_envs, 1, dtype=torch.float32, device=self.device, requires_grad=False) | |
| ### assets | |
| # create wall asset | |
| wall_dims = gymapi.Vec3(*self.cfg.asset.wall_dims) | |
| asset_options = gymapi.AssetOptions() | |
| asset_options.fix_base_link = True | |
| asset_options.disable_gravity = True | |
| wall_asset = self.gym.create_box(self.sim, wall_dims.x, wall_dims.y, wall_dims.z, asset_options) | |
| wall_pose = gymapi.Transform() | |
| self.wall_idxs = [] | |
| self.humanoid_idxs = [] | |
| for i in range(self.num_envs): | |
| # create env instance | |
| env_handle = self.gym.create_env(self.sim, env_lower, env_upper, int(np.sqrt(self.num_envs))) | |
| pos = self.env_origins[i].clone() | |
| # pos[:2] += torch_rand_float(-1., 1., (2,1), device=self.device).squeeze(1) | |
| start_pose.p = gymapi.Vec3(*pos) | |
| rigid_shape_props = self._process_rigid_shape_props(rigid_shape_props_asset, i) | |
| self.gym.set_asset_rigid_shape_properties(robot_asset, rigid_shape_props) | |
| actor_handle = self.gym.create_actor(env_handle, robot_asset, start_pose, self.cfg.asset.name, i, self.cfg.asset.self_collisions, 0) | |
| dof_props = self._process_dof_props(dof_props_asset, i) | |
| self.gym.set_actor_dof_properties(env_handle, actor_handle, dof_props) | |
| body_props = self.gym.get_actor_rigid_body_properties(env_handle, actor_handle) | |
| body_props = self._process_rigid_body_props(body_props, i) | |
| self.gym.set_actor_rigid_body_properties(env_handle, actor_handle, body_props, recomputeInertia=True) | |
| self.envs.append(env_handle) | |
| self.actor_handles.append(actor_handle) | |
| self.humanoid_idxs.append(self.gym.get_actor_index(env_handle, actor_handle, gymapi.DOMAIN_SIM)) | |
| ### assets | |
| # add wall | |
| wall_pose.p = gymapi.Vec3(*(pos[:3] + torch.tensor(self.cfg.asset.wall_offset, device=self.device))) | |
| wall_handle = self.gym.create_actor(env_handle, wall_asset, wall_pose, "wall", i, 0) | |
| self.wall_idxs.append(self.gym.get_actor_index(env_handle, wall_handle, gymapi.DOMAIN_SIM)) | |
| self._create_sensors_all() | |
| self.humanoid_idxs = torch.tensor(self.humanoid_idxs, device=self.device) | |
| ### assets | |
| self.wall_idxs = torch.tensor(self.wall_idxs, device=self.device) | |
| self.feet_indices = torch.zeros(len(feet_names), dtype=torch.long, device=self.device, requires_grad=False) | |
| for i in range(len(feet_names)): | |
| self.feet_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.actor_handles[0], feet_names[i]) | |
| self.knee_indices = torch.zeros(len(knee_names), dtype=torch.long, device=self.device, requires_grad=False) | |
| for i in range(len(knee_names)): | |
| self.knee_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.actor_handles[0], knee_names[i]) | |
| self.penalised_contact_indices = torch.zeros(len(penalized_contact_names), dtype=torch.long, device=self.device, requires_grad=False) | |
| for i in range(len(penalized_contact_names)): | |
| self.penalised_contact_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.actor_handles[0], penalized_contact_names[i]) | |
| self.termination_contact_indices = torch.zeros(len(termination_contact_names), dtype=torch.long, device=self.device, requires_grad=False) | |
| for i in range(len(termination_contact_names)): | |
| self.termination_contact_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.actor_handles[0], termination_contact_names[i]) | |
| ### other body parts | |
| # elbow | |
| elbow_names = [s for s in self.body_names if self.cfg.asset.elbow_name in s] | |
| self.elbow_indices = torch.zeros(len(elbow_names), dtype=torch.long, device=self.device, requires_grad=False) | |
| for i in range(len(elbow_names)): | |
| self.elbow_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.actor_handles[0], elbow_names[i]) | |
| # torso | |
| torso_names = [s for s in self.body_names if self.cfg.asset.torso_name in s] | |
| self.torso_indices = torch.zeros(len(torso_names), dtype=torch.long, device=self.device, requires_grad=False) | |
| for i in range(len(torso_names)): | |
| self.torso_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.actor_handles[0], torso_names[i]) | |
| # wrist | |
| wrist_names = [s for s in self.body_names if self.cfg.asset.wrist_name in s] | |
| self.wrist_indices = torch.zeros(len(wrist_names), dtype=torch.long, device=self.device, requires_grad=False) | |
| for i in range(len(wrist_names)): | |
| self.wrist_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.actor_handles[0], wrist_names[i]) | |
| def _init_buffers(self): | |
| """ Initialize torch tensors which will contain simulation states and processed quantities | |
| """ | |
| self._init_visual_buffers() | |
| # get gym GPU state tensors | |
| actor_root_state = self.gym.acquire_actor_root_state_tensor(self.sim) | |
| dof_state_tensor = self.gym.acquire_dof_state_tensor(self.sim) | |
| net_contact_forces = self.gym.acquire_net_contact_force_tensor(self.sim) | |
| rigid_body_state = self.gym.acquire_rigid_body_state_tensor(self.sim) | |
| self.gym.refresh_dof_state_tensor(self.sim) | |
| self.gym.refresh_actor_root_state_tensor(self.sim) | |
| self.gym.refresh_net_contact_force_tensor(self.sim) | |
| self.gym.refresh_rigid_body_state_tensor(self.sim) | |
| # create some wrapper tensors for different slices | |
| self.root_states = gymtorch.wrap_tensor(actor_root_state) | |
| self.humanoid_root_states = self.root_states.view(self.num_envs, -1, 13)[:, self.humanoid_idxs[0]] | |
| self.wall_root_states = self.root_states.view(self.num_envs, -1, 13)[:, self.wall_idxs[0]] | |
| self.dof_state = gymtorch.wrap_tensor(dof_state_tensor) | |
| self.dof_pos = self.dof_state.view(self.num_envs, self.num_dof, 2)[..., 0] | |
| self.dof_vel = self.dof_state.view(self.num_envs, self.num_dof, 2)[..., 1] | |
| self.base_quat = self.humanoid_root_states[:, 3:7] | |
| self.base_euler_xyz = get_euler_xyz_tensor(self.base_quat) | |
| self.contact_forces = gymtorch.wrap_tensor(net_contact_forces).view(self.num_envs, -1, 3) # shape: num_envs, num_bodies, xyz axis | |
| self.rigid_state = gymtorch.wrap_tensor(rigid_body_state).view(self.num_envs, -1, 13) | |
| # initialize some data used later on | |
| self.common_step_counter = 0 | |
| self.extras = {} | |
| self.noise_scale_vec = self._get_noise_scale_vec(self.cfg) | |
| self.gravity_vec = to_torch(get_axis_params(-1., self.up_axis_idx), device=self.device).repeat((self.num_envs, 1)) | |
| self.forward_vec = to_torch([1., 0., 0.], device=self.device).repeat((self.num_envs, 1)) | |
| self.torques = torch.zeros(self.num_envs, self.num_actions, dtype=torch.float, device=self.device, requires_grad=False) | |
| self.p_gains = torch.zeros(self.num_envs, self.num_actions, dtype=torch.float, device=self.device, requires_grad=False) | |
| self.d_gains = torch.zeros(self.num_envs, self.num_actions, dtype=torch.float, device=self.device, requires_grad=False) | |
| self.actions = torch.zeros(self.num_envs, self.num_actions, dtype=torch.float, device=self.device, requires_grad=False) | |
| self.last_actions = torch.zeros(self.num_envs, self.num_actions, dtype=torch.float, device=self.device, requires_grad=False) | |
| self.last_last_actions = torch.zeros(self.num_envs, self.num_actions, dtype=torch.float, device=self.device, requires_grad=False) | |
| self.last_rigid_state = torch.zeros_like(self.rigid_state) | |
| self.last_dof_vel = torch.zeros_like(self.dof_vel) | |
| self.last_root_vel = torch.zeros_like(self.humanoid_root_states[:, 7:13]) | |
| self.commands = torch.zeros(self.num_envs, self.cfg.commands.num_commands, dtype=torch.float, device=self.device, requires_grad=False) # x vel, y vel, yaw vel, heading | |
| self.commands_scale = torch.tensor([self.obs_scales.lin_vel, self.obs_scales.lin_vel, self.obs_scales.ang_vel], device=self.device, requires_grad=False,) | |
| self.feet_air_time = torch.zeros(self.num_envs, self.feet_indices.shape[0], dtype=torch.float, device=self.device, requires_grad=False) | |
| self.last_contacts = torch.zeros(self.num_envs, len(self.feet_indices), dtype=torch.bool, device=self.device, requires_grad=False) | |
| self.base_lin_vel = quat_rotate_inverse(self.base_quat, self.humanoid_root_states[:, 7:10]) | |
| self.base_ang_vel = quat_rotate_inverse(self.base_quat, self.humanoid_root_states[:, 10:13]) | |
| self.projected_gravity = quat_rotate_inverse(self.base_quat, self.gravity_vec) | |
| if self.cfg.terrain.measure_heights: | |
| self.height_points = self._init_height_points() | |
| self.measured_heights = 0 | |
| # joint positions offsets and PD gains | |
| self.default_dof_pos = torch.zeros(self.num_dof, dtype=torch.float, device=self.device, requires_grad=False) | |
| for i in range(self.num_dofs): | |
| name = self.dof_names[i] | |
| # print(name) | |
| self.default_dof_pos[i] = self.cfg.init_state.default_joint_angles[name] | |
| found = False | |
| for dof_name in self.cfg.control.stiffness.keys(): | |
| if dof_name in name: | |
| self.p_gains[:, i] = self.cfg.control.stiffness[dof_name] | |
| self.d_gains[:, i] = self.cfg.control.damping[dof_name] | |
| found = True | |
| if not found: | |
| self.p_gains[:, i] = 0. | |
| self.d_gains[:, i] = 0. | |
| print(f"PD gain of joint {name} were not defined, setting them to zero") | |
| self.rand_push_force = torch.zeros((self.num_envs, 3), dtype=torch.float32, device=self.device) | |
| self.rand_push_torque = torch.zeros((self.num_envs, 3), dtype=torch.float32, device=self.device) | |
| self.default_dof_pos = self.default_dof_pos.unsqueeze(0) | |
| self.default_joint_pd_target = self.default_dof_pos.clone() | |
| self.obs_history = deque(maxlen=self.cfg.env.frame_stack) | |
| self.critic_history = deque(maxlen=self.cfg.env.c_frame_stack) | |
| for _ in range(self.cfg.env.frame_stack): | |
| self.obs_history.append(torch.zeros( | |
| self.num_envs, self.cfg.env.num_single_obs, dtype=torch.float, device=self.device)) | |
| for _ in range(self.cfg.env.c_frame_stack): | |
| self.critic_history.append(torch.zeros( | |
| self.num_envs, self.cfg.env.single_num_privileged_obs, dtype=torch.float, device=self.device)) | |
| def _reset_dofs(self, env_ids): | |
| """ Resets DOF position and velocities of selected environmments | |
| Positions are randomly selected within 0.5:1.5 x default positions. | |
| Velocities are set to zero. | |
| Args: | |
| env_ids (List[int]): Environemnt ids | |
| """ | |
| self.dof_pos[env_ids] = self.default_dof_pos + torch_rand_float(-0.1, 0.1, (len(env_ids), self.num_dof), device=self.device) | |
| self.dof_vel[env_ids] = 0. | |
| humanoid_ids_int32 = self.humanoid_idxs[env_ids].to(dtype=torch.int32) | |
| self.gym.set_dof_state_tensor_indexed(self.sim, | |
| gymtorch.unwrap_tensor(self.dof_state), | |
| gymtorch.unwrap_tensor(humanoid_ids_int32), len(humanoid_ids_int32)) | |
| def _reset_root_states(self, env_ids): | |
| """ Resets ROOT states position and velocities of selected environmments | |
| Sets base position based on the curriculum | |
| Selects randomized base velocities within -0.5:0.5 [m/s, rad/s] | |
| Args: | |
| env_ids (List[int]): Environemnt ids | |
| """ | |
| # base position | |
| if self.custom_origins: | |
| self.humanoid_root_states[env_ids] = self.base_init_state | |
| self.humanoid_root_states[env_ids, :3] += self.env_origins[env_ids] | |
| self.humanoid_root_states[env_ids, :2] += torch_rand_float(-1., 1., (len(env_ids), 2), device=self.device) # xy position within 1m of the center | |
| else: | |
| self.humanoid_root_states[env_ids] = self.base_init_state | |
| self.humanoid_root_states[env_ids, :3] += self.env_origins[env_ids] | |
| # base velocities | |
| # self.humanoid_root_states[env_ids, 7:13] = torch_rand_float(-0.05, 0.05, (len(env_ids), 6), device=self.device) # [7:10]: lin vel, [10:13]: ang vel | |
| if self.cfg.asset.fix_base_link: | |
| self.humanoid_root_states[env_ids, 7:13] = 0 | |
| self.humanoid_root_states[env_ids, 2] += 1.8 | |
| self._reset_goal(env_ids) | |
| humanoid_ids_int32 = self.humanoid_idxs[env_ids].to(dtype=torch.int32) | |
| self.gym.set_actor_root_state_tensor_indexed(self.sim, | |
| gymtorch.unwrap_tensor(self.root_states), | |
| gymtorch.unwrap_tensor(humanoid_ids_int32), len(humanoid_ids_int32)) | |
| def _reset_goal(self, env_ids): | |
| self.button_goal_pos[env_ids, 0] = self.wall_root_states[env_ids, 0].clone() | |
| self.button_goal_pos[env_ids, 1] = self.wall_root_states[env_ids, 1].clone() + torch.FloatTensor(len(env_ids)).uniform_(*self.cfg.commands.ranges.button_pos_y).to(self.device) | |
| self.button_goal_pos[env_ids, 2] = self.cfg.asset.button_ori_z + torch.FloatTensor(len(env_ids)).uniform_(*self.cfg.commands.ranges.button_pos_z).to(self.device) | |
| def step(self, actions): | |
| if self.cfg.env.use_ref_actions: | |
| actions += self.ref_action | |
| # dynamic randomization | |
| # delay = torch.rand((self.num_envs, 1), device=self.device) | |
| delay = torch.rand((self.num_envs, 1), device=self.device) | |
| actions = (1 - delay) * actions.to(self.device) + delay * self.actions | |
| actions += self.cfg.domain_rand.dynamic_randomization * torch.randn_like(actions) * actions | |
| return super().step(actions) | |
| def post_physics_step(self): | |
| """ check terminations, compute observations and rewards | |
| calls self._post_physics_step_callback() for common computations | |
| calls self._draw_debug_vis() if needed | |
| """ | |
| self.gym.refresh_actor_root_state_tensor(self.sim) | |
| self.gym.refresh_net_contact_force_tensor(self.sim) | |
| self.gym.refresh_rigid_body_state_tensor(self.sim) | |
| self.episode_length_buf += 1 | |
| self.common_step_counter += 1 | |
| # prepare quantities | |
| self.base_quat[:] = self.humanoid_root_states[:, 3:7] | |
| self.base_lin_vel[:] = quat_rotate_inverse(self.base_quat, self.humanoid_root_states[:, 7:10]) | |
| self.base_ang_vel[:] = quat_rotate_inverse(self.base_quat, self.humanoid_root_states[:, 10:13]) | |
| self.projected_gravity[:] = quat_rotate_inverse(self.base_quat, self.gravity_vec) | |
| self.base_euler_xyz = get_euler_xyz_tensor(self.base_quat) | |
| self._post_physics_step_callback() | |
| # compute observations, rewards, resets, ... | |
| self.check_termination() | |
| self.compute_reward() | |
| env_ids = self.reset_buf.nonzero(as_tuple=False).flatten() | |
| self.reset_idx(env_ids) | |
| self.compute_observations() # in some cases a simulation step might be required to refresh some obs (for example body positions) | |
| self.last_last_actions[:] = torch.clone(self.last_actions[:]) | |
| self.last_actions[:] = self.actions[:] | |
| self.last_dof_vel[:] = self.dof_vel[:] | |
| self.last_root_vel[:] = self.humanoid_root_states[:, 7:13] | |
| self.last_rigid_state[:] = self.rigid_state[:] | |
| if self.viewer and self.enable_viewer_sync and self.debug_viz: | |
| self._draw_debug_vis() | |
| def compute_observations(self): | |
| self.compute_visual_observations() | |
| phase = self._get_phase() | |
| sin_pos = torch.sin(2 * torch.pi * phase).unsqueeze(1) | |
| cos_pos = torch.cos(2 * torch.pi * phase).unsqueeze(1) | |
| stance_mask = self._get_gait_phase() | |
| contact_mask = self.contact_forces[:, self.feet_indices, 2] > 5. | |
| self.command_input = torch.cat( | |
| (sin_pos, cos_pos, self.commands[:, :3] * self.commands_scale), dim=1) | |
| self.command_input_wo_clock = self.commands[:, :3] * self.commands_scale | |
| q = (self.dof_pos - self.default_dof_pos) * self.obs_scales.dof_pos | |
| dq = self.dof_vel * self.obs_scales.dof_vel | |
| wrist_pos = self.rigid_state[:, self.wrist_indices, :7] # [num_envs, 2, 7], two hands | |
| wrist_pos = wrist_pos[:, 0, :3] # [num_envs, 3], left hand, position only | |
| button_goal_pos = self.button_goal_pos[:, :3] # [num_envs, 3] | |
| wrist_button_diff = wrist_pos - button_goal_pos # [num_envs, 3], left hand, position only | |
| self.privileged_obs_buf = torch.cat(( | |
| button_goal_pos, # 3 | |
| wrist_pos, # 3 | |
| wrist_button_diff, # 3 | |
| # original | |
| (self.dof_pos - self.default_joint_pd_target) * \ | |
| self.obs_scales.dof_pos, # |A| | |
| self.dof_vel * self.obs_scales.dof_vel, # |A| | |
| self.actions, # |A| | |
| self.base_lin_vel * self.obs_scales.lin_vel, # 3 | |
| self.base_ang_vel * self.obs_scales.ang_vel, # 3 | |
| self.base_euler_xyz * self.obs_scales.quat, # 3 | |
| self.rand_push_force[:, :2], # 2 | |
| self.rand_push_torque, # 3 | |
| self.env_frictions, # 1 | |
| self.body_mass / 30., # 1 | |
| # stance_mask, # 2 | |
| contact_mask, # 2 | |
| ), dim=-1) | |
| obs_buf = torch.cat(( | |
| wrist_button_diff, # 3 | |
| # original | |
| q, # |A| | |
| dq, # |A| | |
| self.actions, # |A| | |
| self.base_ang_vel * self.obs_scales.ang_vel, # 3 | |
| self.base_euler_xyz * self.obs_scales.quat, # 3 | |
| ), dim=-1) | |
| if self.cfg.terrain.measure_heights: | |
| heights = torch.clip(self.humanoid_root_states[:, 2].unsqueeze(1) - 0.5 - self.measured_heights, -1, 1.) * self.obs_scales.height_measurements | |
| self.privileged_obs_buf = torch.cat((self.obs_buf, heights), dim=-1) | |
| if self.add_noise: | |
| obs_now = obs_buf.clone() + torch.randn_like(obs_buf) * self.noise_scale_vec | |
| else: | |
| obs_now = obs_buf.clone() | |
| self.obs_history.append(obs_now) | |
| self.critic_history.append(self.privileged_obs_buf) | |
| obs_buf_all = torch.stack([self.obs_history[i] | |
| for i in range(self.obs_history.maxlen)], dim=1) # N,T,K | |
| self.obs_buf = obs_buf_all.reshape(self.num_envs, -1) # N, T*K | |
| self.privileged_obs_buf = torch.cat([self.critic_history[i] for i in range(self.cfg.env.c_frame_stack)], dim=1) | |
| def reset_idx(self, env_ids): | |
| super().reset_idx(env_ids) | |
| for i in range(self.obs_history.maxlen): | |
| self.obs_history[i][env_ids] *= 0 | |
| for i in range(self.critic_history.maxlen): | |
| self.critic_history[i][env_ids] *= 0 | |
| # ================================================ Rewards ================================================== # | |
| def _reward_joint_pos(self): | |
| """ | |
| Calculates the reward based on the difference between the current joint positions and the target joint positions. | |
| """ | |
| # Penalize distance to target joint angles | |
| dof_pos_diff = self.dof_pos - self.ref_dof_pos | |
| # dof_pos_diff = dof_pos_diff[:, 10:] | |
| target_jt_error = torch.mean(torch.abs(dof_pos_diff), dim=1) | |
| return torch.exp(-4 * target_jt_error), target_jt_error | |
| def _reward_wrist_pos(self): | |
| wrist_pos = self.rigid_state[:, self.wrist_indices, :7] # [num_envs, 2, 7], two hands | |
| wrist_pos_diff = wrist_pos[:,:,:3] - self.ref_wrist_pos[:,:,:3] # [num_envs, 2, 3], two hands, position only | |
| wrist_pos_diff = torch.flatten(wrist_pos_diff, start_dim=1) # [num_envs, 6] | |
| wrist_pos_error = torch.mean(torch.abs(wrist_pos_diff), dim=1) | |
| return torch.exp(-4 * wrist_pos_error), wrist_pos_error | |
| def _reward_wrist_button_distance(self): | |
| wrist_pos = self.rigid_state[:, self.wrist_indices, :7] # [num_envs, 2, 7], two hands | |
| wrist_pos = wrist_pos[:, 0, :3] # [num_envs, 3], left hand, position only | |
| button_goal_pos = self.button_goal_pos[:, :3] # [num_envs, 3] | |
| wrist_button_diff = wrist_pos - button_goal_pos # [num_envs, 3] | |
| wrist_button_error = torch.mean(torch.abs(wrist_button_diff), dim=1) | |
| return torch.exp(-4 * wrist_button_error), wrist_button_error | |
| def _reward_right_arm_default(self): | |
| """ | |
| Calculates the reward for keeping right arm joint positions close to default positions. | |
| """ | |
| right_shoulder_pitch_index = 15 | |
| joint_diff = self.dof_pos - self.default_joint_pd_target | |
| right_arm_diff = joint_diff[:, right_shoulder_pitch_index:] # start from right shoulder pitch | |
| right_arm_error = torch.mean(torch.abs(right_arm_diff), dim=1) | |
| return torch.exp(-4 * right_arm_error), right_arm_error | |
| def _reward_feet_distance(self): | |
| """ | |
| Calculates the reward based on the distance between the feet. Penilize feet get close to each other or too far away. | |
| """ | |
| foot_pos = self.rigid_state[:, self.feet_indices, :2] | |
| foot_dist = torch.norm(foot_pos[:, 0, :] - foot_pos[:, 1, :], dim=1) | |
| fd = self.cfg.rewards.min_dist | |
| max_df = self.cfg.rewards.max_dist | |
| d_min = torch.clamp(foot_dist - fd, -0.5, 0.) | |
| d_max = torch.clamp(foot_dist - max_df, 0, 0.5) | |
| return (torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100)) / 2, foot_dist | |
| def _reward_knee_distance(self): | |
| """ | |
| Calculates the reward based on the distance between the knee of the humanoid. | |
| """ | |
| knee_pos = self.rigid_state[:, self.knee_indices, :2] | |
| knee_dist = torch.norm(knee_pos[:, 0, :] - knee_pos[:, 1, :], dim=1) | |
| fd = self.cfg.rewards.min_dist | |
| max_df = self.cfg.rewards.max_dist / 2 | |
| d_min = torch.clamp(knee_dist - fd, -0.5, 0.) | |
| d_max = torch.clamp(knee_dist - max_df, 0, 0.5) | |
| return (torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100)) / 2, knee_dist | |
| # @jhcao | |
| def _reward_elbow_distance(self): | |
| """ | |
| Calculates the reward based on the distance between the elbow of the humanoid. | |
| """ | |
| elbow_pos = self.rigid_state[:, self.elbow_indices, :2] | |
| elbow_dist = torch.norm(elbow_pos[:, 0, :] - elbow_pos[:, 1, :], dim=1) | |
| fd = self.cfg.rewards.min_dist | |
| max_df = self.cfg.rewards.max_dist / 2 | |
| d_min = torch.clamp(elbow_dist - fd, -0.5, 0.) | |
| d_max = torch.clamp(elbow_dist - max_df, 0, 0.5) | |
| return (torch.exp(-torch.abs(d_min) * 100) + torch.exp(-torch.abs(d_max) * 100)) / 2, elbow_dist | |
| # # @jhcao | |
| # def _reward_elbow_torso_distance(self): | |
| # """ | |
| # Calculates the reward based on the distance of both elbow to the torso. | |
| # """ | |
| # elbow_pos = self.rigid_state[:, self.elbow_indices, :2] | |
| # torso_pos = self.rigid_state[:, self.torso_indices, :2] | |
| # elbow_torso_dist_l = torch.norm(elbow_pos[:,0,:] - torso_pos[:,0,:], dim=1) | |
| # elbow_torso_dist_r = torch.norm(elbow_pos[:,1,:] - torso_pos[:,0,:], dim=1) | |
| # fd = 0.00 | |
| # max_fd = 0.10 | |
| # d_min_l = torch.clamp(elbow_torso_dist_l - fd, -0.5, 0.) | |
| # d_max_l = torch.clamp(elbow_torso_dist_l - max_fd, 0, 0.5) | |
| # d_min_r = torch.clamp(elbow_torso_dist_r - fd, -0.5, 0.) | |
| # d_max_r = torch.clamp(elbow_torso_dist_r - max_fd, 0, 0.5) | |
| # return (torch.exp(-torch.abs(d_min_l) * 100) + torch.exp(-torch.abs(d_max_l) * 100)) / 2 + (torch.exp(-torch.abs(d_min_r) * 100) + torch.exp(-torch.abs(d_max_r) * 100)) / 2 | |
| def _reward_foot_slip(self): | |
| """ | |
| Calculates the reward for minimizing foot slip. The reward is based on the contact forces | |
| and the speed of the feet. A contact threshold is used to determine if the foot is in contact | |
| with the ground. The speed of the foot is calculated and scaled by the contact condition. | |
| """ | |
| contact = self.contact_forces[:, self.feet_indices, 2] > 5. | |
| foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 10:12], dim=2) | |
| rew = torch.sqrt(foot_speed_norm) | |
| rew *= contact | |
| return torch.sum(rew, dim=1) | |
| def _reward_feet_air_time(self): | |
| """ | |
| Calculates the reward for feet air time, promoting longer steps. This is achieved by | |
| checking the first contact with the ground after being in the air. The air time is | |
| limited to a maximum value for reward calculation. | |
| """ | |
| contact = self.contact_forces[:, self.feet_indices, 2] > 5. | |
| stance_mask = self._get_gait_phase() | |
| self.contact_filt = torch.logical_or(torch.logical_or(contact, stance_mask), self.last_contacts) | |
| self.last_contacts = contact | |
| first_contact = (self.feet_air_time > 0.) * self.contact_filt | |
| self.feet_air_time += self.dt | |
| air_time = self.feet_air_time.clamp(0, 0.5) * first_contact | |
| self.feet_air_time *= ~self.contact_filt | |
| return air_time.sum(dim=1) | |
| def _reward_feet_contact_number(self): | |
| """ | |
| Calculates a reward based on the number of feet contacts aligning with the gait phase. | |
| Rewards or penalizes depending on whether the foot contact matches the expected gait phase. | |
| """ | |
| contact = self.contact_forces[:, self.feet_indices, 2] > 5. | |
| stance_mask = self._get_gait_phase() | |
| reward = torch.where(contact == stance_mask, 1.0, -0.3) | |
| return torch.mean(reward, dim=1) | |
| def _reward_orientation(self): | |
| """ | |
| Calculates the reward for maintaining a flat base orientation. It penalizes deviation | |
| from the desired base orientation using the base euler angles and the projected gravity vector. | |
| """ | |
| quat_mismatch = torch.exp(-torch.sum(torch.abs(self.base_euler_xyz[:, :2]), dim=1) * 10) | |
| orientation = torch.exp(-torch.norm(self.projected_gravity[:, :2], dim=1) * 20) | |
| return (quat_mismatch + orientation) / 2. | |
| def _reward_feet_contact_forces(self): | |
| """ | |
| Calculates the reward for keeping contact forces within a specified range. Penalizes | |
| high contact forces on the feet. | |
| """ | |
| return torch.sum((torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1) - self.cfg.rewards.max_contact_force).clip(0, 400), dim=1) | |
| def _reward_default_joint_pos(self): | |
| """ | |
| Calculates the reward for keeping joint positions close to default positions, with a focus | |
| on penalizing deviation in yaw and roll directions. Excludes yaw and roll from the main penalty. | |
| """ | |
| joint_diff = self.dof_pos - self.default_joint_pd_target | |
| left_yaw_roll = joint_diff[:, :2] | |
| right_yaw_roll = joint_diff[:, 5:7] | |
| yaw_roll = torch.norm(left_yaw_roll, dim=1) + torch.norm(right_yaw_roll, dim=1) | |
| yaw_roll = torch.clamp(yaw_roll - 0.1, 0, 50) | |
| return torch.exp(-yaw_roll * 100) - 0.01 * torch.norm(joint_diff, dim=1) | |
| def _reward_upper_body_pos(self): | |
| """ | |
| Calculates the reward for keeping upper body joint positions close to default positions. | |
| """ | |
| torso_index = 10 | |
| joint_diff = self.dof_pos - self.default_joint_pd_target | |
| upper_body_diff = joint_diff[:, torso_index:torso_index+1] # start from torso, but only torso | |
| upper_body_error = torch.mean(torch.abs(upper_body_diff), dim=1) | |
| return torch.exp(-4 * upper_body_error), upper_body_error | |
| # def _reward_upper_body_pos(self): | |
| # """ | |
| # Calculates the reward for keeping upper body joint positions close to default positions. | |
| # """ | |
| # joint_diff = self.dof_pos - self.default_joint_pd_target | |
| # upper_body_diff = joint_diff[:, 10:] # start from torso | |
| # upper_body_diff = torch.norm(upper_body_diff, dim=1) | |
| # upper_body_diff = torch.clamp(upper_body_diff - 0.1, 0, 50) | |
| # return torch.exp(-upper_body_diff * 100) | |
| def _reward_base_height(self): | |
| """ | |
| Calculates the reward based on the robot's base height. Penalizes deviation from a target base height. | |
| The reward is computed based on the height difference between the robot's base and the average height | |
| of its feet when they are in contact with the ground. | |
| """ | |
| stance_mask = self._get_gait_phase() | |
| measured_heights = torch.sum( | |
| self.rigid_state[:, self.feet_indices, 2] * stance_mask, dim=1) / torch.sum(stance_mask, dim=1) | |
| base_height = self.humanoid_root_states[:, 2] - (measured_heights - 0.05) | |
| return torch.exp(-torch.abs(base_height - self.cfg.rewards.base_height_target) * 100) | |
| def _reward_base_acc(self): | |
| """ | |
| Computes the reward based on the base's acceleration. Penalizes high accelerations of the robot's base, | |
| encouraging smoother motion. | |
| """ | |
| root_acc = self.last_root_vel - self.humanoid_root_states[:, 7:13] | |
| rew = torch.exp(-torch.norm(root_acc, dim=1) * 3) | |
| return rew | |
| def _reward_vel_mismatch_exp(self): | |
| """ | |
| Computes a reward based on the mismatch in the robot's linear and angular velocities. | |
| Encourages the robot to maintain a stable velocity by penalizing large deviations. | |
| """ | |
| lin_mismatch = torch.exp(-torch.square(self.base_lin_vel[:, 2]) * 10) | |
| ang_mismatch = torch.exp(-torch.norm(self.base_ang_vel[:, :2], dim=1) * 5.) | |
| c_update = (lin_mismatch + ang_mismatch) / 2. | |
| return c_update | |
| def _reward_track_vel_hard(self): | |
| """ | |
| Calculates a reward for accurately tracking both linear and angular velocity commands. | |
| Penalizes deviations from specified linear and angular velocity targets. | |
| """ | |
| # Tracking of linear velocity commands (xy axes) | |
| lin_vel_error = torch.norm( | |
| self.commands[:, :2] - self.base_lin_vel[:, :2], dim=1) | |
| lin_vel_error_exp = torch.exp(-lin_vel_error * 10) | |
| # Tracking of angular velocity commands (yaw) | |
| ang_vel_error = torch.abs( | |
| self.commands[:, 2] - self.base_ang_vel[:, 2]) | |
| ang_vel_error_exp = torch.exp(-ang_vel_error * 10) | |
| linear_error = 0.2 * (lin_vel_error + ang_vel_error) | |
| return (lin_vel_error_exp + ang_vel_error_exp) / 2. - linear_error | |
| def _reward_tracking_lin_vel(self): | |
| """ | |
| Tracks linear velocity commands along the xy axes. | |
| Calculates a reward based on how closely the robot's linear velocity matches the commanded values. | |
| """ | |
| lin_vel_diff = self.commands[:, :2] - self.base_lin_vel[:, :2] | |
| lin_vel_error = torch.sum(torch.square( | |
| lin_vel_diff), dim=1) | |
| return torch.exp(-lin_vel_error * self.cfg.rewards.tracking_sigma), torch.mean(torch.abs(lin_vel_diff), dim=1) | |
| def _reward_tracking_ang_vel(self): | |
| """ | |
| Tracks angular velocity commands for yaw rotation. | |
| Computes a reward based on how closely the robot's angular velocity matches the commanded yaw values. | |
| """ | |
| ang_vel_diff = self.commands[:, 2] - self.base_ang_vel[:, 2] | |
| ang_vel_error = torch.square( | |
| ang_vel_diff) | |
| return torch.exp(-ang_vel_error * self.cfg.rewards.tracking_sigma), torch.abs(ang_vel_diff) | |
| def _reward_feet_clearance(self): | |
| """ | |
| Calculates reward based on the clearance of the swing leg from the ground during movement. | |
| Encourages appropriate lift of the feet during the swing phase of the gait. | |
| """ | |
| # Compute feet contact mask | |
| contact = self.contact_forces[:, self.feet_indices, 2] > 5. | |
| # Get the z-position of the feet and compute the change in z-position | |
| feet_z = self.rigid_state[:, self.feet_indices, 2] - 0.05 | |
| delta_z = feet_z - self.last_feet_z | |
| self.feet_height += delta_z | |
| self.last_feet_z = feet_z | |
| # Compute swing mask | |
| swing_mask = 1 - self._get_gait_phase() | |
| # feet height should be closed to target feet height at the peak | |
| rew_pos = torch.abs(self.feet_height - self.cfg.rewards.target_feet_height) < 0.01 | |
| rew_pos = torch.sum(rew_pos * swing_mask, dim=1) | |
| self.feet_height *= ~contact | |
| return rew_pos | |
| def _reward_low_speed(self): | |
| """ | |
| Rewards or penalizes the robot based on its speed relative to the commanded speed. | |
| This function checks if the robot is moving too slow, too fast, or at the desired speed, | |
| and if the movement direction matches the command. | |
| """ | |
| # Calculate the absolute value of speed and command for comparison | |
| absolute_speed = torch.abs(self.base_lin_vel[:, 0]) | |
| absolute_command = torch.abs(self.commands[:, 0]) | |
| # Define speed criteria for desired range | |
| speed_too_low = absolute_speed < 0.5 * absolute_command | |
| speed_too_high = absolute_speed > 1.2 * absolute_command | |
| speed_desired = ~(speed_too_low | speed_too_high) | |
| # Check if the speed and command directions are mismatched | |
| sign_mismatch = torch.sign( | |
| self.base_lin_vel[:, 0]) != torch.sign(self.commands[:, 0]) | |
| # Initialize reward tensor | |
| reward = torch.zeros_like(self.base_lin_vel[:, 0]) | |
| # Assign rewards based on conditions | |
| # Speed too low | |
| reward[speed_too_low] = -1.0 | |
| # Speed too high | |
| reward[speed_too_high] = 0. | |
| # Speed within desired range | |
| reward[speed_desired] = 1.2 | |
| # Sign mismatch has the highest priority | |
| reward[sign_mismatch] = -2.0 | |
| return reward * (self.commands[:, 0].abs() > 0.1) | |
| def _reward_torques(self): | |
| """ | |
| Penalizes the use of high torques in the robot's joints. Encourages efficient movement by minimizing | |
| the necessary force exerted by the motors. | |
| """ | |
| return torch.sum(torch.square(self.torques), dim=1) | |
| def _reward_dof_vel(self): | |
| """ | |
| Penalizes high velocities at the degrees of freedom (DOF) of the robot. This encourages smoother and | |
| more controlled movements. | |
| """ | |
| return torch.sum(torch.square(self.dof_vel), dim=1) | |
| def _reward_dof_acc(self): | |
| """ | |
| Penalizes high accelerations at the robot's degrees of freedom (DOF). This is important for ensuring | |
| smooth and stable motion, reducing wear on the robot's mechanical parts. | |
| """ | |
| return torch.sum(torch.square((self.last_dof_vel - self.dof_vel) / self.dt), dim=1) | |
| def _reward_collision(self): | |
| """ | |
| Penalizes collisions of the robot with the environment, specifically focusing on selected body parts. | |
| This encourages the robot to avoid undesired contact with objects or surfaces. | |
| """ | |
| return torch.sum(1.*(torch.norm(self.contact_forces[:, self.penalised_contact_indices, :], dim=-1) > 0.1), dim=1) | |
| def _reward_action_smoothness(self): | |
| """ | |
| Encourages smoothness in the robot's actions by penalizing large differences between consecutive actions. | |
| This is important for achieving fluid motion and reducing mechanical stress. | |
| """ | |
| term_1 = torch.sum(torch.square( | |
| self.last_actions - self.actions), dim=1) | |
| term_2 = torch.sum(torch.square( | |
| self.actions + self.last_last_actions - 2 * self.last_actions), dim=1) | |
| term_3 = 0.05 * torch.sum(torch.abs(self.actions), dim=1) | |
| return term_1 + term_2 + term_3 | |
| # ==== From HumanPlus ==== # | |
| ## _reward_lin_vel_z, _reward_ang_vel_xy, _reward_action_rate, _reward_termination, _reward_dof_pos_limits, _reward_dof_vel_limits, _reward_torque_limits, **_reward_stumble**, **_reward_stand_still**, _reward_target_jt (joint_pos) | |
| def _reward_lin_vel_z(self): | |
| # Penalize z axis base linear velocity | |
| return torch.square(self.base_lin_vel[:, 2]) | |
| def _reward_ang_vel_xy(self): | |
| # Penalize xy axes base angular velocity | |
| return torch.sum(torch.square(self.base_ang_vel[:, :2]), dim=1) | |
| def _reward_action_rate(self): | |
| # Penalize changes in actions | |
| return torch.sum(torch.square(self.last_actions - self.actions), dim=1) | |
| def _reward_termination(self): | |
| # Terminal reward / penalty | |
| return self.reset_buf * ~self.time_out_buf | |
| def _reward_dof_pos_limits(self): | |
| # Penalize dof positions too close to the limit | |
| out_of_limits = -(self.dof_pos - self.dof_pos_limits[:, 0]).clip(max=0.) # lower limit | |
| out_of_limits += (self.dof_pos - self.dof_pos_limits[:, 1]).clip(min=0.) | |
| return torch.sum(out_of_limits, dim=1) | |
| def _reward_dof_vel_limits(self): | |
| # Penalize dof velocities too close to the limit | |
| # clip to max error = 1 rad/s per joint to avoid huge penalties | |
| return torch.sum((torch.abs(self.dof_vel) - self.dof_vel_limits*self.cfg.rewards.soft_dof_vel_limit).clip(min=0., max=1.), dim=1) | |
| def _reward_torque_limits(self): | |
| # penalize torques too close to the limit | |
| return torch.sum((torch.abs(self.torques) - self.torque_limits*self.cfg.rewards.soft_torque_limit).clip(min=0.), dim=1) | |
| def _reward_stumble(self): | |
| # Penalize feet hitting vertical surfaces | |
| return torch.any(torch.norm(self.contact_forces[:, self.feet_indices, :2], dim=2) >\ | |
| 5 *torch.abs(self.contact_forces[:, self.feet_indices, 2]), dim=1) | |
| def _reward_stand_still(self): | |
| # Penalize motion at zero commands | |
| return torch.sum(torch.abs(self.dof_pos - self.default_dof_pos), dim=1) * (torch.norm(self.commands[:, :2], dim=1) < 0.1) | |
| def _reward_target_jt(self): | |
| # # Penalize distance to target joint angles | |
| # target_jt_error = torch.mean(torch.abs(self.dof_pos - self.target_jt), dim=1) | |
| # return torch.exp(-4 * target_jt_error), target_jt_error | |
| return 0 |
Xet Storage Details
- Size:
- 49 kB
- Xet hash:
- e7a43e704aec910668a01ed206cc20201da0773ef398a1f62ef9283e30aaa5d8
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.