Spaces:
Runtime error
Runtime error
| # Copyright 2017 The dm_control Authors. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| # ============================================================================ | |
| """Stickman Domain.""" | |
| from __future__ import absolute_import | |
| from __future__ import division | |
| from __future__ import print_function | |
| import collections | |
| import os | |
| import numpy as np | |
| import types | |
| from dm_control import mujoco | |
| from dm_control.rl import control | |
| from dm_control.suite import base | |
| from dm_control.suite import common | |
| from dm_control.suite.utils import randomizers | |
| from dm_control.utils import containers | |
| from dm_control.utils import rewards | |
| from dm_control.utils import io as resources | |
| from dm_control import suite | |
| class StickmanYogaPoses: | |
| lie_back = [ -1.2 , 0. , -1.57, 0, 0. , 0.0, 0, -0., 0.0] | |
| lie_front = [-1.2, -0, 1.57, 0, 0, 0, 0, 0., 0.] | |
| legs_up = [ -1.24 , 0. , -1.57, 1.57, 0. , 0.0, 1.57, -0., 0.0] | |
| kneel = [ -0.5 , 0. , 0, 0, -1.57, -0.8, 1.57, -1.57, 0.0] | |
| side_angle = [ -0.3 , 0. , 0.9, 0, 0, -0.7, 1.87, -1.07, 0.0] | |
| stand_up = [-0.15, 0., 0.34, 0.74, -1.34, -0., 1.1, -0.66, -0.1] | |
| lean_back = [-0.27, 0., -0.45, 0.22, -1.5, 0.86, 0.6, -0.8, -0.4] | |
| boat = [ -1.04 , 0. , -0.8, 1.6, 0. , 0.0, 1.6, -0., 0.0] | |
| bridge = [-1.1, 0., -2.2, -0.3, -1.5, 0., -0.3, -0.8, -0.4] | |
| head_stand = [-1, 0., -3, 0.6, -1, -0.3, 0.9, -0.5, 0.3] | |
| one_feet = [-0.2, 0., 0, 0.7, -1.34, 0.5, 1.5, -0.6, 0.1] | |
| arabesque = [-0.34, 0., 1.57, 1.57, 0, 0., 0, -0., 0.] | |
| # new | |
| high_kick = [-0.165, 3.3 , 5.55 , 1.35 ,-0, +0.5 , -0.7, 0. , 0.2,] | |
| splits = [-0.7, 0., 0.5, -0.7, -1. , 0, 1.75, 0., -0.45 ] | |
| sit_knees = [-0.6, -0.2, 0.2, 0.95, -2.5, 0 , 0.95, -2.5, 0 ] | |
| _DEFAULT_TIME_LIMIT = 25 | |
| _CONTROL_TIMESTEP = .025 | |
| # Minimal height of torso over foot above which stand reward is 1. | |
| _STAND_HEIGHT = 1.15 | |
| # Horizontal speeds (meters/second) above which move reward is 1. | |
| _WALK_SPEED = 1 | |
| _RUN_SPEED = 8 | |
| # Copied from walker: | |
| _YOGA_HANDS_UP_HEIGHT = 1.75 | |
| _YOGA_STAND_HEIGHT = 1.0 # lower than stan height = 1.2 | |
| _YOGA_LIE_DOWN_HEIGHT = 0.1 | |
| _YOGA_LEGS_UP_HEIGHT = 1.1 | |
| _YOGA_FEET_UP_HEIGHT = 0.5 | |
| _YOGA_FEET_UP_LIE_DOWN_HEIGHT = 0.35 | |
| _YOGA_KNEE_HEIGHT = 0.25 | |
| _YOGA_KNEESTAND_HEIGHT = 0.75 | |
| _YOGA_SITTING_HEIGHT = 0.55 | |
| _YOGA_SITTING_LEGS_HEIGHT = 0.15 | |
| # speed from: https://github.com/rll-research/url_benchmark/blob/710c3eb/custom_dmc_tasks/py | |
| _SPIN_SPEED = 5.0 | |
| # | |
| _PUNCH_SPEED = 5.0 | |
| _PUNCH_DIST = 0.29 | |
| SUITE = containers.TaggedTasks() | |
| def make(task, | |
| task_kwargs=None, | |
| environment_kwargs=None, | |
| visualize_reward=False): | |
| task_kwargs = task_kwargs or {} | |
| if environment_kwargs is not None: | |
| task_kwargs = task_kwargs.copy() | |
| task_kwargs['environment_kwargs'] = environment_kwargs | |
| env = SUITE[task](**task_kwargs) | |
| env.task.visualize_reward = visualize_reward | |
| return env | |
| def get_model_and_assets(): | |
| """Returns a tuple containing the model XML string and a dict of assets.""" | |
| root_dir = os.path.dirname(os.path.dirname(__file__)) | |
| xml = resources.GetResource(os.path.join(root_dir, 'custom_dmc_tasks', 'stickman.xml')) | |
| return xml, common.ASSETS | |
| def hands_up(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): | |
| """Returns the hands_up task.""" | |
| physics = Physics.from_xml_string(*get_model_and_assets()) | |
| task = Stickman(goal='hands_up', random=random) | |
| environment_kwargs = environment_kwargs or {} | |
| return control.Environment( | |
| physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, | |
| **environment_kwargs) | |
| def boxing(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): | |
| """Returns the boxing task.""" | |
| physics = Physics.from_xml_string(*get_model_and_assets()) | |
| task = Stickman(goal='boxing', random=random) | |
| environment_kwargs = environment_kwargs or {} | |
| return control.Environment( | |
| physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, | |
| **environment_kwargs) | |
| def arabesque(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): | |
| """Returns the Arabesque task.""" | |
| physics = Physics.from_xml_string(*get_model_and_assets()) | |
| task = Stickman(goal='arabesque', random=random) | |
| environment_kwargs = environment_kwargs or {} | |
| return control.Environment( | |
| physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, | |
| **environment_kwargs) | |
| def lying_down(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): | |
| """Returns the Lie Down task.""" | |
| physics = Physics.from_xml_string(*get_model_and_assets()) | |
| task = Stickman(goal='lying_down', random=random) | |
| environment_kwargs = environment_kwargs or {} | |
| return control.Environment( | |
| physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, | |
| **environment_kwargs) | |
| def legs_up(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): | |
| """Returns the Legs Up task.""" | |
| physics = Physics.from_xml_string(*get_model_and_assets()) | |
| task = Stickman(goal='legs_up', random=random) | |
| environment_kwargs = environment_kwargs or {} | |
| return control.Environment( | |
| physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, | |
| **environment_kwargs) | |
| def high_kick(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): | |
| """Returns the High Kick task.""" | |
| physics = Physics.from_xml_string(*get_model_and_assets()) | |
| task = Stickman(goal='high_kick', random=random) | |
| environment_kwargs = environment_kwargs or {} | |
| return control.Environment( | |
| physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, | |
| **environment_kwargs) | |
| def one_foot(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): | |
| """Returns the High Kick task.""" | |
| physics = Physics.from_xml_string(*get_model_and_assets()) | |
| task = Stickman(goal='one_foot', random=random) | |
| environment_kwargs = environment_kwargs or {} | |
| return control.Environment( | |
| physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, | |
| **environment_kwargs) | |
| def lunge_pose(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): | |
| """Returns the High Kick task.""" | |
| physics = Physics.from_xml_string(*get_model_and_assets()) | |
| task = Stickman(goal='lunge_pose', random=random) | |
| environment_kwargs = environment_kwargs or {} | |
| return control.Environment( | |
| physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, | |
| **environment_kwargs) | |
| def sit_knees(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): | |
| """Returns the High Kick task.""" | |
| physics = Physics.from_xml_string(*get_model_and_assets()) | |
| task = Stickman(goal='sit_knees', random=random) | |
| environment_kwargs = environment_kwargs or {} | |
| return control.Environment( | |
| physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, | |
| **environment_kwargs) | |
| def headstand(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): | |
| """Returns the Headstand task.""" | |
| physics = Physics.from_xml_string(*get_model_and_assets()) | |
| task = Stickman(goal='flip', move_speed=0, random=random) | |
| environment_kwargs = environment_kwargs or {} | |
| return control.Environment( | |
| physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, | |
| **environment_kwargs) | |
| def urlb_flip(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): | |
| """Returns the Flip task.""" | |
| physics = Physics.from_xml_string(*get_model_and_assets()) | |
| task = Stickman(goal='urlb_flip', move_speed=_SPIN_SPEED, random=random) | |
| environment_kwargs = environment_kwargs or {} | |
| return control.Environment( | |
| physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, | |
| **environment_kwargs) | |
| def flipping(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): | |
| """Returns the Flipping task.""" | |
| physics = Physics.from_xml_string(*get_model_and_assets()) | |
| task = Stickman(goal='flipping', move_speed=2 * _RUN_SPEED, random=random) | |
| environment_kwargs = environment_kwargs or {} | |
| return control.Environment( | |
| physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, | |
| **environment_kwargs) | |
| def flip(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): | |
| """Returns the Flip task.""" | |
| physics = Physics.from_xml_string(*get_model_and_assets()) | |
| task = Stickman(goal='flip', move_speed=2 * _RUN_SPEED, random=random) | |
| environment_kwargs = environment_kwargs or {} | |
| return control.Environment( | |
| physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, | |
| **environment_kwargs) | |
| def backflip(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): | |
| """Returns the Backflip task.""" | |
| physics = Physics.from_xml_string(*get_model_and_assets()) | |
| task = Stickman(goal='flip', move_speed=-2 * _RUN_SPEED, random=random) | |
| environment_kwargs = environment_kwargs or {} | |
| return control.Environment( | |
| physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, | |
| **environment_kwargs) | |
| def stand(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): | |
| """Returns the Stand task.""" | |
| physics = Physics.from_xml_string(*get_model_and_assets()) | |
| task = Stickman(move_speed=0, goal='stand', random=random) | |
| environment_kwargs = environment_kwargs or {} | |
| return control.Environment( | |
| physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, | |
| **environment_kwargs) | |
| def walk(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): | |
| """Returns the Walk task.""" | |
| physics = Physics.from_xml_string(*get_model_and_assets()) | |
| task = Stickman(move_speed=_WALK_SPEED, goal='walk', random=random) | |
| environment_kwargs = environment_kwargs or {} | |
| return control.Environment( | |
| physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, | |
| **environment_kwargs) | |
| def run(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): | |
| """Returns the Run task.""" | |
| physics = Physics.from_xml_string(*get_model_and_assets()) | |
| task = Stickman(move_speed=_RUN_SPEED, goal='run', random=random) | |
| environment_kwargs = environment_kwargs or {} | |
| return control.Environment( | |
| physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, | |
| **environment_kwargs) | |
| class Physics(mujoco.Physics): | |
| """Physics simulation with additional features for the stickman domain.""" | |
| def torso_upright(self): | |
| """Returns projection from z-axes of torso to the z-axes of world.""" | |
| return self.named.data.xmat['torso', 'zz'] | |
| def torso_height(self): | |
| """Returns the height of the torso.""" | |
| return self.named.data.xpos['torso', 'z'] | |
| def horizontal_velocity(self): | |
| """Returns the horizontal velocity of the center-of-mass.""" | |
| return self.named.data.sensordata['torso_subtreelinvel'][0] | |
| def orientations(self): | |
| """Returns planar orientations of all bodies.""" | |
| return self.named.data.xmat[1:, ['xx', 'xz']].ravel() | |
| def angmomentum(self): | |
| """Returns the angular momentum of torso of the stickman about Y axis.""" | |
| return self.named.data.subtree_angmom['torso'][1] | |
| class Stickman(base.Task): | |
| """A planar stickman task.""" | |
| def __init__(self, move_speed=0., goal='walk', forward=True, random=None): | |
| """Initializes an instance of `Stickman`. | |
| Args: | |
| move_speed: A float. If this value is zero, reward is given simply for | |
| standing up. Otherwise this specifies a target horizontal velocity for | |
| the walking task. | |
| random: Optional, either a `numpy.random.RandomState` instance, an | |
| integer seed for creating a new `RandomState`, or None to select a seed | |
| automatically (default). | |
| """ | |
| self._move_speed = move_speed | |
| self._forward = 1 if forward else -1 | |
| self._goal = goal | |
| super().__init__(random=random) | |
| def _hands_up_reward(self, physics): | |
| standing = self._stand_reward(physics) | |
| left_hand_height = physics.named.data.xpos['left_hand', 'z'] | |
| right_hand_height = physics.named.data.xpos['right_hand', 'z'] | |
| hand_height = (left_hand_height + right_hand_height) / 2 | |
| hands_up = rewards.tolerance(hand_height, | |
| bounds=(_YOGA_HANDS_UP_HEIGHT, float('inf')), | |
| margin=_YOGA_HANDS_UP_HEIGHT/2) | |
| return standing * hands_up | |
| def _boxing_reward(self, physics): | |
| # torso up, but lower than standing | |
| # foot up, higher than torso | |
| # foot down | |
| standing = self._stand_reward(physics) | |
| left_hand_velocity = abs(physics.named.data.subtree_linvel['left_hand'][0]) | |
| right_hand_velocity = abs(physics.named.data.subtree_linvel['right_hand'][0]) | |
| punch_reward = rewards.tolerance( | |
| max(left_hand_velocity, right_hand_velocity), | |
| bounds=(_PUNCH_SPEED, float('inf')), | |
| margin=_PUNCH_SPEED / 2, | |
| value_at_margin=0.5, | |
| sigmoid='linear') | |
| # left_hand_dist = physics.named.data.xpos['left_hand', 'x'] - physics.named.data.xpos['torso', 'x'] | |
| # right_hand_dist = physics.named.data.xpos['right_hand', 'x'] - physics.named.data.xpos['torso', 'x'] | |
| # punch_reward = rewards.tolerance( | |
| # max(left_hand_dist, right_hand_dist), | |
| # bounds=(_PUNCH_DIST, float('inf')), | |
| # margin=_PUNCH_DIST / 2,) | |
| return standing * punch_reward | |
| def _arabesque_reward(self, physics): | |
| # standing horizontal | |
| # one foot up, same height as torso | |
| # one foot down | |
| standing = rewards.tolerance(physics.torso_height(), | |
| bounds=(_YOGA_STAND_HEIGHT, float('inf')), | |
| margin=_YOGA_STAND_HEIGHT/2) | |
| left_foot_height = physics.named.data.xpos['left_foot', 'z'] | |
| right_foot_height = physics.named.data.xpos['right_foot', 'z'] | |
| max_foot = 'right_foot' if right_foot_height > left_foot_height else 'left_foot' | |
| min_foot = 'right_foot' if right_foot_height <= left_foot_height else 'left_foot' | |
| min_foot_height = physics.named.data.xpos[min_foot, 'z'] | |
| max_foot_height = physics.named.data.xpos[max_foot, 'z'] | |
| min_foot_down = rewards.tolerance(min_foot_height, | |
| bounds=(-float('inf'), _YOGA_LIE_DOWN_HEIGHT), | |
| margin=_YOGA_LIE_DOWN_HEIGHT*1.5) | |
| max_foot_up = rewards.tolerance(max_foot_height, | |
| bounds=(_YOGA_STAND_HEIGHT, float('inf')), | |
| margin=_YOGA_STAND_HEIGHT/2) | |
| min_foot_x = physics.named.data.xpos[min_foot, 'x'] | |
| max_foot_x = physics.named.data.xpos[max_foot, 'x'] | |
| correct_foot_pose = 0.1 if max_foot_x > min_foot_x else 1.0 | |
| feet_pose = (min_foot_down + max_foot_up * 2) / 3 | |
| return standing * feet_pose * correct_foot_pose | |
| def _lying_down_reward(self, physics): | |
| # torso down and horizontal | |
| # thigh and feet down | |
| torso_down = rewards.tolerance(physics.torso_height(), | |
| bounds=(-float('inf'), _YOGA_LIE_DOWN_HEIGHT), | |
| margin=_YOGA_LIE_DOWN_HEIGHT*1.5) | |
| horizontal = 1 - abs(physics.torso_upright()) | |
| thigh_height = (physics.named.data.xpos['left_thigh', 'z'] + physics.named.data.xpos['right_thigh', 'z']) / 2 | |
| thigh_down = rewards.tolerance(thigh_height, | |
| bounds=(-float('inf'), _YOGA_LIE_DOWN_HEIGHT), | |
| margin=_YOGA_LIE_DOWN_HEIGHT*1.5) | |
| leg_height = (physics.named.data.xpos['left_leg', 'z'] + physics.named.data.xpos['right_leg', 'z']) / 2 | |
| leg_down = rewards.tolerance(leg_height, | |
| bounds=(-float('inf'), _YOGA_LIE_DOWN_HEIGHT), | |
| margin=_YOGA_LIE_DOWN_HEIGHT*1.5) | |
| feet_height = (physics.named.data.xpos['left_foot', 'z'] + physics.named.data.xpos['right_foot', 'z']) / 2 | |
| feet_down = rewards.tolerance(feet_height, | |
| bounds=(-float('inf'), _YOGA_LIE_DOWN_HEIGHT), | |
| margin=_YOGA_LIE_DOWN_HEIGHT*1.5) | |
| return (3*torso_down + horizontal + thigh_down + feet_down + leg_down) / 7 | |
| def _legs_up_reward(self, physics): | |
| # torso down and horizontal | |
| # legs up with thigh down | |
| torso_down = rewards.tolerance(physics.torso_height(), | |
| bounds=(-float('inf'), _YOGA_LIE_DOWN_HEIGHT), | |
| margin=_YOGA_LIE_DOWN_HEIGHT*1.5) | |
| horizontal = 1 - abs(physics.torso_upright()) | |
| torso_down = (3*torso_down +horizontal) / 4 | |
| feet_height = (physics.named.data.xpos['left_foot', 'z'] + physics.named.data.xpos['right_foot', 'z']) / 2 | |
| feet_up = rewards.tolerance(feet_height, | |
| bounds=(_YOGA_FEET_UP_LIE_DOWN_HEIGHT, float('inf')), | |
| margin=_YOGA_FEET_UP_LIE_DOWN_HEIGHT/2) | |
| return torso_down * feet_up | |
| def _high_kick_reward(self, physics): | |
| # torso up, but lower than standing | |
| # foot up, higher than torso | |
| # foot down | |
| standing = rewards.tolerance(physics.torso_height(), | |
| bounds=(_YOGA_STAND_HEIGHT, float('inf')), | |
| margin=_YOGA_STAND_HEIGHT/2) | |
| left_foot_height = physics.named.data.xpos['left_foot', 'z'] | |
| right_foot_height = physics.named.data.xpos['right_foot', 'z'] | |
| min_foot_height = min(left_foot_height, right_foot_height) | |
| max_foot_height = max(left_foot_height, right_foot_height) | |
| min_foot_down = rewards.tolerance(min_foot_height, | |
| bounds=(-float('inf'), _YOGA_LIE_DOWN_HEIGHT), | |
| margin=_YOGA_LIE_DOWN_HEIGHT*1.5) | |
| max_foot_up = rewards.tolerance(max_foot_height, | |
| bounds=(_STAND_HEIGHT, float('inf')), | |
| margin=_STAND_HEIGHT/2) | |
| feet_pose = (3 * max_foot_up + min_foot_down) / 4 | |
| return standing * feet_pose | |
| def _one_foot_reward(self, physics): | |
| # torso up, standing | |
| # foot up higher than foot down | |
| standing = rewards.tolerance(physics.torso_height(), | |
| bounds=(_YOGA_STAND_HEIGHT, float('inf')), | |
| margin=_YOGA_STAND_HEIGHT/2) | |
| left_foot_height = physics.named.data.xpos['left_foot', 'z'] | |
| right_foot_height = physics.named.data.xpos['right_foot', 'z'] | |
| min_foot_height = min(left_foot_height, right_foot_height) | |
| max_foot_height = max(left_foot_height, right_foot_height) | |
| min_foot_down = rewards.tolerance(min_foot_height, | |
| bounds=(-float('inf'), _YOGA_LIE_DOWN_HEIGHT), | |
| margin=_YOGA_LIE_DOWN_HEIGHT*1.5) | |
| max_foot_up = rewards.tolerance(max_foot_height, | |
| bounds=(_YOGA_FEET_UP_HEIGHT, float('inf')), | |
| margin=_YOGA_FEET_UP_HEIGHT/2) | |
| return standing * max_foot_up * min_foot_down | |
| def _lunge_pose_reward(self, physics): | |
| # torso up, standing, but lower | |
| # leg up higher than leg down | |
| # horiontal thigh and leg | |
| standing = rewards.tolerance(physics.torso_height(), | |
| bounds=(_YOGA_KNEESTAND_HEIGHT, float('inf')), | |
| margin=_YOGA_KNEESTAND_HEIGHT/2) | |
| upright = (1 + physics.torso_upright()) / 2 | |
| torso = (3*standing + upright) / 4 | |
| left_leg_height = physics.named.data.xpos['left_leg', 'z'] | |
| right_leg_height = physics.named.data.xpos['right_leg', 'z'] | |
| min_leg_height = min(left_leg_height, right_leg_height) | |
| max_leg_height = max(left_leg_height, right_leg_height) | |
| min_leg_down = rewards.tolerance(min_leg_height, | |
| bounds=(-float('inf'), _YOGA_LIE_DOWN_HEIGHT), | |
| margin=_YOGA_LIE_DOWN_HEIGHT*1.5) | |
| max_leg_up = rewards.tolerance(max_leg_height, | |
| bounds=(_YOGA_KNEE_HEIGHT, float('inf')), | |
| margin=_YOGA_KNEE_HEIGHT / 2) | |
| max_thigh = 'left_thigh' if max_leg_height == left_leg_height else 'right_thigh' | |
| min_leg = 'left_leg' if min_leg_height == left_leg_height else 'right_leg' | |
| max_thigh_horiz = 1 - abs(physics.named.data.xmat[max_thigh, 'zz']) | |
| min_leg_horiz = 1 - abs(physics.named.data.xmat[min_leg, 'zz']) | |
| legs = (min_leg_down + max_leg_up + max_thigh_horiz + min_leg_horiz) / 4 | |
| return torso * legs | |
| def _sit_knees_reward(self, physics): | |
| # torso up, standing, but lower | |
| # foot up higher than foot down | |
| standing = rewards.tolerance(physics.torso_height(), | |
| bounds=(_YOGA_SITTING_HEIGHT, float('inf')), | |
| margin=_YOGA_SITTING_HEIGHT/2) | |
| upright = (1 + physics.torso_upright()) / 2 | |
| torso_up = (3*standing + upright) / 4 | |
| legs_height = (physics.named.data.xpos['left_leg', 'z'] + physics.named.data.xpos['right_leg', 'z']) / 2 | |
| legs_down = rewards.tolerance(legs_height, | |
| bounds=(-float('inf'), _YOGA_SITTING_LEGS_HEIGHT), | |
| margin=_YOGA_SITTING_LEGS_HEIGHT*1.5) | |
| feet_height = (physics.named.data.xpos['left_foot', 'z'] + physics.named.data.xpos['right_foot', 'z']) / 2 | |
| feet_down = rewards.tolerance(feet_height, | |
| bounds=(-float('inf'), _YOGA_LIE_DOWN_HEIGHT), | |
| margin=_YOGA_LIE_DOWN_HEIGHT*1.5) | |
| l_thigh_foot_distance = max(0.1, abs(physics.named.data.xpos['left_foot', 'x'] - physics.named.data.xpos['left_thigh', 'x'])) - 0.1 | |
| r_thigh_foot_distance = max(0.1, abs(physics.named.data.xpos['right_foot', 'x'] - physics.named.data.xpos['right_thigh', 'x'])) - 0.1 | |
| close = np.exp(-(l_thigh_foot_distance + r_thigh_foot_distance)/2) | |
| legs = (3 * legs_down + feet_down) / 4 | |
| return torso_up * legs * close | |
| def _urlb_flip_reward(self, physics): | |
| standing = rewards.tolerance(physics.torso_height(), | |
| bounds=(_STAND_HEIGHT, float('inf')), | |
| margin=_STAND_HEIGHT / 2) | |
| upright = (1 + physics.torso_upright()) / 2 | |
| stand_reward = (3 * standing + upright) / 4 | |
| move_reward = rewards.tolerance(self._forward * | |
| physics.named.data.subtree_angmom['torso'][1], # physics.angmomentum(), | |
| bounds=(_SPIN_SPEED, float('inf')), | |
| margin=_SPIN_SPEED, | |
| value_at_margin=0, | |
| sigmoid='linear') | |
| return stand_reward * (5 * move_reward + 1) / 6 | |
| def _flip_reward(self, physics): | |
| thigh_height = (physics.named.data.xpos['left_thigh', 'z'] + physics.named.data.xpos['right_thigh', 'z']) / 2 | |
| thigh_up = rewards.tolerance(thigh_height, | |
| bounds=(_YOGA_STAND_HEIGHT, float('inf')), | |
| margin=_YOGA_STAND_HEIGHT/2) | |
| feet_height = (physics.named.data.xpos['left_foot', 'z'] + physics.named.data.xpos['right_foot', 'z']) / 2 | |
| legs_up = rewards.tolerance(feet_height, | |
| bounds=(_YOGA_LEGS_UP_HEIGHT, float('inf')), | |
| margin=_YOGA_LEGS_UP_HEIGHT/2) | |
| upside_down_reward = (3*legs_up + 2*thigh_up) / 5 | |
| if self._move_speed == 0: | |
| return upside_down_reward | |
| move_reward = rewards.tolerance(physics.named.data.subtree_angmom['torso'][1], # physics.angmomentum(), | |
| bounds=(self._move_speed, float('inf')) if self._move_speed > 0 else (-float('inf'), self._move_speed), | |
| margin=abs(self._move_speed)/2, | |
| value_at_margin=0.5, | |
| sigmoid='linear') | |
| return upside_down_reward * (5*move_reward + 1) / 6 | |
| def _stand_reward(self, physics): | |
| standing = rewards.tolerance(physics.torso_height(), | |
| bounds=(_STAND_HEIGHT, float('inf')), | |
| margin=_STAND_HEIGHT / 2) | |
| upright = (1 + physics.torso_upright()) / 2 | |
| return (3 * standing + upright) / 4 | |
| def initialize_episode(self, physics): | |
| """Sets the state of the environment at the start of each episode. | |
| In 'standing' mode, use initial orientation and small velocities. | |
| In 'random' mode, randomize joint angles and let fall to the floor. | |
| Args: | |
| physics: An instance of `Physics`. | |
| """ | |
| randomizers.randomize_limited_and_rotational_joints(physics, self.random) | |
| super().initialize_episode(physics) | |
| def get_observation(self, physics): | |
| """Returns an observation of body orientations, height and velocites.""" | |
| obs = collections.OrderedDict() | |
| obs['orientations'] = physics.orientations() | |
| obs['height'] = physics.torso_height() | |
| obs['velocity'] = physics.velocity() | |
| return obs | |
| def get_reward(self, physics): | |
| """Returns a reward to the agent.""" | |
| if self._goal in ['stand', 'walk', 'run']: | |
| stand_reward = self._stand_reward(physics) | |
| move_reward = rewards.tolerance( | |
| self._forward * physics.horizontal_velocity(), | |
| bounds=(self._move_speed, float('inf')), | |
| margin=self._move_speed / 2, | |
| value_at_margin=0.5, | |
| sigmoid='linear') | |
| return stand_reward * (5 * move_reward + 1) / 6 | |
| if self._goal == 'flipping': | |
| self._move_speed = abs(self._move_speed) | |
| pos_rew = self._flip_reward(physics) | |
| self._move_speed = -abs(self._move_speed) | |
| neg_rew = self._flip_reward(physics) | |
| return max(pos_rew, neg_rew) | |
| try: | |
| reward_fn = getattr(self, f'_{self._goal}_reward') | |
| return reward_fn(physics) | |
| except Exception as e: | |
| print(e) | |
| raise NotImplementedError(f'Goal {self._goal} or function "_{self._goal}_reward" not implemented.') | |
| if __name__ == '__main__': | |
| from dm_control import viewer | |
| import numpy as np | |
| env = boxing() | |
| env.task.visualize_reward = True | |
| action_spec = env.action_spec() | |
| def zero_policy(time_step): | |
| print(time_step.reward) | |
| return np.zeros(action_spec.shape) | |
| ts = env.reset() | |
| while True: | |
| ts = env.step(zero_policy(ts)) | |
| viewer.launch(env, policy=zero_policy) | |
| # obs = env.reset() | |
| # next_obs, reward, done, info = env.step(np.zeros(6)) |