Spaces:
Running
Running
| import os | |
| import pytest | |
| import numpy as np | |
| from easydict import EasyDict | |
| from ding.utils import set_pkg_seed | |
| from dizoo.mujoco.envs import MujocoEnv | |
# Without parametrization pytest would look for a fixture named
# ``delay_reward_step`` and fail at collection time. Both values are smaller
# than the 25 steps taken below.
# NOTE(review): 1 presumably means "no delay" -- confirm against MujocoEnv.
@pytest.mark.parametrize('delay_reward_step', [1, 10])
def test_mujoco_env_delay_reward(delay_reward_step):
    """Step an ``Ant-v3`` ``MujocoEnv`` 25 times and check the reward shape.

    Args:
        delay_reward_step: Value stored under the ``'delay_reward_step'`` key
            of the env config; supplied by ``pytest.mark.parametrize``.

    The test asserts that every ``timestep.reward`` returned by ``env.step``
    has shape ``(1, )``.
    """
    set_pkg_seed(1234, use_cuda=False)
    env = MujocoEnv(
        EasyDict(
            {
                'env_id': 'Ant-v3',
                'action_clip': False,
                'delay_reward_step': delay_reward_step,
                'save_replay_gif': False,
                'replay_path_gif': None
            }
        )
    )
    try:
        env.seed(1234)
        env.reset()
        # ``action_space.shape`` is a shape tuple, used as ``size`` for sampling.
        action_shape = env.action_space.shape
        for i in range(25):
            # Two ways of producing a random action are exercised: sampling
            # with ``np.random`` using the action-space shape (first 10 steps)
            # and ``env.random_action()`` (remaining steps).
            if i < 10:
                action = np.random.random(size=action_shape)
            else:
                action = env.random_action()
            timestep = env.step(action)
            print(timestep.reward)
            assert timestep.reward.shape == (1, ), timestep.reward.shape
    finally:
        # Release the environment even when an assertion above fails.
        env.close()
def test_mujoco_env_eval_episode_return():
    """Run one ``Ant-v3`` episode and cross-check the reported episode return.

    Random actions are applied until ``timestep.done`` is set. The rewards
    returned by ``env.step`` are summed locally and compared with
    ``timestep.info['eval_episode_return']`` of the final step; the two must
    agree within a relative tolerance of ``1e-5``.
    """
    set_pkg_seed(1234, use_cuda=False)
    env = MujocoEnv(
        EasyDict(
            {
                'env_id': 'Ant-v3',
                'action_clip': False,
                'delay_reward_step': 4,
                'save_replay_gif': False,
                'replay_path_gif': None
            }
        )
    )
    try:
        env.seed(1234)
        env.reset()
        # ``action_space.shape`` is a shape tuple, used as ``size`` for sampling.
        action_shape = env.action_space.shape
        # Local accumulator, kept in float32 and updated in place.
        eval_episode_return = np.array([0.], dtype=np.float32)
        while True:
            action = np.random.random(size=action_shape)
            timestep = env.step(action)
            eval_episode_return += timestep.reward
            if timestep.done:
                break

        reported_return = timestep.info['eval_episode_return']
        print(
            "{}({}), {}({})".format(
                reported_return, type(reported_return), eval_episode_return, type(eval_episode_return)
            )
        )
        # The locally summed ``timestep.reward`` values and the env-reported
        # ``eval_episode_return`` are computed independently, so they are only
        # required to match up to a relative error of 1e-5, not exactly.
        relative_error = abs(reported_return.item() - eval_episode_return.item()) / abs(reported_return.item())
        assert relative_error < 1e-5
    finally:
        # Release the environment even when the assertion above fails.
        env.close()