File size: 6,977 Bytes
abb3f94 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 |
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from collections import deque
from typing import Any, NamedTuple
import dm_env
import numpy as np
from dm_control import manipulation, suite
from dm_control.suite.wrappers import action_scale, pixels
from dm_env import StepType, specs
class ExtendedTimeStep(NamedTuple):
step_type: Any
reward: Any
discount: Any
observation: Any
action: Any
def first(self):
return self.step_type == StepType.FIRST
def mid(self):
return self.step_type == StepType.MID
def last(self):
return self.step_type == StepType.LAST
def __getitem__(self, attr):
return getattr(self, attr)
class ActionRepeatWrapper(dm_env.Environment):
def __init__(self, env, num_repeats):
self._env = env
self._num_repeats = num_repeats
def step(self, action):
reward = 0.0
discount = 1.0
for i in range(self._num_repeats):
time_step = self._env.step(action)
reward += (time_step.reward or 0.0) * discount
discount *= time_step.discount
if time_step.last():
break
return time_step._replace(reward=reward, discount=discount)
def observation_spec(self):
return self._env.observation_spec()
def action_spec(self):
return self._env.action_spec()
def reset(self):
return self._env.reset()
def __getattr__(self, name):
return getattr(self._env, name)
class FrameStackWrapper(dm_env.Environment):
def __init__(self, env, num_frames, pixels_key='pixels'):
self._env = env
self._num_frames = num_frames
self._frames = deque([], maxlen=num_frames)
self._pixels_key = pixels_key
wrapped_obs_spec = env.observation_spec()
assert pixels_key in wrapped_obs_spec
pixels_shape = wrapped_obs_spec[pixels_key].shape
# remove batch dim
if len(pixels_shape) == 4:
pixels_shape = pixels_shape[1:]
self._obs_spec = specs.BoundedArray(shape=np.concatenate(
[[pixels_shape[2] * num_frames], pixels_shape[:2]], axis=0),
dtype=np.uint8,
minimum=0,
maximum=255,
name='observation')
def _transform_observation(self, time_step):
assert len(self._frames) == self._num_frames
obs = np.concatenate(list(self._frames), axis=0)
return time_step._replace(observation=obs)
def _extract_pixels(self, time_step):
pixels = time_step.observation[self._pixels_key]
# remove batch dim
if len(pixels.shape) == 4:
pixels = pixels[0]
# transpose: 84 x 84 x 3 -> 3 x 84 x 84
return pixels.transpose(2, 0, 1).copy()
def reset(self):
time_step = self._env.reset()
pixels = self._extract_pixels(time_step)
for _ in range(self._num_frames):
self._frames.append(pixels)
return self._transform_observation(time_step)
def step(self, action):
time_step = self._env.step(action)
pixels = self._extract_pixels(time_step)
self._frames.append(pixels)
return self._transform_observation(time_step)
def observation_spec(self):
return self._obs_spec
def action_spec(self):
return self._env.action_spec()
def __getattr__(self, name):
return getattr(self._env, name)
class ActionDTypeWrapper(dm_env.Environment):
def __init__(self, env, dtype):
self._env = env
wrapped_action_spec = env.action_spec()
self._action_spec = specs.BoundedArray(wrapped_action_spec.shape,
dtype,
wrapped_action_spec.minimum,
wrapped_action_spec.maximum,
'action')
def step(self, action):
action = action.astype(self._env.action_spec().dtype)
return self._env.step(action)
def observation_spec(self):
return self._env.observation_spec()
def action_spec(self):
return self._action_spec
def reset(self):
return self._env.reset()
def __getattr__(self, name):
return getattr(self._env, name)
class ExtendedTimeStepWrapper(dm_env.Environment):
def __init__(self, env):
self._env = env
def reset(self):
time_step = self._env.reset()
return self._augment_time_step(time_step)
def step(self, action):
time_step = self._env.step(action)
return self._augment_time_step(time_step, action)
def _augment_time_step(self, time_step, action=None):
if action is None:
action_spec = self.action_spec()
action = np.zeros(action_spec.shape, dtype=action_spec.dtype)
return ExtendedTimeStep(observation=time_step.observation,
step_type=time_step.step_type,
action=action,
reward=time_step.reward or 0.0,
discount=time_step.discount or 1.0)
def observation_spec(self):
return self._env.observation_spec()
def action_spec(self):
return self._env.action_spec()
def __getattr__(self, name):
return getattr(self._env, name)
def make(name, frame_stack, action_repeat, seed):
domain, task = name.split('_', 1)
# overwrite cup to ball_in_cup
domain = dict(cup='ball_in_cup').get(domain, domain)
# make sure reward is not visualized
if (domain, task) in suite.ALL_TASKS:
env = suite.load(domain,
task,
task_kwargs={'random': seed},
visualize_reward=False)
pixels_key = 'pixels'
else:
name = f'{domain}_{task}_vision'
env = manipulation.load(name, seed=seed)
pixels_key = 'front_close'
# add wrappers
env = ActionDTypeWrapper(env, np.float32)
env = ActionRepeatWrapper(env, action_repeat)
env = action_scale.Wrapper(env, minimum=-1.0, maximum=+1.0)
# add renderings for clasical tasks
if (domain, task) in suite.ALL_TASKS:
# zoom in camera for quadruped
camera_id = dict(quadruped=2).get(domain, 0)
render_kwargs = dict(height=84, width=84, camera_id=camera_id)
env = pixels.Wrapper(env,
pixels_only=True,
render_kwargs=render_kwargs)
# stack several frames
env = FrameStackWrapper(env, frame_stack, pixels_key)
env = ExtendedTimeStepWrapper(env)
return env
|