# File size: 17,414 Bytes
# 96da58e
"""
The base config class that is used for all algorithm configs in this repository.
Subclasses get registered into a global dictionary, making it easy to instantiate
the correct config class given the algorithm name.
"""

import six # preserve metaclass compatibility between python 2 and 3
from copy import deepcopy

import robomimic
from robomimic.config.config import Config

# Global registry mapping an algorithm name (a config class's ALGO_NAME) to the
# config class itself. It is filled in by ConfigMeta when a config class is created,
# and read by config_factory and get_all_registered_configs.
REGISTERED_CONFIGS = {}


def get_all_registered_configs():
    """
    Return a copy of the global name -> config class registry, so that external
    code can inspect it without being handed the registry object itself.
    """
    registry_snapshot = deepcopy(REGISTERED_CONFIGS)
    return registry_snapshot


def config_factory(algo_name, dic=None):
    """
    Build a config instance for the algorithm called @algo_name.

    Args:
        algo_name: key to look up in the global config registry
        dic: optional dictionary, forwarded to the config class as @dict_to_load

    Returns:
        instance of the config class registered under @algo_name

    Raises:
        Exception: if @algo_name is not a registered config name
    """
    if algo_name in REGISTERED_CONFIGS:
        config_cls = REGISTERED_CONFIGS[algo_name]
        return config_cls(dict_to_load=dic)

    # unknown name - list what is available to help the caller
    known_algos = ', '.join(REGISTERED_CONFIGS)
    raise Exception(
        "Config for algo name {} not found. Make sure it is a registered config among: {}".format(
            algo_name, known_algos))


class ConfigMeta(type):
    """
    Define a metaclass for constructing a config class.
    It registers configs into the global registry, keyed by the class's ALGO_NAME.
    """
    def __new__(meta, name, bases, class_dict):
        """
        Create the class and record it in REGISTERED_CONFIGS.

        Args:
            meta: this metaclass
            name: name of the class being created
            bases: tuple of base classes
            class_dict: namespace of the class body

        Returns:
            the newly created class
        """
        cls = super(ConfigMeta, meta).__new__(meta, name, bases, class_dict)

        # the base class itself is never registered
        if cls.__name__ == "BaseConfig":
            return cls

        algo_name = cls.ALGO_NAME
        if isinstance(algo_name, property):
            # ALGO_NAME was not overridden: looking up a property on the class (rather
            # than an instance) yields the property object itself. Registering under
            # that object would put a non-string key into the registry, which breaks
            # the ', '.join(...) used to build config_factory's error message, so
            # such classes are left unregistered.
            return cls

        REGISTERED_CONFIGS[algo_name] = cls
        return cls


@six.add_metaclass(ConfigMeta)
class BaseConfig(Config):
    """
    Base config class. When constructed without a dictionary, it populates the
    `experiment`, `train`, `algo`, `observation` and `meta` sections with default
    values and then locks the set of keys. Subclasses must define the class
    attribute ALGO_NAME and normally override `algo_config`.
    """
    def __init__(self, dict_to_load=None):
        """
        Args:
            dict_to_load: optional dictionary. If provided, it is handed to the parent
                Config constructor and this method returns right away - the default
                sections below are not populated and `lock_keys` is not called here.
        """
        if dict_to_load is not None:
            super(BaseConfig, self).__init__(dict_to_load)
            return

        super(BaseConfig, self).__init__()

        # store algo name class property in the config (must be implemented by subclasses)
        self.algo_name = type(self).ALGO_NAME

        self.experiment_config()
        self.train_config()
        self.algo_config()
        self.observation_config()
        self.meta_config()

        # After Config init, new keys cannot be added to the config, except under nested
        # attributes that have called @do_not_lock_keys
        self.lock_keys()

    @property
    def ALGO_NAME(self):
        # Must be specified by subclasses, normally as a plain class attribute that
        # shadows this property. Reading it from an instance of a class that did not
        # override it raises NotImplementedError. Reading it from the class itself
        # does not call this getter - it evaluates to the property object.
        raise NotImplementedError

    def experiment_config(self):
        """
        This function populates the `config.experiment` attribute of the config,
        which has several experiment settings such as the name of the training run,
        whether to do logging, whether to save models (and how often), whether to render
        videos, and whether to do rollouts (and how often). This class has a default
        implementation that usually doesn't need to be overridden.
        """

        self.experiment.name = "test"                               # name of experiment used to make log files
        self.experiment.validate = False                            # whether to do validation or not
        self.experiment.logging.terminal_output_to_txt = True       # whether to log stdout to txt file
        self.experiment.logging.log_tb = True                       # enable tensorboard logging
        self.experiment.logging.log_wandb = False                   # enable wandb logging
        self.experiment.logging.wandb_proj_name = "debug"           # project name if using wandb


        ## save config - if and when to save model checkpoints ##
        self.experiment.save.enabled = True                         # whether model saving should be enabled or disabled
        self.experiment.save.every_n_seconds = None                 # save model every n seconds (set to None to disable)
        self.experiment.save.every_n_epochs = 50                    # save model every n epochs (set to None to disable)
        self.experiment.save.epochs = []                            # save model on these specific epochs
        self.experiment.save.on_best_validation = False             # save models that achieve best validation score
        self.experiment.save.on_best_rollout_return = False         # save models that achieve best rollout return
        self.experiment.save.on_best_rollout_success_rate = True    # save models that achieve best success rate

        # epoch definitions - if not None, set an epoch to be this many gradient steps, else the full dataset size will be used
        self.experiment.epoch_every_n_steps = 100                   # number of gradient steps in train epoch (None for full dataset pass)
        self.experiment.validation_epoch_every_n_steps = 10         # number of gradient steps in valid epoch (None for full dataset pass)

        # envs to evaluate model on (assuming rollouts are enabled), to override the metadata stored in dataset
        self.experiment.env = None                                  # no need to set this (unless you want to override)
        self.experiment.additional_envs = None                      # additional environments that should get evaluated


        ## rendering config ##
        self.experiment.render = False                              # render on-screen or not
        self.experiment.render_video = True                         # render evaluation rollouts to videos
        self.experiment.keep_all_videos = False                     # save all videos, instead of only saving those for saved model checkpoints
        self.experiment.video_skip = 5                              # render video frame every n environment steps during rollout


        ## evaluation rollout config ##
        self.experiment.rollout.enabled = True                      # enable evaluation rollouts
        self.experiment.rollout.n = 50                              # number of rollouts per evaluation
        self.experiment.rollout.horizon = 400                       # maximum number of env steps per rollout
        self.experiment.rollout.rate = 50                           # do rollouts every @rate epochs
        self.experiment.rollout.warmstart = 0                       # number of epochs to wait before starting rollouts
        self.experiment.rollout.terminate_on_success = True         # end rollout early after task success

        # for updating the evaluation env meta data (left unlocked so new keys can be added later)
        self.experiment.env_meta_update_dict = Config()
        self.experiment.env_meta_update_dict.do_not_lock_keys()

    def train_config(self):
        """
        This function populates the `config.train` attribute of the config, which
        has several settings related to the training process, such as the dataset
        to use for training, and how the data loader should load the data. This
        class has a default implementation that usually doesn't need to be overridden.
        """

        # Path to hdf5 dataset to use for training
        self.train.data = None

        # Write all results to this directory. A new folder with the timestamp will be created
        # in this directory, and it will contain three subfolders - "log", "models", and "videos".
        # The "log" directory will contain tensorboard and stdout txt logs. The "models" directory
        # will contain saved model checkpoints. The "videos" directory contains evaluation rollout
        # videos.
        self.train.output_dir = "../{}_trained_models".format(self.algo_name)


        ## dataset loader config ##

        # num workers for loading data - generally set to 0 for low-dim datasets, and 2 for image datasets
        self.train.num_data_workers = 0

        # One of ["all", "low_dim", or None]. Set to "all" to cache entire hdf5 in memory - this is
        # by far the fastest for data loading. Set to "low_dim" to cache all non-image data. Set
        # to None to use no caching - in this case, every batch sample is retrieved via file i/o.
        # You should almost never set this to None, even for large image datasets.
        self.train.hdf5_cache_mode = "all"

        # used for parallel data loading
        self.train.hdf5_use_swmr = True

        # whether to load "next_obs" group from hdf5 - only needed for batch / offline RL algorithms
        self.train.hdf5_load_next_obs = True

        # if true, normalize observations at train and test time, using the global mean and standard deviation
        # of each observation in each dimension, computed across the training set. See SequenceDataset.normalize_obs
        # in utils/dataset.py for more information.
        self.train.hdf5_normalize_obs = False

        # if provided, use the list of demo keys under the hdf5 group "mask/@hdf5_filter_key" for training, instead
        # of the full dataset. This provides a convenient way to train on only a subset of the trajectories in a dataset.
        self.train.hdf5_filter_key = None

        # if provided, use the list of demo keys under the hdf5 group "mask/@hdf5_validation_filter_key" for validation.
        # Must be provided if @experiment.validate is True.
        self.train.hdf5_validation_filter_key = None

        # length of experience sequence to fetch from the dataset
        # and whether to pad the beginning / end of the sequence at boundaries of trajectory in dataset
        self.train.seq_length = 1
        self.train.pad_seq_length = True
        self.train.frame_stack = 1
        self.train.pad_frame_stack = True

        # keys from hdf5 to load into each batch, besides "obs" and "next_obs". If algorithms
        # require additional keys from each trajectory in the hdf5, they should be specified here.
        self.train.dataset_keys = (
            "actions",
            "rewards",
            "dones",
        )

        self.train.action_keys = ["actions"]

        # specifying each action key to load and its corresponding normalization/conversion requirement
        # e.g. for dataset keys "action/eef_pos" and "action/eef_rot"
        # the desired value of self.train.action_config is:
        # {
        #   "action/eef_pos": {
        #       "normalization": "min_max",
        #       "rot_conversion": None
        #   },
        #   "action/eef_rot": {
        #       "normalization": None,
        #       "rot_conversion": "axis_angle_to_6d"
        #   }
        # }
        # self.train.action_config.actions.normalization = None # "min_max"
        # self.train.action_config.actions.rot_conversion = None # "axis_angle_to_6d"
        self.train.action_config = {}
        # self.train.action_config.do_not_lock_keys()

        # one of [None, "last"] - set to "last" to include goal observations in each batch
        self.train.goal_mode = None


        ## learning config ##
        self.train.cuda = True          # use GPU or not
        self.train.batch_size = 100     # batch size
        self.train.num_epochs = 2000    # number of training epochs
        self.train.seed = 1             # seed for training (for reproducibility)

        self.train.data_format = "robomimic" # either "robomimic" or "r2d2"

    def algo_config(self):
        """
        This function populates the `config.algo` attribute of the config, and is given to the
        `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config`
        argument to the constructor. Any parameter that an algorithm needs to determine its
        training and test-time behavior should be populated here. This function should be
        implemented by every subclass.
        """
        pass

    def observation_config(self):
        """
        This function populates the `config.observation` attribute of the config, and is given
        to the `Algo` subclass (see `algo/algo.py`) for each algorithm through the `obs_config`
        argument to the constructor. This portion of the config is used to specify what
        observation modalities should be used by the networks for training, and how the
        observation modalities should be encoded by the networks. While this class has a
        default implementation that usually doesn't need to be overridden, certain algorithm
        configs may choose to, in order to have separate configs for different networks
        in the algorithm.
        """

        # observation modalities
        self.observation.modalities.obs.low_dim = [             # specify low-dim observations for agent
            "robot0_eef_pos",
            "robot0_eef_quat",
            "robot0_gripper_qpos",
            "object",
        ]
        self.observation.modalities.obs.rgb = []              # specify rgb image observations for agent
        self.observation.modalities.obs.depth = []
        self.observation.modalities.obs.scan = []
        self.observation.modalities.goal.low_dim = []           # specify low-dim goal observations to condition agent on
        self.observation.modalities.goal.rgb = []             # specify rgb image goal observations to condition agent on
        self.observation.modalities.goal.depth = []
        self.observation.modalities.goal.scan = []
        self.observation.modalities.obs.do_not_lock_keys()
        self.observation.modalities.goal.do_not_lock_keys()

        # observation encoder architectures (per obs modality)
        # This applies to all networks that take observation dicts as input

        # =============== Low Dim default encoder (no encoder) ===============
        self.observation.encoder.low_dim.core_class = None
        self.observation.encoder.low_dim.core_kwargs = Config()                 # No kwargs by default
        self.observation.encoder.low_dim.core_kwargs.do_not_lock_keys()

        # Low Dim: Obs Randomizer settings
        self.observation.encoder.low_dim.obs_randomizer_class = None
        self.observation.encoder.low_dim.obs_randomizer_kwargs = Config()       # No kwargs by default
        self.observation.encoder.low_dim.obs_randomizer_kwargs.do_not_lock_keys()

        # =============== RGB default encoder (ResNet backbone + linear layer output) ===============
        self.observation.encoder.rgb.core_class = "VisualCore"                  # Default VisualCore class combines backbone (like ResNet-18) with pooling operation (like spatial softmax)
        self.observation.encoder.rgb.core_kwargs = Config()                     # See models/obs_core.py for important kwargs to set and defaults used
        self.observation.encoder.rgb.core_kwargs.do_not_lock_keys()

        # RGB: Obs Randomizer settings
        self.observation.encoder.rgb.obs_randomizer_class = None                # Can set to 'CropRandomizer' to use crop randomization
        self.observation.encoder.rgb.obs_randomizer_kwargs = Config()           # See models/obs_core.py for important kwargs to set and defaults used
        self.observation.encoder.rgb.obs_randomizer_kwargs.do_not_lock_keys()

        # Allow for other custom modalities to be specified
        self.observation.encoder.do_not_lock_keys()

        # =============== Depth default encoder (same as rgb) ===============
        self.observation.encoder.depth = deepcopy(self.observation.encoder.rgb)

        # =============== Scan default encoder (Conv1d backbone + linear layer output) ===============
        self.observation.encoder.scan = deepcopy(self.observation.encoder.rgb)

        # Scan: Modify the core class + kwargs, otherwise, is same as rgb encoder
        self.observation.encoder.scan.core_class = "ScanCore"                   # Default ScanCore class uses Conv1D to process this modality
        self.observation.encoder.scan.core_kwargs = Config()                    # See models/obs_core.py for important kwargs to set and defaults used
        self.observation.encoder.scan.core_kwargs.do_not_lock_keys()

    def meta_config(self):
        """
        This function populates the `config.meta` attribute of the config. This portion of the config
        is used to specify job information primarily for hyperparameter sweeps.
        It contains hyperparameter keys and values, which are populated automatically
        by the hyperparameter config generator (see `utils/hyperparam_utils.py`).
        These values are read by the wandb logger (see `utils/log_utils.py`) to set job tags.
        """

        self.meta.hp_base_config_file = None            # base config file in hyperparam sweep
        self.meta.hp_keys = []                          # relevant keys (swept) in hyperparam sweep
        self.meta.hp_values = []                        # values corresponding to keys in hyperparam sweep

    @property
    def use_goals(self):
        """
        Whether the agent is goal-conditioned.

        Returns:
            bool: True if at least one goal modality lists at least one observation key
        """
        goal_modalities = self.observation.modalities.goal
        # stop at the first goal observation key found, no need to collect them all
        return any(True for modality in goal_modalities.values() for _ in modality)

    @property
    def all_obs_keys(self):
        """
        This grabs the union of observation keys over all modalities (e.g.: low_dim, rgb, depth, etc.)
        of the two modality groups stored by this config, `obs` and `goal`.

        Returns:
            list: sorted observation keys used for this model, without duplicates
        """
        modalities = self.observation.modalities
        groups = (modalities.obs, modalities.goal)
        # pool all modalities; the set drops keys shared between groups / modalities
        return sorted({
            obs_key
            for group in groups
            for modality in group.values()
            for obs_key in modality
        })