| callbacks: |
| rollout_lh: |
| tasks: |
| _target_: calvin_env.envs.tasks.Tasks |
| tasks: |
| rotate_red_block_right: |
| - rotate_object |
| - block_red |
| - -60 |
| rotate_red_block_left: |
| - rotate_object |
| - block_red |
| - 60 |
| rotate_blue_block_right: |
| - rotate_object |
| - block_blue |
| - -60 |
| rotate_blue_block_left: |
| - rotate_object |
| - block_blue |
| - 60 |
| rotate_pink_block_right: |
| - rotate_object |
| - block_pink |
| - -60 |
| rotate_pink_block_left: |
| - rotate_object |
| - block_pink |
| - 60 |
| push_red_block_right: |
| - push_object |
| - block_red |
| - 0.1 |
| - 0 |
| push_red_block_left: |
| - push_object |
| - block_red |
| - -0.1 |
| - 0 |
| push_blue_block_right: |
| - push_object |
| - block_blue |
| - 0.1 |
| - 0 |
| push_blue_block_left: |
| - push_object |
| - block_blue |
| - -0.1 |
| - 0 |
| push_pink_block_right: |
| - push_object |
| - block_pink |
| - 0.1 |
| - 0 |
| push_pink_block_left: |
| - push_object |
| - block_pink |
| - -0.1 |
| - 0 |
| move_slider_left: |
| - move_door_rel |
| - base__slide |
| - 0.15 |
| move_slider_right: |
| - move_door_rel |
| - base__slide |
| - -0.15 |
| open_drawer: |
| - move_door_rel |
| - base__drawer |
| - 0.12 |
| close_drawer: |
| - move_door_rel |
| - base__drawer |
| - -0.12 |
| lift_red_block_table: |
| - lift_object |
| - block_red |
| - 0.05 |
| - table |
| - base_link |
| lift_red_block_slider: |
| - lift_object |
| - block_red |
| - 0.03 |
| - table |
| - plank_link |
| lift_red_block_drawer: |
| - lift_object |
| - block_red |
| - 0.05 |
| - table |
| - drawer_link |
| lift_blue_block_table: |
| - lift_object |
| - block_blue |
| - 0.05 |
| - table |
| - base_link |
| lift_blue_block_slider: |
| - lift_object |
| - block_blue |
| - 0.03 |
| - table |
| - plank_link |
| lift_blue_block_drawer: |
| - lift_object |
| - block_blue |
| - 0.05 |
| - table |
| - drawer_link |
| lift_pink_block_table: |
| - lift_object |
| - block_pink |
| - 0.05 |
| - table |
| - base_link |
| lift_pink_block_slider: |
| - lift_object |
| - block_pink |
| - 0.03 |
| - table |
| - plank_link |
| lift_pink_block_drawer: |
| - lift_object |
| - block_pink |
| - 0.05 |
| - table |
| - drawer_link |
| place_in_slider: |
| - place_object |
| - table |
| - plank_link |
| place_in_drawer: |
| - place_object |
| - table |
| - drawer_link |
| stack_block: |
| - stack_objects |
| unstack_block: |
| - unstack_objects |
| turn_on_lightbulb: |
| - toggle_light |
| - lightbulb |
| - 0 |
| - 1 |
| turn_off_lightbulb: |
| - toggle_light |
| - lightbulb |
| - 1 |
| - 0 |
| turn_on_led: |
| - toggle_light |
| - led |
| - 0 |
| - 1 |
| turn_off_led: |
| - toggle_light |
| - led |
| - 1 |
| - 0 |
| push_into_drawer: |
| - push_object_into |
| - - block_red |
| - block_blue |
| - block_pink |
| - table |
| - base_link |
| - table |
| - drawer_link |
| val_annotations: |
| rotate_red_block_right: |
| - take the red block and rotate it to the right |
| rotate_red_block_left: |
| - take the red block and rotate it to the left |
| rotate_blue_block_right: |
| - take the blue block and rotate it to the right |
| rotate_blue_block_left: |
| - take the blue block and rotate it to the left |
| rotate_pink_block_right: |
| - take the pink block and rotate it to the right |
| rotate_pink_block_left: |
| - take the pink block and rotate it to the left |
| push_red_block_right: |
| - go push the red block right |
| push_red_block_left: |
| - go push the red block left |
| push_blue_block_right: |
| - go push the blue block right |
| push_blue_block_left: |
| - go push the blue block left |
| push_pink_block_right: |
| - go push the pink block right |
| push_pink_block_left: |
| - go push the pink block left |
| move_slider_left: |
| - push the sliding door to the left side |
| move_slider_right: |
| - push the sliding door to the right side |
| open_drawer: |
| - pull the handle to open the drawer |
| close_drawer: |
| - push the handle to close the drawer |
| lift_red_block_table: |
| - grasp and lift the red block |
| lift_blue_block_table: |
| - grasp and lift the blue block |
| lift_pink_block_table: |
| - grasp and lift the pink block |
| lift_red_block_slider: |
| - lift the red block from the sliding cabinet |
| lift_blue_block_slider: |
| - lift the blue block from the sliding cabinet |
| lift_pink_block_slider: |
| - lift the pink block from the sliding cabinet |
| lift_red_block_drawer: |
| - Take the red block from the drawer |
| lift_blue_block_drawer: |
| - Take the blue block from the drawer |
| lift_pink_block_drawer: |
| - Take the pink block from the drawer |
| place_in_slider: |
| - store the grasped block in the sliding cabinet |
| place_in_drawer: |
| - store the grasped block in the drawer |
| push_into_drawer: |
| - slide the block that it falls into the drawer |
| stack_block: |
| - stack the grasped block |
| unstack_block: |
| - remove the stacked block |
| turn_on_lightbulb: |
| - use the switch to turn on the light bulb |
| turn_off_lightbulb: |
| - use the switch to turn off the light bulb |
| turn_on_led: |
| - press the button to turn on the led light |
| turn_off_led: |
| - press the button to turn off the led light |
| _target_: mode.rollout.rollout_long_horizon.RolloutLongHorizon |
| _recursive_: false |
| env_cfg: |
| _target_: mode.wrappers.hulc_wrapper.HulcWrapper |
| skip_epochs: ${rollout_lh_skip_epochs} |
| rollout_freq: 5 |
| num_videos: 0 |
| num_sequences: 1000 |
| replan_freq: 30 |
| ep_len: 360 |
| empty_cache: false |
| log_video_to_file: false |
| save_dir: ./videos |
| lang_folder: ${lang_folder} |
| debug: false |
| ema: |
| _target_: mode.callbacks.ema.EMA |
| decay: 0.999 |
| start_step: 0 |
| save_ema_weights_in_callback_state: true |
| evaluate_ema_weights_instead: true |
| power: 0.6666666666666666 |
| inv_gamma: 1.0 |
| min_value: 0.0 |
| max_value: 0.9999 |
| checkpoint: |
| _target_: pytorch_lightning.callbacks.ModelCheckpoint |
| verbose: true |
| dirpath: saved_models |
| filename: '{epoch:02d}' |
| every_n_epochs: ${callbacks.rollout_lh.rollout_freq} |
| datamodule: |
| transforms: |
| train: |
| rgb_static: |
| - _target_: torchvision.transforms.Resize |
| size: 224 |
| antialias: true |
| - _target_: mode.utils.transforms.RandomShiftsAug |
| pad: 10 |
| - _target_: mode.utils.transforms.ScaleImageTensor |
| - _target_: torchvision.transforms.Normalize |
| mean: |
| - 0.48145466 |
| - 0.4578275 |
| - 0.40821073 |
| std: |
| - 0.26862954 |
| - 0.26130258 |
| - 0.27577711 |
| rgb_gripper: |
| - _target_: torchvision.transforms.Resize |
| size: 224 |
| antialias: true |
| - _target_: mode.utils.transforms.RandomShiftsAug |
| pad: 4 |
| - _target_: mode.utils.transforms.ScaleImageTensor |
| - _target_: torchvision.transforms.Normalize |
| mean: |
| - 0.48145466 |
| - 0.4578275 |
| - 0.40821073 |
| std: |
| - 0.26862954 |
| - 0.26130258 |
| - 0.27577711 |
| robot_obs: |
| - _target_: mode.utils.transforms.NormalizeVector |
| scene_obs: |
| - _target_: mode.utils.transforms.NormalizeVector |
| val: |
| rgb_static: |
| - _target_: torchvision.transforms.Resize |
| size: 224 |
| antialias: true |
| - _target_: mode.utils.transforms.ScaleImageTensor |
| - _target_: torchvision.transforms.Normalize |
| mean: |
| - 0.48145466 |
| - 0.4578275 |
| - 0.40821073 |
| std: |
| - 0.26862954 |
| - 0.26130258 |
| - 0.27577711 |
| rgb_gripper: |
| - _target_: torchvision.transforms.Resize |
| size: 224 |
| antialias: true |
| - _target_: mode.utils.transforms.ScaleImageTensor |
| - _target_: torchvision.transforms.Normalize |
| mean: |
| - 0.48145466 |
| - 0.4578275 |
| - 0.40821073 |
| std: |
| - 0.26862954 |
| - 0.26130258 |
| - 0.27577711 |
| robot_obs: |
| - _target_: mode.utils.transforms.NormalizeVector |
| scene_obs: |
| - _target_: mode.utils.transforms.NormalizeVector |
| _target_: mode.datasets.hulc_data_module.HulcDataModule |
| _recursive_: false |
| root_data_dir: ${root_data_dir} |
| action_space: 7 |
| action_max: |
| - 1.0 |
| - 1.0 |
| - 1.0 |
| - 1.0 |
| - 1.0 |
| - 1.0 |
| - 1.0 |
| action_min: |
| - -1.0 |
| - -1.0 |
| - -1.0 |
| - -1.0 |
| - -1.0 |
| - -1.0 |
| - -1 |
| shuffle_val: false |
| observation_space: |
| rgb_obs: |
| - rgb_static |
| - rgb_gripper |
| depth_obs: [] |
| state_obs: |
| - robot_obs |
| actions: |
| - rel_actions |
| language: |
| - language |
| proprioception_dims: |
| n_state_obs: 8 |
| keep_indices: |
| - - 0 |
| - 7 |
| - - 14 |
| - 15 |
| robot_orientation_idx: |
| - 3 |
| - 6 |
| normalize: true |
| normalize_robot_orientation: true |
| datasets: |
| lang_dataset: |
| _target_: mode.datasets.disk_dataset.ExtendedDiskDataset |
| key: lang |
| save_format: npz |
| batch_size: ${batch_size} |
| min_window_size: ${act_seq_len} |
| max_window_size: ${act_seq_len} |
| proprio_state: ${datamodule.proprioception_dims} |
| obs_space: ${datamodule.observation_space} |
| skip_frames: 1 |
| pad: false |
| lang_folder: ${lang_folder} |
| aux_lang_loss_window: 8 |
| num_workers: ${num_workers} |
| action_seq_len: ${act_seq_len} |
| obs_seq_len: ${obs_seq_len} |
| future_range: 1 |
| use_extracted_rel_actions: ${use_extracted_rel_actions} |
| model: |
| _target_: mode.models.mode_agent.MoDEAgent |
| _recursive_: false |
| multistep: ${multistep} |
| use_lr_scheduler: true |
| entropy_gamma: 0.01 |
| router_z_delta: 0.0 |
| use_proprio: false |
| seed: ${seed} |
| sampler_type: ddim |
| num_sampling_steps: 10 |
| sigma_data: 0.5 |
| sigma_min: 0.001 |
| sigma_max: 80 |
| noise_scheduler: exponential |
| sigma_sample_density_type: loglogistic |
| ckpt_path: /home/yiming/pretrained_models/MoDE/MoDE_Pretrained |
| start_from_pretrained: true |
| act_window_size: ${act_seq_len} |
| latent_dim: 1024 |
| obs_enc_dim: ${obs_dim} |
| cond_dim: 512 |
| resnet_type: '50' |
| optimizer: |
| _target_: torch.optim.AdamW |
| transformer_weight_decay: 0.05 |
| obs_encoder_weight_decay: 0.05 |
| learning_rate: 0.0001 |
| betas: |
| - 0.9 |
| - 0.95 |
| lr_scheduler: |
| lr_scheduler: |
| init_lr: 0.0001 |
| init_lr_scale: 0.1 |
| final_lr_scale: 1.0e-06 |
| total_steps: 45000 |
| phase_ratio: (0.02, 0.08, 0.9) |
| lr: 0.0001 |
| model: |
| _target_: mode.models.edm_diffusion.score_wrappers.GCDenoiser |
| _recursive_: false |
| sigma_data: ${model.sigma_data} |
| inner_model: |
| _target_: mode.models.networks.modedit.MoDeDiT |
| action_dim: ${datamodule.action_space} |
| goal_dim: ${model.cond_dim} |
| obs_dim: 2048 |
| goal_conditioned: true |
| causal: true |
| use_custom_attn_mask: false |
| use_proprio: ${model.use_proprio} |
| state_dim: ${proprio_dims} |
| embed_dim: ${model.latent_dim} |
| n_layers: 10 |
| goal_seq_len: 1 |
| obs_seq_len: ${obs_seq_len} |
| action_seq_len: ${act_seq_len} |
| embed_pdrob: 0 |
| goal_drop: 0.1 |
| attn_pdrop: 0.3 |
| mlp_pdrop: 0.1 |
| n_heads: 8 |
| device: ${device} |
| linear_output: true |
| cond_router: true |
| num_experts: 4 |
| top_k: 2 |
| router_normalize: true |
| use_goal_in_routing: false |
| use_argmax: false |
| use_shared_expert: false |
| use_noise_token_as_input: true |
| init_style: olmoe |
| language_goal: |
| _target_: mode.models.networks.clip_lang_encoder.LangClip |
| _recursive_: false |
| model_name: ${clip_lang_model_name} |
| root_data_dir: data/lebai |
| lang_folder: lang_clip_resnet50 |
| vis_clip_model_name: ViT-B/16 |
| clip_lang_model_name: ViT-B/32 |
| log_dir: ./logs |
| slurm: false |
| seed: 421 |
| device: cuda |
| batch_size: 64 |
| devices: 4 |
| act_dim: 7 |
| proprio_dims: 7 |
| obs_dim: 512 |
| goal_dim: 512 |
| obs_seq_len: 1 |
| act_seq_len: 32 |
| multistep: 32 |
| p_last_state: 0 |
| gen_img_res: 112 |
| max_epochs: 100 |
| rollout_lh_skip_epochs: 9 |
| num_workers: 12 |
| benchmark_name: lebai_mode |
| use_extracted_rel_actions: true |
| trainer: |
| devices: ${devices} |
| precision: bf16 |
| max_epochs: ${max_epochs} |
| sync_batchnorm: true |
| accelerator: gpu |
| strategy: ddp |
| logger: |
| _target_: pytorch_lightning.loggers.WandbLogger |
| save_dir: . |
| name: logger |
| group: mode |
| log_model: false |
| project: ${benchmark_name} |
| entity: yimingwu0 |
| id: ??? |
|
|