yixuan1999 committed on
Commit
ff7dea0
·
verified ·
1 Parent(s): 77cd849

Upload folder using huggingface_hub

Browse files
bimanual_rope_cam0/.hydra/config.yaml ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment:
2
+ debug: ${debug}
3
+ tasks:
4
+ - training
5
+ num_nodes: 1
6
+ num_devices: 1
7
+ training:
8
+ precision: 32-true
9
+ compile: false
10
+ lr: 8.0e-05
11
+ batch_size: 4
12
+ max_epochs: -1
13
+ max_steps: 1000005
14
+ max_time: null
15
+ data:
16
+ num_workers: 4
17
+ shuffle: true
18
+ optim:
19
+ accumulate_grad_batches: 1
20
+ gradient_clip_val: 1.0
21
+ checkpointing:
22
+ every_n_train_steps: 10000
23
+ every_n_epochs: null
24
+ train_time_interval: null
25
+ enable_version_counter: false
26
+ log_every_n_steps: 100
27
+ validation:
28
+ precision: 16-mixed
29
+ compile: false
30
+ batch_size: 2
31
+ val_every_n_step: 30000
32
+ val_every_n_epoch: null
33
+ limit_batch: 1.0
34
+ inference_mode: true
35
+ data:
36
+ num_workers: 4
37
+ shuffle: false
38
+ test:
39
+ precision: 16-mixed
40
+ compile: false
41
+ batch_size: 8
42
+ limit_batch: 1
43
+ data:
44
+ num_workers: 16
45
+ shuffle: false
46
+ logging:
47
+ metrics:
48
+ - fvd
49
+ dataset:
50
+ debug: ${debug}
51
+ dataset_dir: /work/hdd/bcyd/ywang41/diffusion-forcing/data/real_aloha/bimanual_rope_1201
52
+ horizon: 10
53
+ aug_mode: none
54
+ n_frames: ${dataset.horizon}
55
+ observation_shape:
56
+ - 3
57
+ - ${dataset.resolution}
58
+ - ${dataset.resolution}
59
+ context_length: 2
60
+ frame_skip: ${dataset.skip_frame}
61
+ skip_frame: 1
62
+ pad_after: 7
63
+ pad_before: 1
64
+ rotation_rep: rotation_6d
65
+ seed: 42
66
+ val_ratio: 0.1
67
+ skip_idx: 1
68
+ use_cache: true
69
+ resolution: 128
70
+ obs_keys:
71
+ - camera_0_color
72
+ low_dim_keys: []
73
+ delta_action: false
74
+ goal_sample: intermediate
75
+ action_mode: bimanual_rope
76
+ load_mask: false
77
+ shape_meta:
78
+ action:
79
+ shape:
80
+ - 4
81
+ obs:
82
+ camera_0_color:
83
+ shape:
84
+ - 3
85
+ - ${dataset.resolution}
86
+ - ${dataset.resolution}
87
+ type: rgb
88
+ camera_1_color:
89
+ shape:
90
+ - 3
91
+ - ${dataset.resolution}
92
+ - ${dataset.resolution}
93
+ type: rgb
94
+ val_horizon: 200
95
+ algorithm:
96
+ debug: ${debug}
97
+ lr: ${experiment.training.lr}
98
+ weight_decay: 0.0001
99
+ warmup_steps: 10000
100
+ lr_scheduler: linear
101
+ optimizer_beta:
102
+ - 0.9
103
+ - 0.999
104
+ latent_dim: 512
105
+ action_dim: 8
106
+ enc_dim: 64
107
+ num_components: 1
108
+ obs_keys: ${dataset.obs_keys}
109
+ x_shape:
110
+ - ${eval:'3 * len(${dataset.obs_keys})'}
111
+ - ${dataset.resolution}
112
+ - ${dataset.resolution}
113
+ norm_scale: 6.0
114
+ num_latent_downsample: 2
115
+ num_views: ${eval:'len(${dataset.obs_keys})'}
116
+ num_latent_channel: ${eval:'4 * ${algorithm.num_views}'}
117
+ latent_resolution: ${eval:'${dataset.resolution} // int(2 ** ${algorithm.num_latent_downsample})'}
118
+ training_stage: 2
119
+ load_ae: ${eval:'None'}
120
+ dtype: ${torch:float}
121
+ mask_prev_action: false
122
+ device: cuda
123
+ noise_level: log_normal
124
+ val_render: true
125
+ scheduling_matrix: autoregressive
126
+ uncertainty_scale: 1.0
127
+ guidance_scale: 1.0
128
+ n_frames: ${dataset.horizon}
129
+ dyn_infer_steps: 1
130
+ dec_infer_steps: 3
131
+ last_frame_loss_only: false
132
+ prev_frame_noise_scale: 0.1
133
+ robust_latent: false
134
+ delta: ${eval:'0.00054 * ${algorithm.num_latent_channel} * ${algorithm.latent_resolution}
135
+ * ${algorithm.latent_resolution}'}
136
+ sampling_strategy: terminal_only
137
+ sampling_strategy_params: []
138
+ dynamics:
139
+ _target_: interactive_world_sim.algorithms.latent_dynamics.models.cm_latent_dynamics.CMLatentDynamics
140
+ action_dim: ${algorithm.action_dim}
141
+ latent_dim: ${algorithm.num_latent_channel}
142
+ dim: 64
143
+ action_emb_dim: 512
144
+ resnet_block_groups: 8
145
+ dim_mults:
146
+ - 1
147
+ - 2
148
+ attn_dim_head: 128
149
+ attn_heads: 4
150
+ use_linear_attn: true
151
+ use_init_temporal_attn: true
152
+ init_kernel_size: 5
153
+ is_causal: true
154
+ time_emb_type: rotary
155
+ dtype: ${algorithm.dtype}
156
+ noise_scheduler:
157
+ _target_: interactive_world_sim.utils.cm_utils.DDPMScheduler
158
+ x_shape: ${algorithm.x_shape}
159
+ timesteps: ${algorithm.diffusion.timesteps}
160
+ sampling_timesteps: ${algorithm.diffusion.sampling_timesteps}
161
+ beta_schedule: ${algorithm.diffusion.beta_schedule}
162
+ schedule_fn_kwargs: ${algorithm.diffusion.schedule_fn_kwargs}
163
+ objective: ${algorithm.diffusion.objective}
164
+ loss_weighting: uniform
165
+ snr_clip: 5.0
166
+ cum_snr_decay: ${algorithm.diffusion.cum_snr_decay}
167
+ ddim_sampling_eta: ${algorithm.diffusion.ddim_sampling_eta}
168
+ clip_noise: ${algorithm.diffusion.clip_noise}
169
+ stabilization_level: ${algorithm.diffusion.stabilization_level}
170
+ dtype: ${algorithm.dtype}
171
+ diffusion:
172
+ beta_schedule: sigmoid
173
+ objective: pred_v
174
+ use_fused_snr: true
175
+ cum_snr_decay: 0.96
176
+ clip_noise: 6.0
177
+ schedule_fn_kwargs: {}
178
+ timesteps: 1000
179
+ sampling_timesteps: 50
180
+ ddim_sampling_eta: 0.0
181
+ snr_clip: 5.0
182
+ model_channels: ${algorithm.enc_dim}
183
+ num_latent_downsample: ${algorithm.num_latent_downsample}
184
+ num_latent_channel: ${algorithm.num_latent_channel}
185
+ num_res_blocks: 2
186
+ attention_resolutions:
187
+ - 2
188
+ - 4
189
+ - 8
190
+ dropout: 0.1
191
+ channel_mult:
192
+ - 1
193
+ - 2
194
+ - 3
195
+ num_head_channels: 64
196
+ resblock_updown: true
197
+ use_scale_shift_norm: true
198
+ num_components: ${algorithm.num_components}
199
+ image_size: ${dataset.resolution}
200
+ stabilization_level: 15
201
+ metrics:
202
+ - fvd
203
+ cost_fn:
204
+ debug: ${debug}
205
+ lr: ${experiment.training.lr}
206
+ cfg_ae:
207
+ debug: ${debug}
208
+ lr: ${experiment.training.lr}
209
+ weight_decay: 0.0001
210
+ warmup_steps: 10000
211
+ optimizer_beta:
212
+ - 0.9
213
+ - 0.999
214
+ latent_dim: 32
215
+ action_dim: 10
216
+ enc_dim: 64
217
+ num_components: 1
218
+ obs_keys: ${dataset.obs_keys}
219
+ x_shape:
220
+ - ${eval:'3 * len(${dataset.obs_keys})'}
221
+ - ${dataset.resolution}
222
+ - ${dataset.resolution}
223
+ norm_scale: 6.0
224
+ num_latent_downsample: 2
225
+ num_latent_channel: 4
226
+ training_stage: 1
227
+ load_ae: ${eval:'None'}
228
+ device: cuda
229
+ noise_level: random_all
230
+ dynamics:
231
+ _target_: algorithms.latent_dynamics.models.film_conv2d_dyn_v3.FiLMConv2DV3Dyn
232
+ horizon: 9
233
+ action_dim: ${cost_fn.cfg_ae.action_dim}
234
+ cond_dim: ${cost_fn.cfg_ae.num_latent_channel}
235
+ diffusion:
236
+ beta_schedule: sigmoid
237
+ objective: pred_v
238
+ use_fused_snr: true
239
+ cum_snr_decay: 0.96
240
+ clip_noise: 6.0
241
+ schedule_fn_kwargs: {}
242
+ timesteps: 1000
243
+ sampling_timesteps: 50
244
+ ddim_sampling_eta: 0.0
245
+ snr_clip: 5.0
246
+ model_channels: ${cost_fn.cfg_ae.enc_dim}
247
+ num_latent_downsample: ${cost_fn.cfg_ae.num_latent_downsample}
248
+ num_latent_channel: ${cost_fn.cfg_ae.num_latent_channel}
249
+ num_res_blocks: 2
250
+ attention_resolutions:
251
+ - 2
252
+ - 4
253
+ - 8
254
+ dropout: 0.1
255
+ channel_mult:
256
+ - 1
257
+ - 2
258
+ - 3
259
+ num_head_channels: 64
260
+ resblock_updown: true
261
+ use_scale_shift_norm: true
262
+ num_components: ${cost_fn.cfg_ae.num_components}
263
+ image_size: ${dataset.resolution}
264
+ stabilization_level: 15
265
+ metrics:
266
+ - fvd
267
+ weight_decay: 0.0001
268
+ warmup_steps: 10000
269
+ optimizer_beta:
270
+ - 0.9
271
+ - 0.999
272
+ obs_keys: ${dataset.obs_keys}
273
+ x_shape:
274
+ - ${eval:'${cost_fn.cfg_ae.num_latent_channel} * len(${dataset.obs_keys})'}
275
+ - ${eval:'${dataset.resolution} // int(2 ** ${cost_fn.cfg_ae.num_latent_downsample})'}
276
+ - ${eval:'${dataset.resolution} // int(2 ** ${cost_fn.cfg_ae.num_latent_downsample})'}
277
+ load_ae: ${eval:'None'}
278
+ latent_dim: 512
279
+ enc_dim: 16
280
+ mlp_dims:
281
+ - 1024
282
+ - 512
283
+ device: cuda
284
+ debug: false
285
+ wandb:
286
+ entity: yixuan1999
287
+ project: diffusion-forcing
288
+ mode: online
289
+ resume: null
290
+ load: null
291
+ name: train_dyn_latent_student_v1_4_bimanual_rope_1201_cam_0_stage_2
bimanual_rope_cam0/.hydra/hydra.yaml ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}
4
+ sweep:
5
+ dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
6
+ subdir: ${hydra.job.num}
7
+ launcher:
8
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
9
+ sweeper:
10
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
11
+ max_batch_size: null
12
+ params: null
13
+ help:
14
+ app_name: ${hydra.job.name}
15
+ header: '${hydra.help.app_name} is powered by Hydra.
16
+
17
+ '
18
+ footer: 'Powered by Hydra (https://hydra.cc)
19
+
20
+ Use --hydra-help to view Hydra specific help
21
+
22
+ '
23
+ template: '${hydra.help.header}
24
+
25
+ == Configuration groups ==
26
+
27
+ Compose your configuration from those groups (group=option)
28
+
29
+
30
+ $APP_CONFIG_GROUPS
31
+
32
+
33
+ == Config ==
34
+
35
+ Override anything in the config (foo.bar=value)
36
+
37
+
38
+ $CONFIG
39
+
40
+
41
+ ${hydra.help.footer}
42
+
43
+ '
44
+ hydra_help:
45
+ template: 'Hydra (${hydra.runtime.version})
46
+
47
+ See https://hydra.cc for more info.
48
+
49
+
50
+ == Flags ==
51
+
52
+ $FLAGS_HELP
53
+
54
+
55
+ == Configuration groups ==
56
+
57
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
58
+ to command line)
59
+
60
+
61
+ $HYDRA_CONFIG_GROUPS
62
+
63
+
64
+ Use ''--cfg hydra'' to Show the Hydra config.
65
+
66
+ '
67
+ hydra_help: ???
68
+ hydra_logging:
69
+ version: 1
70
+ formatters:
71
+ simple:
72
+ format: '[%(asctime)s][HYDRA] %(message)s'
73
+ handlers:
74
+ console:
75
+ class: logging.StreamHandler
76
+ formatter: simple
77
+ stream: ext://sys.stdout
78
+ root:
79
+ level: INFO
80
+ handlers:
81
+ - console
82
+ loggers:
83
+ logging_example:
84
+ level: DEBUG
85
+ disable_existing_loggers: false
86
+ job_logging:
87
+ version: 1
88
+ formatters:
89
+ simple:
90
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
91
+ handlers:
92
+ console:
93
+ class: logging.StreamHandler
94
+ formatter: simple
95
+ stream: ext://sys.stdout
96
+ file:
97
+ class: logging.FileHandler
98
+ formatter: simple
99
+ filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
100
+ root:
101
+ level: INFO
102
+ handlers:
103
+ - console
104
+ - file
105
+ disable_existing_loggers: false
106
+ env: {}
107
+ mode: RUN
108
+ searchpath: []
109
+ callbacks: {}
110
+ output_subdir: .hydra
111
+ overrides:
112
+ hydra:
113
+ - hydra.mode=RUN
114
+ task:
115
+ - +name=train_dyn_latent_student_v1_4_bimanual_rope_1201_cam_0_stage_2
116
+ - algorithm=student_v1_4
117
+ - experiment=exp_latent_dyn
118
+ - dataset=real_aloha_dataset
119
+ - dataset.dataset_dir=/work/hdd/bcyd/ywang41/diffusion-forcing/data/real_aloha/bimanual_rope_1201
120
+ - dataset.horizon=10
121
+ - +dataset.val_horizon=200
122
+ - dataset.skip_frame=1
123
+ - dataset.obs_keys=[camera_0_color]
124
+ - dataset.action_mode=bimanual_rope
125
+ - experiment.training.batch_size=4
126
+ - experiment.training.max_steps=1000005
127
+ - experiment.training.log_every_n_steps=100
128
+ - experiment.validation.limit_batch=1.0
129
+ - experiment.validation.batch_size=2
130
+ - experiment.validation.val_every_n_step=30000
131
+ - experiment.training.checkpointing.every_n_train_steps=10000
132
+ - experiment.training.data.num_workers=4
133
+ - experiment.validation.data.num_workers=4
134
+ - algorithm.latent_dim=512
135
+ - algorithm.action_dim=8
136
+ - algorithm.training_stage=2
137
+ - algorithm.mask_prev_action=False
138
+ - algorithm.lr_scheduler=linear
139
+ - algorithm.noise_scheduler.loss_weighting=uniform
140
+ - algorithm.noise_scheduler.snr_clip=5.0
141
+ - algorithm.sampling_strategy=terminal_only
142
+ - algorithm.sampling_strategy_params=[]
143
+ - algorithm.dyn_infer_steps=1
144
+ - algorithm.dec_infer_steps=3
145
+ - algorithm.prev_frame_noise_scale=0.1
146
+ - algorithm.last_frame_loss_only=False
147
+ - algorithm.load_ae=outputs/2025-12-07/13-24-13/checkpoints/epoch\=29-step\=880000.ckpt
148
+ job:
149
+ name: main
150
+ chdir: null
151
+ override_dirname: +dataset.val_horizon=200,+name=train_dyn_latent_student_v1_4_bimanual_rope_1201_cam_0_stage_2,algorithm.action_dim=8,algorithm.dec_infer_steps=3,algorithm.dyn_infer_steps=1,algorithm.last_frame_loss_only=False,algorithm.latent_dim=512,algorithm.load_ae=outputs/2025-12-07/13-24-13/checkpoints/epoch\=29-step\=880000.ckpt,algorithm.lr_scheduler=linear,algorithm.mask_prev_action=False,algorithm.noise_scheduler.loss_weighting=uniform,algorithm.noise_scheduler.snr_clip=5.0,algorithm.prev_frame_noise_scale=0.1,algorithm.sampling_strategy=terminal_only,algorithm.sampling_strategy_params=[],algorithm.training_stage=2,algorithm=student_v1_4,dataset.action_mode=bimanual_rope,dataset.dataset_dir=/work/hdd/bcyd/ywang41/diffusion-forcing/data/real_aloha/bimanual_rope_1201,dataset.horizon=10,dataset.obs_keys=[camera_0_color],dataset.skip_frame=1,dataset=real_aloha_dataset,experiment.training.batch_size=4,experiment.training.checkpointing.every_n_train_steps=10000,experiment.training.data.num_workers=4,experiment.training.log_every_n_steps=100,experiment.training.max_steps=1000005,experiment.validation.batch_size=2,experiment.validation.data.num_workers=4,experiment.validation.limit_batch=1.0,experiment.validation.val_every_n_step=30000,experiment=exp_latent_dyn
152
+ id: ???
153
+ num: ???
154
+ config_name: config
155
+ env_set: {}
156
+ env_copy: []
157
+ config:
158
+ override_dirname:
159
+ kv_sep: '='
160
+ item_sep: ','
161
+ exclude_keys: []
162
+ runtime:
163
+ version: 1.3.2
164
+ version_base: '1.3'
165
+ cwd: /projects/bcyd/ywang41/diffusion-forcing
166
+ config_sources:
167
+ - path: hydra.conf
168
+ schema: pkg
169
+ provider: hydra
170
+ - path: /projects/bcyd/ywang41/diffusion-forcing/configurations
171
+ schema: file
172
+ provider: main
173
+ - path: ''
174
+ schema: structured
175
+ provider: schema
176
+ output_dir: /projects/bcyd/ywang41/diffusion-forcing/outputs/2026-01-07/23-27-27
177
+ choices:
178
+ cost_fn: cost_fn_v0_2
179
+ cost_fn/cfg_ae: latent_dyn_v3_2_film
180
+ cluster: null
181
+ algorithm: student_v1_4
182
+ dataset: real_aloha_dataset
183
+ experiment: exp_latent_dyn
184
+ hydra/env: default
185
+ hydra/callbacks: null
186
+ hydra/job_logging: default
187
+ hydra/hydra_logging: default
188
+ hydra/hydra_help: default
189
+ hydra/help: default
190
+ hydra/sweeper: basic
191
+ hydra/launcher: basic
192
+ hydra/output: default
193
+ verbose: false
bimanual_rope_cam0/.hydra/overrides.yaml ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ - +name=train_dyn_latent_student_v1_4_bimanual_rope_1201_cam_0_stage_2
2
+ - algorithm=student_v1_4
3
+ - experiment=exp_latent_dyn
4
+ - dataset=real_aloha_dataset
5
+ - dataset.dataset_dir=/work/hdd/bcyd/ywang41/diffusion-forcing/data/real_aloha/bimanual_rope_1201
6
+ - dataset.horizon=10
7
+ - +dataset.val_horizon=200
8
+ - dataset.skip_frame=1
9
+ - dataset.obs_keys=[camera_0_color]
10
+ - dataset.action_mode=bimanual_rope
11
+ - experiment.training.batch_size=4
12
+ - experiment.training.max_steps=1000005
13
+ - experiment.training.log_every_n_steps=100
14
+ - experiment.validation.limit_batch=1.0
15
+ - experiment.validation.batch_size=2
16
+ - experiment.validation.val_every_n_step=30000
17
+ - experiment.training.checkpointing.every_n_train_steps=10000
18
+ - experiment.training.data.num_workers=4
19
+ - experiment.validation.data.num_workers=4
20
+ - algorithm.latent_dim=512
21
+ - algorithm.action_dim=8
22
+ - algorithm.training_stage=2
23
+ - algorithm.mask_prev_action=False
24
+ - algorithm.lr_scheduler=linear
25
+ - algorithm.noise_scheduler.loss_weighting=uniform
26
+ - algorithm.noise_scheduler.snr_clip=5.0
27
+ - algorithm.sampling_strategy=terminal_only
28
+ - algorithm.sampling_strategy_params=[]
29
+ - algorithm.dyn_infer_steps=1
30
+ - algorithm.dec_infer_steps=3
31
+ - algorithm.prev_frame_noise_scale=0.1
32
+ - algorithm.last_frame_loss_only=False
33
+ - algorithm.load_ae=outputs/2025-12-07/13-24-13/checkpoints/epoch\=29-step\=880000.ckpt
bimanual_rope_cam0/checkpoints/best.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fd50e6b05eed1866ded620fe5177dc00fed0a6d9d8cf969f72b0651382944a1
3
+ size 232147820