FaLLOUTan committed on
Commit
a91c4e8
·
verified ·
1 Parent(s): 7263591

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. config.json +26 -46
  2. model.safetensors +2 -2
  3. train_config.json +34 -58
config.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "type": "diffusion",
3
- "n_obs_steps": 2,
4
  "normalization_mapping": {
5
  "VISUAL": "MEAN_STD",
6
- "STATE": "MIN_MAX",
7
- "ACTION": "MIN_MAX"
8
  },
9
  "input_features": {
10
  "observation.state": {
@@ -13,7 +13,7 @@
13
  6
14
  ]
15
  },
16
- "observation.images.side": {
17
  "type": "VISUAL",
18
  "shape": [
19
  3,
@@ -21,7 +21,7 @@
21
  640
22
  ]
23
  },
24
- "observation.images.wrist": {
25
  "type": "VISUAL",
26
  "shape": [
27
  3,
@@ -48,45 +48,25 @@
48
  },
49
  "device": "cuda",
50
  "use_amp": false,
51
- "horizon": 16,
52
- "n_action_steps": 8,
53
- "drop_n_last_frames": 7,
54
  "vision_backbone": "resnet18",
55
- "crop_shape": [
56
- 84,
57
- 84
58
- ],
59
- "crop_is_random": true,
60
- "pretrained_backbone_weights": null,
61
- "use_group_norm": true,
62
- "spatial_softmax_num_keypoints": 32,
63
- "use_separate_rgb_encoder_per_camera": false,
64
- "down_dims": [
65
- 512,
66
- 1024,
67
- 2048
68
- ],
69
- "kernel_size": 5,
70
- "n_groups": 8,
71
- "diffusion_step_embed_dim": 128,
72
- "use_film_scale_modulation": true,
73
- "noise_scheduler_type": "DDPM",
74
- "num_train_timesteps": 100,
75
- "beta_schedule": "squaredcos_cap_v2",
76
- "beta_start": 0.0001,
77
- "beta_end": 0.02,
78
- "prediction_type": "epsilon",
79
- "clip_sample": true,
80
- "clip_sample_range": 1.0,
81
- "num_inference_steps": null,
82
- "do_mask_loss_for_padding": false,
83
- "optimizer_lr": 0.0001,
84
- "optimizer_betas": [
85
- 0.95,
86
- 0.999
87
- ],
88
- "optimizer_eps": 1e-08,
89
- "optimizer_weight_decay": 1e-06,
90
- "scheduler_name": "cosine",
91
- "scheduler_warmup_steps": 500
92
  }
 
1
  {
2
+ "type": "act",
3
+ "n_obs_steps": 1,
4
  "normalization_mapping": {
5
  "VISUAL": "MEAN_STD",
6
+ "STATE": "MEAN_STD",
7
+ "ACTION": "MEAN_STD"
8
  },
9
  "input_features": {
10
  "observation.state": {
 
13
  6
14
  ]
15
  },
16
+ "observation.images.handeye": {
17
  "type": "VISUAL",
18
  "shape": [
19
  3,
 
21
  640
22
  ]
23
  },
24
+ "observation.images.side": {
25
  "type": "VISUAL",
26
  "shape": [
27
  3,
 
48
  },
49
  "device": "cuda",
50
  "use_amp": false,
51
+ "chunk_size": 100,
52
+ "n_action_steps": 100,
 
53
  "vision_backbone": "resnet18",
54
+ "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1",
55
+ "replace_final_stride_with_dilation": false,
56
+ "pre_norm": false,
57
+ "dim_model": 512,
58
+ "n_heads": 8,
59
+ "dim_feedforward": 3200,
60
+ "feedforward_activation": "relu",
61
+ "n_encoder_layers": 4,
62
+ "n_decoder_layers": 1,
63
+ "use_vae": true,
64
+ "latent_dim": 32,
65
+ "n_vae_encoder_layers": 4,
66
+ "temporal_ensemble_coeff": null,
67
+ "dropout": 0.1,
68
+ "kl_weight": 10.0,
69
+ "optimizer_lr": 1e-05,
70
+ "optimizer_weight_decay": 0.0001,
71
+ "optimizer_lr_backbone": 1e-05
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5bf47709f69c24494d92aa825696a1c533cd6f7b907ac4a5c4c3b265a86a710f
3
- size 1081198080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a58b510b038c0f927f75b2c10427ecd2b3cff6a155fcc9e62a991df2564ea37
3
+ size 206701344
train_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "dataset": {
3
- "repo_id": "Rorschach4153/so101_30_1",
4
  "root": null,
5
  "episodes": null,
6
  "image_transforms": {
@@ -66,12 +66,12 @@
66
  },
67
  "env": null,
68
  "policy": {
69
- "type": "diffusion",
70
- "n_obs_steps": 2,
71
  "normalization_mapping": {
72
  "VISUAL": "MEAN_STD",
73
- "STATE": "MIN_MAX",
74
- "ACTION": "MIN_MAX"
75
  },
76
  "input_features": {
77
  "observation.state": {
@@ -80,7 +80,7 @@
80
  6
81
  ]
82
  },
83
- "observation.images.side": {
84
  "type": "VISUAL",
85
  "shape": [
86
  3,
@@ -88,7 +88,7 @@
88
  640
89
  ]
90
  },
91
- "observation.images.wrist": {
92
  "type": "VISUAL",
93
  "shape": [
94
  3,
@@ -115,50 +115,30 @@
115
  },
116
  "device": "cuda",
117
  "use_amp": false,
118
- "horizon": 16,
119
- "n_action_steps": 8,
120
- "drop_n_last_frames": 7,
121
  "vision_backbone": "resnet18",
122
- "crop_shape": [
123
- 84,
124
- 84
125
- ],
126
- "crop_is_random": true,
127
- "pretrained_backbone_weights": null,
128
- "use_group_norm": true,
129
- "spatial_softmax_num_keypoints": 32,
130
- "use_separate_rgb_encoder_per_camera": false,
131
- "down_dims": [
132
- 512,
133
- 1024,
134
- 2048
135
- ],
136
- "kernel_size": 5,
137
- "n_groups": 8,
138
- "diffusion_step_embed_dim": 128,
139
- "use_film_scale_modulation": true,
140
- "noise_scheduler_type": "DDPM",
141
- "num_train_timesteps": 100,
142
- "beta_schedule": "squaredcos_cap_v2",
143
- "beta_start": 0.0001,
144
- "beta_end": 0.02,
145
- "prediction_type": "epsilon",
146
- "clip_sample": true,
147
- "clip_sample_range": 1.0,
148
- "num_inference_steps": null,
149
- "do_mask_loss_for_padding": false,
150
- "optimizer_lr": 0.0001,
151
- "optimizer_betas": [
152
- 0.95,
153
- 0.999
154
- ],
155
- "optimizer_eps": 1e-08,
156
- "optimizer_weight_decay": 1e-06,
157
- "scheduler_name": "cosine",
158
- "scheduler_warmup_steps": 500
159
  },
160
- "output_dir": "outputs/train/act_so101_test_09",
161
- "job_name": "act_so101_test",
162
  "resume": false,
163
  "seed": 1000,
164
  "num_workers": 4,
@@ -170,21 +150,17 @@
170
  "save_freq": 20000,
171
  "use_policy_training_preset": true,
172
  "optimizer": {
173
- "type": "adam",
174
- "lr": 0.0001,
175
- "weight_decay": 1e-06,
176
  "grad_clip_norm": 10.0,
177
  "betas": [
178
- 0.95,
179
  0.999
180
  ],
181
  "eps": 1e-08
182
  },
183
- "scheduler": {
184
- "type": "diffuser",
185
- "num_warmup_steps": 500,
186
- "name": "cosine"
187
- },
188
  "eval": {
189
  "n_episodes": 50,
190
  "batch_size": 50,
 
1
  {
2
  "dataset": {
3
+ "repo_id": "pbvr/so101_test014",
4
  "root": null,
5
  "episodes": null,
6
  "image_transforms": {
 
66
  },
67
  "env": null,
68
  "policy": {
69
+ "type": "act",
70
+ "n_obs_steps": 1,
71
  "normalization_mapping": {
72
  "VISUAL": "MEAN_STD",
73
+ "STATE": "MEAN_STD",
74
+ "ACTION": "MEAN_STD"
75
  },
76
  "input_features": {
77
  "observation.state": {
 
80
  6
81
  ]
82
  },
83
+ "observation.images.handeye": {
84
  "type": "VISUAL",
85
  "shape": [
86
  3,
 
88
  640
89
  ]
90
  },
91
+ "observation.images.side": {
92
  "type": "VISUAL",
93
  "shape": [
94
  3,
 
115
  },
116
  "device": "cuda",
117
  "use_amp": false,
118
+ "chunk_size": 100,
119
+ "n_action_steps": 100,
 
120
  "vision_backbone": "resnet18",
121
+ "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1",
122
+ "replace_final_stride_with_dilation": false,
123
+ "pre_norm": false,
124
+ "dim_model": 512,
125
+ "n_heads": 8,
126
+ "dim_feedforward": 3200,
127
+ "feedforward_activation": "relu",
128
+ "n_encoder_layers": 4,
129
+ "n_decoder_layers": 1,
130
+ "use_vae": true,
131
+ "latent_dim": 32,
132
+ "n_vae_encoder_layers": 4,
133
+ "temporal_ensemble_coeff": null,
134
+ "dropout": 0.1,
135
+ "kl_weight": 10.0,
136
+ "optimizer_lr": 1e-05,
137
+ "optimizer_weight_decay": 0.0001,
138
+ "optimizer_lr_backbone": 1e-05
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  },
140
+ "output_dir": "outputs/train/act_so101_test_50",
141
+ "job_name": "act_so101_test_01",
142
  "resume": false,
143
  "seed": 1000,
144
  "num_workers": 4,
 
150
  "save_freq": 20000,
151
  "use_policy_training_preset": true,
152
  "optimizer": {
153
+ "type": "adamw",
154
+ "lr": 1e-05,
155
+ "weight_decay": 0.0001,
156
  "grad_clip_norm": 10.0,
157
  "betas": [
158
+ 0.9,
159
  0.999
160
  ],
161
  "eps": 1e-08
162
  },
163
+ "scheduler": null,
 
 
 
 
164
  "eval": {
165
  "n_episodes": 50,
166
  "batch_size": 50,