Robotics
LeRobot
Safetensors
diffusion
U-RIL committed on
Commit
b55f325
·
verified ·
1 Parent(s): 47e3a34

Upload policy weights, train config and readme

Browse files
Files changed (4) hide show
  1. README.md +5 -5
  2. config.json +46 -26
  3. model.safetensors +2 -2
  4. train_config.json +61 -37
README.md CHANGED
@@ -1,21 +1,21 @@
1
  ---
2
- datasets: U-RIL/record-test
3
  library_name: lerobot
4
  license: apache-2.0
5
- model_name: act
6
  pipeline_tag: robotics
7
  tags:
8
- - act
9
  - lerobot
10
  - robotics
11
  ---
12
 
13
- # Model Card for act
14
 
15
  <!-- Provide a quick summary of what the model is/does. -->
16
 
17
 
18
- [Action Chunking with Transformers (ACT)](https://huggingface.co/papers/2304.13705) is an imitation-learning method that predicts short action chunks instead of single steps. It learns from teleoperated data and often achieves high success rates.
19
 
20
 
21
  This policy has been trained and pushed to the Hub using [LeRobot](https://github.com/huggingface/lerobot).
 
1
  ---
2
+ datasets: U-RIL/record-Pastery
3
  library_name: lerobot
4
  license: apache-2.0
5
+ model_name: diffusion
6
  pipeline_tag: robotics
7
  tags:
8
+ - diffusion
9
  - lerobot
10
  - robotics
11
  ---
12
 
13
+ # Model Card for diffusion
14
 
15
  <!-- Provide a quick summary of what the model is/does. -->
16
 
17
 
18
+ [Diffusion Policy](https://huggingface.co/papers/2303.04137) treats visuomotor control as a generative diffusion process, producing smooth, multi-step action trajectories that excel at contact-rich manipulation.
19
 
20
 
21
  This policy has been trained and pushed to the Hub using [LeRobot](https://github.com/huggingface/lerobot).
config.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "type": "act",
3
- "n_obs_steps": 1,
4
  "normalization_mapping": {
5
  "VISUAL": "MEAN_STD",
6
- "STATE": "MEAN_STD",
7
- "ACTION": "MEAN_STD"
8
  },
9
  "input_features": {
10
  "observation.state": {
@@ -13,11 +13,11 @@
13
  6
14
  ]
15
  },
16
- "observation.images.laptop": {
17
  "type": "VISUAL",
18
  "shape": [
19
  3,
20
- 360,
21
  640
22
  ]
23
  }
@@ -37,25 +37,45 @@
37
  "private": null,
38
  "tags": null,
39
  "license": null,
40
- "chunk_size": 100,
41
- "n_action_steps": 100,
 
42
  "vision_backbone": "resnet18",
43
- "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1",
44
- "replace_final_stride_with_dilation": false,
45
- "pre_norm": false,
46
- "dim_model": 512,
47
- "n_heads": 8,
48
- "dim_feedforward": 3200,
49
- "feedforward_activation": "relu",
50
- "n_encoder_layers": 4,
51
- "n_decoder_layers": 1,
52
- "use_vae": true,
53
- "latent_dim": 32,
54
- "n_vae_encoder_layers": 4,
55
- "temporal_ensemble_coeff": null,
56
- "dropout": 0.1,
57
- "kl_weight": 10.0,
58
- "optimizer_lr": 1e-05,
59
- "optimizer_weight_decay": 0.0001,
60
- "optimizer_lr_backbone": 1e-05
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  }
 
1
  {
2
+ "type": "diffusion",
3
+ "n_obs_steps": 2,
4
  "normalization_mapping": {
5
  "VISUAL": "MEAN_STD",
6
+ "STATE": "MIN_MAX",
7
+ "ACTION": "MIN_MAX"
8
  },
9
  "input_features": {
10
  "observation.state": {
 
13
  6
14
  ]
15
  },
16
+ "observation.images.front": {
17
  "type": "VISUAL",
18
  "shape": [
19
  3,
20
+ 480,
21
  640
22
  ]
23
  }
 
37
  "private": null,
38
  "tags": null,
39
  "license": null,
40
+ "horizon": 16,
41
+ "n_action_steps": 8,
42
+ "drop_n_last_frames": 7,
43
  "vision_backbone": "resnet18",
44
+ "crop_shape": [
45
+ 84,
46
+ 84
47
+ ],
48
+ "crop_is_random": true,
49
+ "pretrained_backbone_weights": null,
50
+ "use_group_norm": true,
51
+ "spatial_softmax_num_keypoints": 32,
52
+ "use_separate_rgb_encoder_per_camera": false,
53
+ "down_dims": [
54
+ 512,
55
+ 1024,
56
+ 2048
57
+ ],
58
+ "kernel_size": 5,
59
+ "n_groups": 8,
60
+ "diffusion_step_embed_dim": 128,
61
+ "use_film_scale_modulation": true,
62
+ "noise_scheduler_type": "DDPM",
63
+ "num_train_timesteps": 100,
64
+ "beta_schedule": "squaredcos_cap_v2",
65
+ "beta_start": 0.0001,
66
+ "beta_end": 0.02,
67
+ "prediction_type": "epsilon",
68
+ "clip_sample": true,
69
+ "clip_sample_range": 1.0,
70
+ "num_inference_steps": null,
71
+ "do_mask_loss_for_padding": false,
72
+ "optimizer_lr": 0.0001,
73
+ "optimizer_betas": [
74
+ 0.95,
75
+ 0.999
76
+ ],
77
+ "optimizer_eps": 1e-08,
78
+ "optimizer_weight_decay": 1e-06,
79
+ "scheduler_name": "cosine",
80
+ "scheduler_warmup_steps": 500
81
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:880348cf1ba910f0f60f84c489dcf0a135576cb69ce05b3132d54dffe84fda2e
3
- size 206700800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1593d16c4d0b089b31d9c4c9d03d99bb0560893b727631085c348ecf6c64af9
3
+ size 1051837384
train_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "dataset": {
3
- "repo_id": "U-RIL/record-test",
4
  "root": null,
5
  "episodes": null,
6
  "image_transforms": {
@@ -66,12 +66,12 @@
66
  },
67
  "env": null,
68
  "policy": {
69
- "type": "act",
70
- "n_obs_steps": 1,
71
  "normalization_mapping": {
72
  "VISUAL": "MEAN_STD",
73
- "STATE": "MEAN_STD",
74
- "ACTION": "MEAN_STD"
75
  },
76
  "input_features": {
77
  "observation.state": {
@@ -80,11 +80,11 @@
80
  6
81
  ]
82
  },
83
- "observation.images.laptop": {
84
  "type": "VISUAL",
85
  "shape": [
86
  3,
87
- 360,
88
  640
89
  ]
90
  }
@@ -104,52 +104,76 @@
104
  "private": null,
105
  "tags": null,
106
  "license": null,
107
- "chunk_size": 100,
108
- "n_action_steps": 100,
 
109
  "vision_backbone": "resnet18",
110
- "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1",
111
- "replace_final_stride_with_dilation": false,
112
- "pre_norm": false,
113
- "dim_model": 512,
114
- "n_heads": 8,
115
- "dim_feedforward": 3200,
116
- "feedforward_activation": "relu",
117
- "n_encoder_layers": 4,
118
- "n_decoder_layers": 1,
119
- "use_vae": true,
120
- "latent_dim": 32,
121
- "n_vae_encoder_layers": 4,
122
- "temporal_ensemble_coeff": null,
123
- "dropout": 0.1,
124
- "kl_weight": 10.0,
125
- "optimizer_lr": 1e-05,
126
- "optimizer_weight_decay": 0.0001,
127
- "optimizer_lr_backbone": 1e-05
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  },
129
- "output_dir": "outputs/train/act_record_test",
130
- "job_name": "act_record_test",
131
  "resume": false,
132
  "seed": 1000,
133
  "num_workers": 4,
134
- "batch_size": 8,
135
- "steps": 10000,
136
  "eval_freq": 2000,
137
  "log_freq": 200,
138
  "save_checkpoint": true,
139
- "save_freq": 20000,
140
  "use_policy_training_preset": true,
141
  "optimizer": {
142
- "type": "adamw",
143
- "lr": 1e-05,
144
- "weight_decay": 0.0001,
145
  "grad_clip_norm": 10.0,
146
  "betas": [
147
- 0.9,
148
  0.999
149
  ],
150
  "eps": 1e-08
151
  },
152
- "scheduler": null,
 
 
 
 
153
  "eval": {
154
  "n_episodes": 50,
155
  "batch_size": 50,
 
1
  {
2
  "dataset": {
3
+ "repo_id": "U-RIL/record-Pastery",
4
  "root": null,
5
  "episodes": null,
6
  "image_transforms": {
 
66
  },
67
  "env": null,
68
  "policy": {
69
+ "type": "diffusion",
70
+ "n_obs_steps": 2,
71
  "normalization_mapping": {
72
  "VISUAL": "MEAN_STD",
73
+ "STATE": "MIN_MAX",
74
+ "ACTION": "MIN_MAX"
75
  },
76
  "input_features": {
77
  "observation.state": {
 
80
  6
81
  ]
82
  },
83
+ "observation.images.front": {
84
  "type": "VISUAL",
85
  "shape": [
86
  3,
87
+ 480,
88
  640
89
  ]
90
  }
 
104
  "private": null,
105
  "tags": null,
106
  "license": null,
107
+ "horizon": 16,
108
+ "n_action_steps": 8,
109
+ "drop_n_last_frames": 7,
110
  "vision_backbone": "resnet18",
111
+ "crop_shape": [
112
+ 84,
113
+ 84
114
+ ],
115
+ "crop_is_random": true,
116
+ "pretrained_backbone_weights": null,
117
+ "use_group_norm": true,
118
+ "spatial_softmax_num_keypoints": 32,
119
+ "use_separate_rgb_encoder_per_camera": false,
120
+ "down_dims": [
121
+ 512,
122
+ 1024,
123
+ 2048
124
+ ],
125
+ "kernel_size": 5,
126
+ "n_groups": 8,
127
+ "diffusion_step_embed_dim": 128,
128
+ "use_film_scale_modulation": true,
129
+ "noise_scheduler_type": "DDPM",
130
+ "num_train_timesteps": 100,
131
+ "beta_schedule": "squaredcos_cap_v2",
132
+ "beta_start": 0.0001,
133
+ "beta_end": 0.02,
134
+ "prediction_type": "epsilon",
135
+ "clip_sample": true,
136
+ "clip_sample_range": 1.0,
137
+ "num_inference_steps": null,
138
+ "do_mask_loss_for_padding": false,
139
+ "optimizer_lr": 0.0001,
140
+ "optimizer_betas": [
141
+ 0.95,
142
+ 0.999
143
+ ],
144
+ "optimizer_eps": 1e-08,
145
+ "optimizer_weight_decay": 1e-06,
146
+ "scheduler_name": "cosine",
147
+ "scheduler_warmup_steps": 500
148
  },
149
+ "output_dir": "outputs/train/diffusion_so100_test",
150
+ "job_name": "diffusion_92_ep",
151
  "resume": false,
152
  "seed": 1000,
153
  "num_workers": 4,
154
+ "batch_size": 16,
155
+ "steps": 22000,
156
  "eval_freq": 2000,
157
  "log_freq": 200,
158
  "save_checkpoint": true,
159
+ "save_freq": 2000,
160
  "use_policy_training_preset": true,
161
  "optimizer": {
162
+ "type": "adam",
163
+ "lr": 0.0001,
164
+ "weight_decay": 1e-06,
165
  "grad_clip_norm": 10.0,
166
  "betas": [
167
+ 0.95,
168
  0.999
169
  ],
170
  "eps": 1e-08
171
  },
172
+ "scheduler": {
173
+ "type": "diffuser",
174
+ "num_warmup_steps": 500,
175
+ "name": "cosine"
176
+ },
177
  "eval": {
178
  "n_episodes": 50,
179
  "batch_size": 50,