Robotics
LeRobot
Safetensors
diffusion
Deason11 committed on
Commit
b83f6c0
·
verified ·
1 Parent(s): 8edf3e6

Upload policy weights, train config and readme

Browse files
Files changed (4) hide show
  1. README.md +5 -5
  2. config.json +23 -31
  3. model.safetensors +2 -2
  4. train_config.json +30 -44
README.md CHANGED
@@ -2,20 +2,20 @@
2
  datasets: Deason11/pick_the_blue_cuboid_and_put_it_into_the_box
3
  library_name: lerobot
4
  license: apache-2.0
5
- model_name: pi0
6
  pipeline_tag: robotics
7
  tags:
8
- - pi0
9
- - robotics
10
  - lerobot
 
 
11
  ---
12
 
13
- # Model Card for pi0
14
 
15
  <!-- Provide a quick summary of what the model is/does. -->
16
 
17
 
18
- [Pi0](https://huggingface.co/papers/2410.24164) is a generalist vision-language-action transformer that converts multimodal observations and text instructions into robot actions for zero-shot task transfer.
19
 
20
 
21
  This policy has been trained and pushed to the Hub using [LeRobot](https://github.com/huggingface/lerobot).
 
2
  datasets: Deason11/pick_the_blue_cuboid_and_put_it_into_the_box
3
  library_name: lerobot
4
  license: apache-2.0
5
+ model_name: act
6
  pipeline_tag: robotics
7
  tags:
 
 
8
  - lerobot
9
+ - robotics
10
+ - act
11
  ---
12
 
13
+ # Model Card for act
14
 
15
  <!-- Provide a quick summary of what the model is/does. -->
16
 
17
 
18
+ [Action Chunking with Transformers (ACT)](https://huggingface.co/papers/2304.13705) is an imitation-learning method that predicts short action chunks instead of single steps. It learns from teleoperated data and often achieves high success rates.
19
 
20
 
21
  This policy has been trained and pushed to the Hub using [LeRobot](https://github.com/huggingface/lerobot).
config.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "type": "pi0",
3
  "n_obs_steps": 1,
4
  "normalization_mapping": {
5
- "VISUAL": "IDENTITY",
6
  "STATE": "MEAN_STD",
7
  "ACTION": "MEAN_STD"
8
  },
@@ -45,33 +45,25 @@
45
  "private": null,
46
  "tags": null,
47
  "license": null,
48
- "chunk_size": 50,
49
- "n_action_steps": 50,
50
- "max_state_dim": 32,
51
- "max_action_dim": 32,
52
- "resize_imgs_with_padding": [
53
- 224,
54
- 224
55
- ],
56
- "empty_cameras": 0,
57
- "adapt_to_pi_aloha": false,
58
- "use_delta_joint_actions_aloha": false,
59
- "tokenizer_max_length": 48,
60
- "proj_width": 1024,
61
- "num_steps": 10,
62
- "use_cache": true,
63
- "attention_implementation": "eager",
64
- "freeze_vision_encoder": true,
65
- "train_expert_only": true,
66
- "train_state_proj": true,
67
- "optimizer_lr": 2.5e-05,
68
- "optimizer_betas": [
69
- 0.9,
70
- 0.95
71
- ],
72
- "optimizer_eps": 1e-08,
73
- "optimizer_weight_decay": 1e-10,
74
- "scheduler_warmup_steps": 1000,
75
- "scheduler_decay_steps": 30000,
76
- "scheduler_decay_lr": 2.5e-06
77
  }
 
1
  {
2
+ "type": "act",
3
  "n_obs_steps": 1,
4
  "normalization_mapping": {
5
+ "VISUAL": "MEAN_STD",
6
  "STATE": "MEAN_STD",
7
  "ACTION": "MEAN_STD"
8
  },
 
45
  "private": null,
46
  "tags": null,
47
  "license": null,
48
+ "chunk_size": 100,
49
+ "n_action_steps": 100,
50
+ "vision_backbone": "resnet18",
51
+ "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1",
52
+ "replace_final_stride_with_dilation": false,
53
+ "pre_norm": false,
54
+ "dim_model": 512,
55
+ "n_heads": 8,
56
+ "dim_feedforward": 3200,
57
+ "feedforward_activation": "relu",
58
+ "n_encoder_layers": 4,
59
+ "n_decoder_layers": 1,
60
+ "use_vae": true,
61
+ "latent_dim": 32,
62
+ "n_vae_encoder_layers": 4,
63
+ "temporal_ensemble_coeff": null,
64
+ "dropout": 0.1,
65
+ "kl_weight": 10.0,
66
+ "optimizer_lr": 1e-05,
67
+ "optimizer_weight_decay": 0.0001,
68
+ "optimizer_lr_backbone": 1e-05
 
 
 
 
 
 
 
 
69
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cea14bb651f84eb15719b3f48006ec3c3a8cc18ea1c560ccf611174d971faac
3
- size 7536025176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e28263eaf07f46e83e66418c5e1627e6e2773588aff099d3d559f6870556e7d
3
+ size 206709364
train_config.json CHANGED
@@ -66,10 +66,10 @@
66
  },
67
  "env": null,
68
  "policy": {
69
- "type": "pi0",
70
  "n_obs_steps": 1,
71
  "normalization_mapping": {
72
- "VISUAL": "IDENTITY",
73
  "STATE": "MEAN_STD",
74
  "ACTION": "MEAN_STD"
75
  },
@@ -112,38 +112,30 @@
112
  "private": null,
113
  "tags": null,
114
  "license": null,
115
- "chunk_size": 50,
116
- "n_action_steps": 50,
117
- "max_state_dim": 32,
118
- "max_action_dim": 32,
119
- "resize_imgs_with_padding": [
120
- 224,
121
- 224
122
- ],
123
- "empty_cameras": 0,
124
- "adapt_to_pi_aloha": false,
125
- "use_delta_joint_actions_aloha": false,
126
- "tokenizer_max_length": 48,
127
- "proj_width": 1024,
128
- "num_steps": 10,
129
- "use_cache": true,
130
- "attention_implementation": "eager",
131
- "freeze_vision_encoder": true,
132
- "train_expert_only": true,
133
- "train_state_proj": true,
134
- "optimizer_lr": 2.5e-05,
135
- "optimizer_betas": [
136
- 0.9,
137
- 0.95
138
- ],
139
- "optimizer_eps": 1e-08,
140
- "optimizer_weight_decay": 1e-10,
141
- "scheduler_warmup_steps": 1000,
142
- "scheduler_decay_steps": 30000,
143
- "scheduler_decay_lr": 2.5e-06
144
  },
145
- "output_dir": "outputs/train/2025-08-08/21-24-14_pi0",
146
- "job_name": "pi0",
147
  "resume": false,
148
  "seed": 1000,
149
  "num_workers": 4,
@@ -156,22 +148,16 @@
156
  "use_policy_training_preset": true,
157
  "optimizer": {
158
  "type": "adamw",
159
- "lr": 2.5e-05,
160
- "weight_decay": 1e-10,
161
  "grad_clip_norm": 10.0,
162
  "betas": [
163
  0.9,
164
- 0.95
165
  ],
166
  "eps": 1e-08
167
  },
168
- "scheduler": {
169
- "type": "cosine_decay_with_warmup",
170
- "num_warmup_steps": 1000,
171
- "num_decay_steps": 30000,
172
- "peak_lr": 2.5e-05,
173
- "decay_lr": 2.5e-06
174
- },
175
  "eval": {
176
  "n_episodes": 50,
177
  "batch_size": 50,
@@ -183,7 +169,7 @@
183
  "project": "lerobot",
184
  "entity": null,
185
  "notes": null,
186
- "run_id": "1u4th40g",
187
  "mode": null
188
  }
189
  }
 
66
  },
67
  "env": null,
68
  "policy": {
69
+ "type": "act",
70
  "n_obs_steps": 1,
71
  "normalization_mapping": {
72
+ "VISUAL": "MEAN_STD",
73
  "STATE": "MEAN_STD",
74
  "ACTION": "MEAN_STD"
75
  },
 
112
  "private": null,
113
  "tags": null,
114
  "license": null,
115
+ "chunk_size": 100,
116
+ "n_action_steps": 100,
117
+ "vision_backbone": "resnet18",
118
+ "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1",
119
+ "replace_final_stride_with_dilation": false,
120
+ "pre_norm": false,
121
+ "dim_model": 512,
122
+ "n_heads": 8,
123
+ "dim_feedforward": 3200,
124
+ "feedforward_activation": "relu",
125
+ "n_encoder_layers": 4,
126
+ "n_decoder_layers": 1,
127
+ "use_vae": true,
128
+ "latent_dim": 32,
129
+ "n_vae_encoder_layers": 4,
130
+ "temporal_ensemble_coeff": null,
131
+ "dropout": 0.1,
132
+ "kl_weight": 10.0,
133
+ "optimizer_lr": 1e-05,
134
+ "optimizer_weight_decay": 0.0001,
135
+ "optimizer_lr_backbone": 1e-05
 
 
 
 
 
 
 
 
136
  },
137
+ "output_dir": "outputs/train/2025-08-10/20-24-41_act",
138
+ "job_name": "act",
139
  "resume": false,
140
  "seed": 1000,
141
  "num_workers": 4,
 
148
  "use_policy_training_preset": true,
149
  "optimizer": {
150
  "type": "adamw",
151
+ "lr": 1e-05,
152
+ "weight_decay": 0.0001,
153
  "grad_clip_norm": 10.0,
154
  "betas": [
155
  0.9,
156
+ 0.999
157
  ],
158
  "eps": 1e-08
159
  },
160
+ "scheduler": null,
 
 
 
 
 
 
161
  "eval": {
162
  "n_episodes": 50,
163
  "batch_size": 50,
 
169
  "project": "lerobot",
170
  "entity": null,
171
  "notes": null,
172
+ "run_id": "j4ecaamf",
173
  "mode": null
174
  }
175
  }