Robotics · LeRobot · Safetensors · act

SonDePoisson committed · Commit d6eec22 · verified · 1 parent: 7bfd39c

Upload policy weights, train config and readme

Files changed (4)
  1. README.md +5 -8
  2. config.json +69 -0
  3. model.safetensors +3 -0
  4. train_config.json +175 -0
README.md CHANGED
@@ -1,24 +1,21 @@
 ---
-datasets:
-- SonDePoisson/so101_test_dataset
+datasets: SonDePoisson/so101_test_dataset
 library_name: lerobot
 license: apache-2.0
-model_name: reward_classifier
+model_name: act
 pipeline_tag: robotics
 tags:
+- act
 - lerobot
-- reward_classifier
 - robotics
-base_model:
-- lerobot/smolvla_base
 ---

-# Model Card for reward_classifier
+# Model Card for act

 <!-- Provide a quick summary of what the model is/does. -->


-A reward classifier is a lightweight neural network that scores observations or trajectories for task success, providing a learned reward signal or offline evaluation when explicit rewards are unavailable.
+[Action Chunking with Transformers (ACT)](https://huggingface.co/papers/2304.13705) is an imitation-learning method that predicts short action chunks instead of single steps. It learns from teleoperated data and often achieves high success rates.


 This policy has been trained and pushed to the Hub using [LeRobot](https://github.com/huggingface/lerobot).
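
The uploaded weights load directly from the Hub. A minimal loading sketch, assuming a recent lerobot release (older releases expose the same class under `lerobot.common.policies.act.modeling_act`):

```python
# Minimal loading sketch -- the import path is assumed for a recent lerobot
# release; older releases use lerobot.common.policies.act.modeling_act.
from lerobot.policies.act.modeling_act import ACTPolicy

policy = ACTPolicy.from_pretrained("SonDePoisson/so101_test_model")
policy.eval()
print(policy.config.chunk_size)  # 100, per config.json below
```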
config.json ADDED
@@ -0,0 +1,69 @@
+{
+    "type": "act",
+    "n_obs_steps": 1,
+    "normalization_mapping": {
+        "VISUAL": "MEAN_STD",
+        "STATE": "MEAN_STD",
+        "ACTION": "MEAN_STD"
+    },
+    "input_features": {
+        "observation.state": {
+            "type": "STATE",
+            "shape": [
+                21
+            ]
+        },
+        "observation.images.side": {
+            "type": "VISUAL",
+            "shape": [
+                3,
+                256,
+                256
+            ]
+        },
+        "observation.images.top": {
+            "type": "VISUAL",
+            "shape": [
+                3,
+                256,
+                256
+            ]
+        }
+    },
+    "output_features": {
+        "action": {
+            "type": "ACTION",
+            "shape": [
+                4
+            ]
+        }
+    },
+    "device": "mps",
+    "use_amp": false,
+    "push_to_hub": true,
+    "repo_id": "SonDePoisson/so101_test_model",
+    "private": null,
+    "tags": null,
+    "license": null,
+    "chunk_size": 100,
+    "n_action_steps": 100,
+    "vision_backbone": "resnet18",
+    "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1",
+    "replace_final_stride_with_dilation": false,
+    "pre_norm": false,
+    "dim_model": 512,
+    "n_heads": 8,
+    "dim_feedforward": 3200,
+    "feedforward_activation": "relu",
+    "n_encoder_layers": 4,
+    "n_decoder_layers": 1,
+    "use_vae": true,
+    "latent_dim": 32,
+    "n_vae_encoder_layers": 4,
+    "temporal_ensemble_coeff": null,
+    "dropout": 0.1,
+    "kl_weight": 10.0,
+    "optimizer_lr": 1e-05,
+    "optimizer_weight_decay": 0.0001,
+    "optimizer_lr_backbone": 1e-05
+}
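
With `chunk_size` and `n_action_steps` both at 100 and `temporal_ensemble_coeff` left null, the policy predicts a 100-step action chunk, serves it one action at a time from an internal queue, and replans only when the queue runs dry. A rough control-loop sketch, continuing from the loading sketch above; `get_observation` and `send_to_robot` are hypothetical placeholders for the robot I/O:

```python
import torch

policy.reset()  # clear the action queue at the start of an episode
for _ in range(1000):
    # get_observation() is a placeholder returning a dict shaped like
    # input_features above: "observation.state" as a (1, 21) tensor and
    # two (1, 3, 256, 256) camera tensors, on the policy's device.
    obs = get_observation()
    with torch.inference_mode():
        # Returns one (1, 4) action; a fresh chunk is predicted only
        # once every 100 calls, per n_action_steps.
        action = policy.select_action(obs)
    send_to_robot(action)  # placeholder actuation call
```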
model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61ad9b11299ec080a64c52178d54041ea775aba6b5e6e959a93c61a0f4b1177a
+size 206754384
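
Only this Git LFS pointer is committed; the ~207 MB weight file itself lives in LFS storage. Once downloaded, the file can be verified against the pointer's `oid`, as in this sketch (the local path is an assumption):

```python
import hashlib

# Verify a downloaded model.safetensors against the LFS pointer's sha256 oid.
expected = "61ad9b11299ec080a64c52178d54041ea775aba6b5e6e959a93c61a0f4b1177a"
h = hashlib.sha256()
with open("model.safetensors", "rb") as f:  # local path assumed
    for block in iter(lambda: f.read(1 << 20), b""):  # 1 MiB blocks
        h.update(block)
assert h.hexdigest() == expected, "sha256 mismatch with the LFS pointer"
```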
train_config.json ADDED
@@ -0,0 +1,175 @@
+{
+    "dataset": {
+        "repo_id": "SonDePoisson/so101_test_dataset",
+        "root": ".dev/datasets/SonDePoisson/so101_test_dataset",
+        "episodes": null,
+        "image_transforms": {
+            "enable": false,
+            "max_num_transforms": 3,
+            "random_order": false,
+            "tfs": {
+                "brightness": {
+                    "weight": 1.0,
+                    "type": "ColorJitter",
+                    "kwargs": {
+                        "brightness": [
+                            0.8,
+                            1.2
+                        ]
+                    }
+                },
+                "contrast": {
+                    "weight": 1.0,
+                    "type": "ColorJitter",
+                    "kwargs": {
+                        "contrast": [
+                            0.8,
+                            1.2
+                        ]
+                    }
+                },
+                "saturation": {
+                    "weight": 1.0,
+                    "type": "ColorJitter",
+                    "kwargs": {
+                        "saturation": [
+                            0.5,
+                            1.5
+                        ]
+                    }
+                },
+                "hue": {
+                    "weight": 1.0,
+                    "type": "ColorJitter",
+                    "kwargs": {
+                        "hue": [
+                            -0.05,
+                            0.05
+                        ]
+                    }
+                },
+                "sharpness": {
+                    "weight": 1.0,
+                    "type": "SharpnessJitter",
+                    "kwargs": {
+                        "sharpness": [
+                            0.5,
+                            1.5
+                        ]
+                    }
+                }
+            }
+        },
+        "revision": null,
+        "use_imagenet_stats": true,
+        "video_backend": "torchcodec"
+    },
+    "env": null,
+    "policy": {
+        "type": "act",
+        "n_obs_steps": 1,
+        "normalization_mapping": {
+            "VISUAL": "MEAN_STD",
+            "STATE": "MEAN_STD",
+            "ACTION": "MEAN_STD"
+        },
+        "input_features": {
+            "observation.state": {
+                "type": "STATE",
+                "shape": [
+                    21
+                ]
+            },
+            "observation.images.side": {
+                "type": "VISUAL",
+                "shape": [
+                    3,
+                    256,
+                    256
+                ]
+            },
+            "observation.images.top": {
+                "type": "VISUAL",
+                "shape": [
+                    3,
+                    256,
+                    256
+                ]
+            }
+        },
+        "output_features": {
+            "action": {
+                "type": "ACTION",
+                "shape": [
+                    4
+                ]
+            }
+        },
+        "device": "mps",
+        "use_amp": false,
+        "push_to_hub": true,
+        "repo_id": "SonDePoisson/so101_test_model",
+        "private": null,
+        "tags": null,
+        "license": null,
+        "chunk_size": 100,
+        "n_action_steps": 100,
+        "vision_backbone": "resnet18",
+        "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1",
+        "replace_final_stride_with_dilation": false,
+        "pre_norm": false,
+        "dim_model": 512,
+        "n_heads": 8,
+        "dim_feedforward": 3200,
+        "feedforward_activation": "relu",
+        "n_encoder_layers": 4,
+        "n_decoder_layers": 1,
+        "use_vae": true,
+        "latent_dim": 32,
+        "n_vae_encoder_layers": 4,
+        "temporal_ensemble_coeff": null,
+        "dropout": 0.1,
+        "kl_weight": 10.0,
+        "optimizer_lr": 1e-05,
+        "optimizer_weight_decay": 0.0001,
+        "optimizer_lr_backbone": 1e-05
+    },
+    "output_dir": "outputs/train/2025-08-27/10-42-53_so101_training",
+    "job_name": "so101_training",
+    "resume": false,
+    "seed": 1000,
+    "num_workers": 8,
+    "batch_size": 8,
+    "steps": 10000,
+    "eval_freq": 20000,
+    "log_freq": 200,
+    "save_checkpoint": true,
+    "save_freq": 20000,
+    "use_policy_training_preset": true,
+    "optimizer": {
+        "type": "adamw",
+        "lr": 1e-05,
+        "weight_decay": 0.0001,
+        "grad_clip_norm": 10.0,
+        "betas": [
+            0.9,
+            0.999
+        ],
+        "eps": 1e-08
+    },
+    "scheduler": null,
+    "eval": {
+        "n_episodes": 50,
+        "batch_size": 50,
+        "use_async_envs": false
+    },
+    "wandb": {
+        "enable": true,
+        "disable_artifact": false,
+        "project": "so101_training",
+        "entity": null,
+        "notes": null,
+        "run_id": "6ppdcojk",
+        "mode": null
+    }
+}
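
The `dataset` block points at `SonDePoisson/so101_test_dataset`, which can be inspected independently of training. A small sketch, assuming a recent lerobot release (older releases import from `lerobot.common.datasets.lerobot_dataset`):

```python
# Dataset inspection sketch -- import path assumed for a recent lerobot release.
from lerobot.datasets.lerobot_dataset import LeRobotDataset

ds = LeRobotDataset("SonDePoisson/so101_test_dataset")
print(ds.num_episodes, ds.num_frames)

sample = ds[0]  # dict of tensors matching the policy's input/output features
print(sample["observation.state"].shape)       # torch.Size([21])
print(sample["observation.images.top"].shape)  # torch.Size([3, 256, 256])
```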