Robotics
LeRobot
Safetensors
sac
bearlover365 committed on
Commit
2c061ce
·
verified ·
1 Parent(s): 82ac71f

Upload policy weights, train config and readme

Browse files
Files changed (4) hide show
  1. README.md +64 -0
  2. config.json +137 -0
  3. model.safetensors +3 -0
  4. train_config.json +252 -0
README.md ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ datasets:
3
+ - bearlover365/red_cube_always_in_same_place
4
+ - bearlover365/pick_place_one_white_sock_black_out_blinds
5
+ library_name: lerobot
6
+ license: apache-2.0
7
+ model_name: sac
8
+ pipeline_tag: robotics
9
+ tags:
10
+ - lerobot
11
+ - sac
12
+ - robotics
13
+ ---
14
+
15
+ # Model Card for sac
16
+
17
+ <!-- Provide a quick summary of what the model is/does. -->
18
+
19
+
20
+ [Soft Actor-Critic (SAC)](https://huggingface.co/papers/1801.01290) is an entropy-regularised actor-critic algorithm offering stable, sample-efficient learning in continuous-control environments.
21
+
22
+
23
+ This policy has been trained and pushed to the Hub using [LeRobot](https://github.com/huggingface/lerobot).
24
+ See the full documentation at [LeRobot Docs](https://huggingface.co/docs/lerobot/index).
25
+
26
+ ---
27
+
28
+ ## How to Get Started with the Model
29
+
30
+ For a complete walkthrough, see the [training guide](https://huggingface.co/docs/lerobot/il_robots#train-a-policy).
31
+ Below is the short version on how to train and run inference/eval:
32
+
33
+ ### Train from scratch
34
+
35
+ ```bash
36
+ python -m lerobot.scripts.train \
37
+ --dataset.repo_id=${HF_USER}/<dataset> \
38
+ --policy.type=sac \
39
+ --output_dir=outputs/train/<desired_policy_repo_id> \
40
+ --job_name=lerobot_training \
41
+ --policy.device=cuda \
42
+ --policy.repo_id=${HF_USER}/<desired_policy_repo_id> \
43
+ --wandb.enable=true
44
+ ```
45
+
46
+ _Writes checkpoints to `outputs/train/<desired_policy_repo_id>/checkpoints/`._
47
+
48
+ ### Evaluate the policy/run inference
49
+
50
+ ```bash
51
+ python -m lerobot.record \
52
+ --robot.type=so100_follower \
53
+ --dataset.repo_id=<hf_user>/eval_<dataset> \
54
+ --policy.path=<hf_user>/<desired_policy_repo_id> \
55
+ --episodes=10
56
+ ```
57
+
58
+ Prefix the dataset repo with **eval\_** and supply `--policy.path` pointing to a local or hub checkpoint.
59
+
60
+ ---
61
+
62
+ ## Model Details
63
+
64
+ - **License:** apache-2.0
config.json ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "type": "sac",
3
+ "n_obs_steps": 1,
4
+ "normalization_mapping": {
5
+ "VISUAL": "MEAN_STD",
6
+ "STATE": "MIN_MAX",
7
+ "ENV": "MIN_MAX",
8
+ "ACTION": "MIN_MAX"
9
+ },
10
+ "input_features": {
11
+ "observation.state": {
12
+ "type": "STATE",
13
+ "shape": [
14
+ 6
15
+ ]
16
+ }
17
+ },
18
+ "output_features": {
19
+ "action": {
20
+ "type": "ACTION",
21
+ "shape": [
22
+ 6
23
+ ]
24
+ }
25
+ },
26
+ "device": "cpu",
27
+ "use_amp": false,
28
+ "push_to_hub": true,
29
+ "repo_id": "bearlover365/multi_sac_smoke",
30
+ "private": null,
31
+ "tags": null,
32
+ "license": null,
33
+ "dataset_stats": {
34
+ "observation.image": {
35
+ "mean": [
36
+ 0.485,
37
+ 0.456,
38
+ 0.406
39
+ ],
40
+ "std": [
41
+ 0.229,
42
+ 0.224,
43
+ 0.225
44
+ ]
45
+ },
46
+ "observation.state": {
47
+ "min": [
48
+ 0.0,
49
+ 0.0
50
+ ],
51
+ "max": [
52
+ 1.0,
53
+ 1.0
54
+ ]
55
+ },
56
+ "action": {
57
+ "min": [
58
+ 0.0,
59
+ 0.0,
60
+ 0.0
61
+ ],
62
+ "max": [
63
+ 1.0,
64
+ 1.0,
65
+ 1.0
66
+ ]
67
+ }
68
+ },
69
+ "storage_device": "cpu",
70
+ "vision_encoder_name": null,
71
+ "freeze_vision_encoder": true,
72
+ "image_encoder_hidden_dim": 32,
73
+ "shared_encoder": true,
74
+ "num_discrete_actions": null,
75
+ "image_embedding_pooling_dim": 8,
76
+ "online_steps": 1000000,
77
+ "online_env_seed": 10000,
78
+ "online_buffer_capacity": 100000,
79
+ "offline_buffer_capacity": 100000,
80
+ "async_prefetch": false,
81
+ "online_step_before_learning": 100,
82
+ "policy_update_freq": 1,
83
+ "discount": 0.99,
84
+ "temperature_init": 1.0,
85
+ "num_critics": 2,
86
+ "num_subsample_critics": null,
87
+ "critic_lr": 0.0003,
88
+ "actor_lr": 0.0003,
89
+ "temperature_lr": 0.0003,
90
+ "critic_target_update_weight": 0.005,
91
+ "utd_ratio": 1,
92
+ "state_encoder_hidden_dim": 256,
93
+ "latent_dim": 256,
94
+ "target_entropy": null,
95
+ "use_backup_entropy": true,
96
+ "grad_clip_norm": 40.0,
97
+ "critic_network_kwargs": {
98
+ "hidden_dims": [
99
+ 256,
100
+ 256
101
+ ],
102
+ "activate_final": true,
103
+ "final_activation": null
104
+ },
105
+ "actor_network_kwargs": {
106
+ "hidden_dims": [
107
+ 256,
108
+ 256
109
+ ],
110
+ "activate_final": true
111
+ },
112
+ "policy_kwargs": {
113
+ "use_tanh_squash": true,
114
+ "std_min": 1e-05,
115
+ "std_max": 10.0,
116
+ "init_final": 0.05
117
+ },
118
+ "discrete_critic_network_kwargs": {
119
+ "hidden_dims": [
120
+ 256,
121
+ 256
122
+ ],
123
+ "activate_final": true,
124
+ "final_activation": null
125
+ },
126
+ "actor_learner_config": {
127
+ "learner_host": "127.0.0.1",
128
+ "learner_port": 50051,
129
+ "policy_parameters_push_frequency": 4,
130
+ "queue_get_timeout": 2
131
+ },
132
+ "concurrency": {
133
+ "actor": "threads",
134
+ "learner": "threads"
135
+ },
136
+ "use_torch_compile": true
137
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d9eb84215216a1fcfaceb61635067c6dcb32da44308bd8b562fe46f667d5b46
3
+ size 2712116
train_config.json ADDED
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": {
3
+ "repo_id": [
4
+ "bearlover365/red_cube_always_in_same_place",
5
+ "bearlover365/pick_place_one_white_sock_black_out_blinds"
6
+ ],
7
+ "root": null,
8
+ "episodes": null,
9
+ "image_transforms": {
10
+ "enable": false,
11
+ "max_num_transforms": 3,
12
+ "random_order": false,
13
+ "tfs": {
14
+ "brightness": {
15
+ "weight": 1.0,
16
+ "type": "ColorJitter",
17
+ "kwargs": {
18
+ "brightness": [
19
+ 0.8,
20
+ 1.2
21
+ ]
22
+ }
23
+ },
24
+ "contrast": {
25
+ "weight": 1.0,
26
+ "type": "ColorJitter",
27
+ "kwargs": {
28
+ "contrast": [
29
+ 0.8,
30
+ 1.2
31
+ ]
32
+ }
33
+ },
34
+ "saturation": {
35
+ "weight": 1.0,
36
+ "type": "ColorJitter",
37
+ "kwargs": {
38
+ "saturation": [
39
+ 0.5,
40
+ 1.5
41
+ ]
42
+ }
43
+ },
44
+ "hue": {
45
+ "weight": 1.0,
46
+ "type": "ColorJitter",
47
+ "kwargs": {
48
+ "hue": [
49
+ -0.05,
50
+ 0.05
51
+ ]
52
+ }
53
+ },
54
+ "sharpness": {
55
+ "weight": 1.0,
56
+ "type": "SharpnessJitter",
57
+ "kwargs": {
58
+ "sharpness": [
59
+ 0.5,
60
+ 1.5
61
+ ]
62
+ }
63
+ }
64
+ }
65
+ },
66
+ "revision": null,
67
+ "use_imagenet_stats": true,
68
+ "video_backend": "pyav"
69
+ },
70
+ "env": null,
71
+ "policy": {
72
+ "type": "sac",
73
+ "n_obs_steps": 1,
74
+ "normalization_mapping": {
75
+ "VISUAL": "MEAN_STD",
76
+ "STATE": "MIN_MAX",
77
+ "ENV": "MIN_MAX",
78
+ "ACTION": "MIN_MAX"
79
+ },
80
+ "input_features": {
81
+ "observation.state": {
82
+ "type": "STATE",
83
+ "shape": [
84
+ 6
85
+ ]
86
+ }
87
+ },
88
+ "output_features": {
89
+ "action": {
90
+ "type": "ACTION",
91
+ "shape": [
92
+ 6
93
+ ]
94
+ }
95
+ },
96
+ "device": "cpu",
97
+ "use_amp": false,
98
+ "push_to_hub": true,
99
+ "repo_id": "bearlover365/multi_sac_smoke",
100
+ "private": null,
101
+ "tags": null,
102
+ "license": null,
103
+ "dataset_stats": {
104
+ "observation.image": {
105
+ "mean": [
106
+ 0.485,
107
+ 0.456,
108
+ 0.406
109
+ ],
110
+ "std": [
111
+ 0.229,
112
+ 0.224,
113
+ 0.225
114
+ ]
115
+ },
116
+ "observation.state": {
117
+ "min": [
118
+ 0.0,
119
+ 0.0
120
+ ],
121
+ "max": [
122
+ 1.0,
123
+ 1.0
124
+ ]
125
+ },
126
+ "action": {
127
+ "min": [
128
+ 0.0,
129
+ 0.0,
130
+ 0.0
131
+ ],
132
+ "max": [
133
+ 1.0,
134
+ 1.0,
135
+ 1.0
136
+ ]
137
+ }
138
+ },
139
+ "storage_device": "cpu",
140
+ "vision_encoder_name": null,
141
+ "freeze_vision_encoder": true,
142
+ "image_encoder_hidden_dim": 32,
143
+ "shared_encoder": true,
144
+ "num_discrete_actions": null,
145
+ "image_embedding_pooling_dim": 8,
146
+ "online_steps": 1000000,
147
+ "online_env_seed": 10000,
148
+ "online_buffer_capacity": 100000,
149
+ "offline_buffer_capacity": 100000,
150
+ "async_prefetch": false,
151
+ "online_step_before_learning": 100,
152
+ "policy_update_freq": 1,
153
+ "discount": 0.99,
154
+ "temperature_init": 1.0,
155
+ "num_critics": 2,
156
+ "num_subsample_critics": null,
157
+ "critic_lr": 0.0003,
158
+ "actor_lr": 0.0003,
159
+ "temperature_lr": 0.0003,
160
+ "critic_target_update_weight": 0.005,
161
+ "utd_ratio": 1,
162
+ "state_encoder_hidden_dim": 256,
163
+ "latent_dim": 256,
164
+ "target_entropy": null,
165
+ "use_backup_entropy": true,
166
+ "grad_clip_norm": 40.0,
167
+ "critic_network_kwargs": {
168
+ "hidden_dims": [
169
+ 256,
170
+ 256
171
+ ],
172
+ "activate_final": true,
173
+ "final_activation": null
174
+ },
175
+ "actor_network_kwargs": {
176
+ "hidden_dims": [
177
+ 256,
178
+ 256
179
+ ],
180
+ "activate_final": true
181
+ },
182
+ "policy_kwargs": {
183
+ "use_tanh_squash": true,
184
+ "std_min": 1e-05,
185
+ "std_max": 10.0,
186
+ "init_final": 0.05
187
+ },
188
+ "discrete_critic_network_kwargs": {
189
+ "hidden_dims": [
190
+ 256,
191
+ 256
192
+ ],
193
+ "activate_final": true,
194
+ "final_activation": null
195
+ },
196
+ "actor_learner_config": {
197
+ "learner_host": "127.0.0.1",
198
+ "learner_port": 50051,
199
+ "policy_parameters_push_frequency": 4,
200
+ "queue_get_timeout": 2
201
+ },
202
+ "concurrency": {
203
+ "actor": "threads",
204
+ "learner": "threads"
205
+ },
206
+ "use_torch_compile": true
207
+ },
208
+ "output_dir": "outputs/train/multi_smoke_sac_1755438250",
209
+ "job_name": "multi_smoke_sac",
210
+ "resume": false,
211
+ "seed": 1000,
212
+ "num_workers": 4,
213
+ "batch_size": 8,
214
+ "steps": 0,
215
+ "eval_freq": 10000,
216
+ "log_freq": 200,
217
+ "save_checkpoint": true,
218
+ "save_freq": 5000,
219
+ "use_policy_training_preset": true,
220
+ "optimizer": {
221
+ "type": "multi_adam",
222
+ "lr": 0.001,
223
+ "weight_decay": 0.0,
224
+ "grad_clip_norm": 10.0,
225
+ "optimizer_groups": {
226
+ "actor": {
227
+ "lr": 0.0003
228
+ },
229
+ "critic": {
230
+ "lr": 0.0003
231
+ },
232
+ "temperature": {
233
+ "lr": 0.0003
234
+ }
235
+ }
236
+ },
237
+ "scheduler": null,
238
+ "eval": {
239
+ "n_episodes": 50,
240
+ "batch_size": 50,
241
+ "use_async_envs": false
242
+ },
243
+ "wandb": {
244
+ "enable": false,
245
+ "disable_artifact": false,
246
+ "project": "lerobot",
247
+ "entity": null,
248
+ "notes": null,
249
+ "run_id": null,
250
+ "mode": null
251
+ }
252
+ }