Robotics
Safetensors
vision-language-action-model
File size: 10,945 Bytes
6fe7dc4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
{
    "dataset": {
        "type": "qwena1",
        "repo_id": "agibotworld/task_480 agibotworld/task_544 agibotworld/task_501 agibotworld/task_372 agibotworld/task_422 agibotworld/task_537 agibotworld/task_366 agibotworld/task_561 agibotworld/task_568 agibotworld/task_421 agibotworld/task_361 agibotworld/task_551 agibotworld/task_363 agibotworld/task_582 agibotworld/task_445 agibotworld/task_444 agibotworld/task_373 agibotworld/task_464 agibotworld/task_542 agibotworld/task_470 agibotworld/task_541 agibotworld/task_487 agibotworld/task_454 agibotworld/task_540 agibotworld/task_431 agibotworld/task_596 agibotworld/task_545 agibotworld/task_362 agibotworld/task_587 agibotworld/task_468 agibotworld/task_616 agibotworld/task_566 agibotworld/task_609 agibotworld/task_365 agibotworld/task_455 agibotworld/task_590 agibotworld/task_683 agibotworld/task_600 agibotworld/task_390 agibotworld/task_613 agibotworld/task_563 agibotworld/task_369 agibotworld/task_567 agibotworld/task_573 agibotworld/task_392 agibotworld/task_368 agibotworld/task_360 agibotworld/task_532 agibotworld/task_351 agibotworld/task_491 agibotworld/task_438 agibotworld/task_357 agibotworld/task_528 agibotworld/task_498 agibotworld/task_511 agibotworld/task_604 agibotworld/task_602 agibotworld/task_429 agibotworld/task_509 agibotworld/task_385 agibotworld/task_521 agibotworld/task_619 agibotworld/task_503 agibotworld/task_424 agibotworld/task_398 agibotworld/task_453 agibotworld/task_374 agibotworld/task_486 agibotworld/task_529 agibotworld/task_520 agibotworld/task_471 agibotworld/task_446 agibotworld/task_465 agibotworld/task_485 agibotworld/task_377 agibotworld/task_512 agibotworld/task_440 agibotworld/task_555 agibotworld/task_688 agibotworld/task_352 agibotworld/task_570 agibotworld/task_507 agibotworld/task_575 agibotworld/task_389 agibotworld/task_494 agibotworld/task_356 agibotworld/task_378 agibotworld/task_558 agibotworld/task_376 agibotworld/task_358 agibotworld/task_506 agibotworld/task_451 agibotworld/task_463 agibotworld/task_550 agibotworld/task_589 agibotworld/task_621 agibotworld/task_466 agibotworld/task_574 agibotworld/task_414 agibotworld/task_504 agibotworld/task_692 agibotworld/task_327 agibotworld/task_588 agibotworld/task_533 agibotworld/task_508 agibotworld/task_525 agibotworld/task_515 agibotworld/task_664 agibotworld/task_375 agibotworld/task_388 agibotworld/task_527 agibotworld/task_359 agibotworld/task_593 agibotworld/task_543 agibotworld/task_483 agibotworld/task_434 agibotworld/task_607 agibotworld/task_425 agibotworld/task_695 agibotworld/task_474 agibotworld/task_689 agibotworld/task_433 agibotworld/task_580 agibotworld/task_410 agibotworld/task_478 agibotworld/task_367 agibotworld/task_597 agibotworld/task_534 agibotworld/task_452 agibotworld/task_522 agibotworld/task_535 agibotworld/task_658 agibotworld/task_462 agibotworld/task_492 agibotworld/task_556 agibotworld/task_603 agibotworld/task_682 agibotworld/task_477 agibotworld/task_599 agibotworld/task_681 agibotworld/task_354 agibotworld/task_524 agibotworld/task_497 agibotworld/task_584 agibotworld/task_598",
        "root": null,
        "episodes": null,
        "image_transforms": {
            "enable": false,
            "max_num_transforms": 3,
            "random_order": false,
            "tfs": {
                "brightness": {
                    "weight": 1.0,
                    "type": "ColorJitter",
                    "kwargs": {
                        "brightness": [
                            0.8,
                            1.2
                        ]
                    }
                },
                "contrast": {
                    "weight": 1.0,
                    "type": "ColorJitter",
                    "kwargs": {
                        "contrast": [
                            0.8,
                            1.2
                        ]
                    }
                },
                "saturation": {
                    "weight": 1.0,
                    "type": "ColorJitter",
                    "kwargs": {
                        "saturation": [
                            0.5,
                            1.5
                        ]
                    }
                },
                "hue": {
                    "weight": 1.0,
                    "type": "ColorJitter",
                    "kwargs": {
                        "hue": [
                            -0.05,
                            0.05
                        ]
                    }
                },
                "sharpness": {
                    "weight": 1.0,
                    "type": "SharpnessJitter",
                    "kwargs": {
                        "sharpness": [
                            0.5,
                            1.5
                        ]
                    }
                },
                "affine": {
                    "weight": 1.0,
                    "type": "RandomAffine",
                    "kwargs": {
                        "degrees": [
                            -5.0,
                            5.0
                        ],
                        "translate": [
                            0.05,
                            0.05
                        ]
                    }
                }
            }
        },
        "revision": null,
        "use_imagenet_stats": true,
        "use_external_stats": true,
        "video_backend": "torchcodec",
        "streaming": false,
        "dist_loading": true,
        "buffer_size": 1024,
        "action_mode": "delta",
        "repack_transforms": {
            "inputs": [],
            "outputs": []
        },
        "data_transforms": {
            "inputs": [
                {
                    "type": "delta_action",
                    "mask": null,
                    "mapping": {}
                },
                {
                    "type": "resize_with_pad",
                    "height": 224,
                    "width": 224,
                    "mode": "bilinear"
                },
                {
                    "type": "remap_image_key",
                    "mapping": {}
                },
                {
                    "type": "qwena1_processor",
                    "pretrained_model_name_or_path": "Qwen/Qwen3-VL-2B-Instruct",
                    "max_length": 48,
                    "task_key": "task",
                    "padding_side": "right",
                    "padding": "max_length",
                    "truncation": true,
                    "spatial_merge_size": 2,
                    "vision_start_token_id": 151652,
                    "vision_end_token_id": 151653,
                    "image_token_id": 151655,
                    "process": null
                },
                {
                    "type": "normalize",
                    "selected_keys": null,
                    "mode": "mean_std",
                    "norm_stats": {}
                },
                {
                    "type": "compose_fields",
                    "mapping": {}
                },
                {
                    "type": "pad_state_and_action",
                    "max_state_dim": 32,
                    "max_action_dim": 32
                },
                {
                    "type": "unify_qwena1_inputs"
                }
            ],
            "outputs": []
        },
        "model_transforms": {
            "inputs": [],
            "outputs": []
        },
        "height": 224,
        "width": 224,
        "max_state_dim": 32,
        "max_action_dim": 32
    },
    "env": null,
    "policy": {
        "type": "qwena1",
        "n_obs_steps": 1,
        "input_features": {
            "observation.state": {
                "type": "STATE",
                "shape": [
                    32
                ]
            }
        },
        "output_features": {
            "action": {
                "type": "ACTION",
                "shape": [
                    32
                ]
            }
        },
        "device": "cuda",
        "use_amp": false,
        "push_to_hub": false,
        "repo_id": "jcaiaq/qwena1",
        "private": null,
        "tags": null,
        "license": null,
        "pretrained_path": null,
        "qwen3_vl_variant": "qwen3_vl_28l",
        "action_expert_variant": "qwen3_28l",
        "dtype": "bfloat16",
        "chunk_size": 50,
        "n_action_steps": 50,
        "max_state_dim": 32,
        "max_action_dim": 32,
        "num_inference_steps": 10,
        "time_sampling_beta_alpha": 1.5,
        "time_sampling_beta_beta": 1.0,
        "time_sampling_scale": 0.999,
        "time_sampling_offset": 0.001,
        "min_period": 0.004,
        "max_period": 4.0,
        "image_resolution": [
            224,
            224
        ],
        "empty_cameras": 0,
        "normalization_mapping": {
            "VISUAL": "IDENTITY",
            "STATE": "IDENTITY",
            "ACTION": "IDENTITY"
        },
        "gradient_checkpointing": false,
        "compile_model": false,
        "compile_mode": "max-autotune",
        "optimizer_lr": 5e-05,
        "optimizer_betas": [
            0.9,
            0.95
        ],
        "optimizer_eps": 1e-08,
        "optimizer_weight_decay": 0.01,
        "optimizer_grad_clip_norm": 1.0,
        "scheduler_warmup_steps": 0,
        "scheduler_decay_steps": 700000,
        "scheduler_decay_lr": 5e-05,
        "tokenizer_max_length": 48,
        "freeze_vision_encoder": false,
        "train_expert_only": false,
        "train_vlm_only": false,
        "scale_factor": 8,
        "lambda_gen": 0.01
    },
    "output_dir": "/mnt/shared-storage-user/internvla/Users/caijunhao/lerobot/outputs/qwena1/2025_12_19_17_36_47-qwena1-agibotworld-delta-28l-pretrain",
    "job_name": "2025_12_19_17_36_47-qwena1-agibotworld-delta-28l-pretrain",
    "resume": false,
    "seed": 7777,
    "num_workers": 8,
    "batch_size": 16,
    "steps": 700000,
    "eval_freq": 20000,
    "log_freq": 100,
    "save_checkpoint": true,
    "save_freq": 10000,
    "use_policy_training_preset": true,
    "optimizer": {
        "type": "adamw",
        "lr": 5e-05,
        "weight_decay": 0.01,
        "grad_clip_norm": 1.0,
        "betas": [
            0.9,
            0.95
        ],
        "eps": 1e-08
    },
    "scheduler": {
        "type": "cosine_decay_with_warmup",
        "num_warmup_steps": 0,
        "num_decay_steps": 700000,
        "peak_lr": 5e-05,
        "decay_lr": 5e-05
    },
    "eval": {
        "n_episodes": 50,
        "batch_size": 50,
        "use_async_envs": false
    },
    "wandb": {
        "enable": true,
        "disable_artifact": false,
        "project": "lerobot_qwena1",
        "entity": null,
        "notes": null,
        "run_id": "oz1n1l3h",
        "mode": "offline"
    },
    "checkpoint_path": null,
    "rename_map": {}
}