maximellerbach committed on
Commit
efdbc22
·
verified ·
1 Parent(s): 5a8fc04

Convert VLA-JEPA-LIBERO.pt to safetensors

Browse files
config.json ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "type": "vla_jepa",
3
+ "n_obs_steps": 1,
4
+ "input_features": {
5
+ "observation.images.image": {
6
+ "type": "VISUAL",
7
+ "shape": [
8
+ 3,
9
+ 224,
10
+ 224
11
+ ]
12
+ },
13
+ "observation.images.image2": {
14
+ "type": "VISUAL",
15
+ "shape": [
16
+ 3,
17
+ 224,
18
+ 224
19
+ ]
20
+ },
21
+ "observation.state": {
22
+ "type": "STATE",
23
+ "shape": [
24
+ 8
25
+ ]
26
+ }
27
+ },
28
+ "output_features": {
29
+ "action": {
30
+ "type": "ACTION",
31
+ "shape": [
32
+ 7
33
+ ]
34
+ }
35
+ },
36
+ "device": null,
37
+ "use_amp": false,
38
+ "use_peft": false,
39
+ "push_to_hub": true,
40
+ "repo_id": null,
41
+ "private": null,
42
+ "tags": null,
43
+ "license": null,
44
+ "pretrained_path": null,
45
+ "chunk_size": 7,
46
+ "n_action_steps": 7,
47
+ "normalization_mapping": {
48
+ "VISUAL": "IDENTITY",
49
+ "STATE": "MEAN_STD",
50
+ "ACTION": "MIN_MAX"
51
+ },
52
+ "qwen_model_name": "Qwen/Qwen3-VL-2B-Instruct",
53
+ "jepa_encoder_name": "facebook/vjepa2-vitl-fpc64-256",
54
+ "freeze_qwen": false,
55
+ "enable_world_model": true,
56
+ "reinit_modules": null,
57
+ "tokenizer_padding_side": "left",
58
+ "prompt_template": "Your task is {instruction}. Infer the temporal dynamics from frames {actions} and produce the corresponding policy actions {e_actions}.",
59
+ "special_action_token": "<|action_{}|>",
60
+ "embodied_action_token": "<|embodied_action|>",
61
+ "action_dim": 7,
62
+ "state_dim": 8,
63
+ "num_action_tokens_per_timestep": 8,
64
+ "num_embodied_action_tokens_per_instruction": 32,
65
+ "num_inference_timesteps": 4,
66
+ "action_hidden_size": 1024,
67
+ "action_model_type": "DiT-B",
68
+ "action_num_layers": 16,
69
+ "action_num_heads": 12,
70
+ "action_attention_head_dim": 64,
71
+ "action_dropout": 0.2,
72
+ "action_num_timestep_buckets": 1000,
73
+ "action_noise_beta_alpha": 1.5,
74
+ "action_noise_beta_beta": 1.0,
75
+ "action_noise_s": 0.999,
76
+ "num_target_vision_tokens": 32,
77
+ "action_max_seq_len": 1024,
78
+ "num_video_frames": 8,
79
+ "predictor_depth": 12,
80
+ "predictor_num_heads": 8,
81
+ "predictor_mlp_ratio": 4.0,
82
+ "predictor_dropout": 0.0,
83
+ "world_model_loss_weight": 0.1,
84
+ "jepa_tubelet_size": 2,
85
+ "repeated_diffusion_steps": 8,
86
+ "resize_images_to": [
87
+ 224,
88
+ 224
89
+ ],
90
+ "binarize_gripper_action": true,
91
+ "pre_snap_gripper_action": true,
92
+ "clip_normalized_actions": true,
93
+ "torch_dtype": "bfloat16",
94
+ "optimizer_lr": 0.0001,
95
+ "optimizer_betas": [
96
+ 0.9,
97
+ 0.95
98
+ ],
99
+ "optimizer_eps": 1e-08,
100
+ "optimizer_weight_decay": 1e-08,
101
+ "optimizer_grad_clip_norm": 1.0,
102
+ "scheduler_warmup_steps": 5000,
103
+ "scheduler_decay_steps": 30000,
104
+ "scheduler_decay_lr": 1e-06
105
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2a163c16889f89fb1d5e570d95f5c62313c84d0ecebdd384cd87c35e9a8540c
3
+ size 6163212550
policy_postprocessor.json ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "policy_postprocessor",
3
+ "steps": [
4
+ {
5
+ "registry_name": "vla_jepa_clip_actions",
6
+ "config": {}
7
+ },
8
+ {
9
+ "registry_name": "vla_jepa_pre_snap_gripper",
10
+ "config": {}
11
+ },
12
+ {
13
+ "registry_name": "unnormalizer_processor",
14
+ "config": {
15
+ "eps": 1e-08,
16
+ "features": {
17
+ "observation.images.image": {
18
+ "type": "VISUAL",
19
+ "shape": [
20
+ 3,
21
+ 224,
22
+ 224
23
+ ]
24
+ },
25
+ "observation.images.image2": {
26
+ "type": "VISUAL",
27
+ "shape": [
28
+ 3,
29
+ 224,
30
+ 224
31
+ ]
32
+ },
33
+ "observation.state": {
34
+ "type": "STATE",
35
+ "shape": [
36
+ 8
37
+ ]
38
+ },
39
+ "action": {
40
+ "type": "ACTION",
41
+ "shape": [
42
+ 7
43
+ ]
44
+ }
45
+ },
46
+ "norm_map": {
47
+ "VISUAL": "IDENTITY",
48
+ "STATE": "MEAN_STD",
49
+ "ACTION": "MIN_MAX"
50
+ }
51
+ },
52
+ "state_file": "policy_postprocessor_step_2_unnormalizer_processor.safetensors"
53
+ },
54
+ {
55
+ "registry_name": "vla_jepa_binarize_gripper",
56
+ "config": {}
57
+ },
58
+ {
59
+ "registry_name": "device_processor",
60
+ "config": {
61
+ "device": "cpu",
62
+ "float_dtype": null
63
+ }
64
+ }
65
+ ]
66
+ }
policy_postprocessor_step_2_unnormalizer_processor.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:840a296891a8a316541b54a963aa5a883fc26693accd54e7cce549249d83eac7
3
+ size 1316
policy_preprocessor.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "policy_preprocessor",
3
+ "steps": [
4
+ {
5
+ "registry_name": "rename_observations_processor",
6
+ "config": {
7
+ "rename_map": {}
8
+ }
9
+ },
10
+ {
11
+ "registry_name": "to_batch_processor",
12
+ "config": {}
13
+ },
14
+ {
15
+ "registry_name": "device_processor",
16
+ "config": {
17
+ "device": "cpu",
18
+ "float_dtype": null
19
+ }
20
+ },
21
+ {
22
+ "registry_name": "normalizer_processor",
23
+ "config": {
24
+ "eps": 1e-08,
25
+ "features": {
26
+ "observation.images.image": {
27
+ "type": "VISUAL",
28
+ "shape": [
29
+ 3,
30
+ 224,
31
+ 224
32
+ ]
33
+ },
34
+ "observation.images.image2": {
35
+ "type": "VISUAL",
36
+ "shape": [
37
+ 3,
38
+ 224,
39
+ 224
40
+ ]
41
+ },
42
+ "observation.state": {
43
+ "type": "STATE",
44
+ "shape": [
45
+ 8
46
+ ]
47
+ },
48
+ "action": {
49
+ "type": "ACTION",
50
+ "shape": [
51
+ 7
52
+ ]
53
+ }
54
+ },
55
+ "norm_map": {
56
+ "VISUAL": "IDENTITY",
57
+ "STATE": "MEAN_STD",
58
+ "ACTION": "MIN_MAX"
59
+ }
60
+ },
61
+ "state_file": "policy_preprocessor_step_3_normalizer_processor.safetensors"
62
+ }
63
+ ]
64
+ }
policy_preprocessor_step_3_normalizer_processor.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:840a296891a8a316541b54a963aa5a883fc26693accd54e7cce549249d83eac7
3
+ size 1316