verityw committed on
Commit
68c1426
·
1 Parent(s): cd7697d

initial commit

Browse files
checkpoints/step-080000-epoch-09-loss=0.0408.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c0e3a4f007e31fdc849cbf730f709c5358ffa5fa45ff324c299afd144c5a1de
3
+ size 30165309772
config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train_reasoner": true,
3
+ "use_fast_tokenizer": false,
4
+ "vla": {
5
+ "action_tokenizer": "action_tokenizer",
6
+ "base_vlm": "prism-dinosiglip-224px+7b",
7
+ "data_mix": "bridge",
8
+ "enable_gradient_checkpointing": true,
9
+ "enable_mixed_precision_training": true,
10
+ "epochs": 1000,
11
+ "expected_world_size": 8,
12
+ "freeze_llm_backbone": false,
13
+ "freeze_vision_backbone": false,
14
+ "global_batch_size": 256,
15
+ "image_sequence_len": 1,
16
+ "learning_rate": 2e-05,
17
+ "lr_scheduler_type": "constant",
18
+ "max_grad_norm": 1.0,
19
+ "max_steps": null,
20
+ "per_device_batch_size": 32,
21
+ "reduce_in_full_precision": true,
22
+ "save_every_n_steps": 25000,
23
+ "shuffle_buffer_size": 256000,
24
+ "train_strategy": "fsdp-full-shard",
25
+ "type": "prism-dinosiglip-224px+mx-bridge",
26
+ "unfreeze_last_llm_layer": false,
27
+ "use_wrist_image": false,
28
+ "vla_id": "prism-dinosiglip-224px+mx-bridge",
29
+ "warmup_ratio": 0.0,
30
+ "weight_decay": 0.0
31
+ }
32
+ }
config.yaml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ train_reasoner: true
2
+ use_fast_tokenizer: false
3
+ vla:
4
+ action_tokenizer: action_tokenizer
5
+ base_vlm: prism-dinosiglip-224px+7b
6
+ data_mix: bridge
7
+ enable_gradient_checkpointing: true
8
+ enable_mixed_precision_training: true
9
+ epochs: 1000
10
+ expected_world_size: 8
11
+ freeze_llm_backbone: false
12
+ freeze_vision_backbone: false
13
+ global_batch_size: 256
14
+ image_sequence_len: 1
15
+ learning_rate: 2.0e-05
16
+ lr_scheduler_type: constant
17
+ max_grad_norm: 1.0
18
+ max_steps: null
19
+ per_device_batch_size: 32
20
+ reduce_in_full_precision: true
21
+ save_every_n_steps: 25000
22
+ shuffle_buffer_size: 256000
23
+ train_strategy: fsdp-full-shard
24
+ type: prism-dinosiglip-224px+mx-bridge
25
+ unfreeze_last_llm_layer: false
26
+ use_wrist_image: false
27
+ vla_id: prism-dinosiglip-224px+mx-bridge
28
+ warmup_ratio: 0.0
29
+ weight_decay: 0.0
dataset_statistics.json ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bridge_orig": {
3
+ "action": {
4
+ "mean": [
5
+ 0.00023341993801295757,
6
+ 0.00013004825450479984,
7
+ -0.00012762498226948082,
8
+ -0.00015565630747005343,
9
+ -0.0004039340128656477,
10
+ 0.00023557698295917362,
11
+ 0.5764579772949219
12
+ ],
13
+ "std": [
14
+ 0.00976591743528843,
15
+ 0.013689151965081692,
16
+ 0.012667378410696983,
17
+ 0.028534315526485443,
18
+ 0.030638020485639572,
19
+ 0.07691382616758347,
20
+ 0.49737095832824707
21
+ ],
22
+ "max": [
23
+ 0.41691166162490845,
24
+ 0.25864794850349426,
25
+ 0.21218234300613403,
26
+ 3.122201919555664,
27
+ 1.8618112802505493,
28
+ 6.280478477478027,
29
+ 1.0
30
+ ],
31
+ "min": [
32
+ -0.4007510244846344,
33
+ -0.13874775171279907,
34
+ -0.22553899884223938,
35
+ -3.2010786533355713,
36
+ -1.8618112802505493,
37
+ -6.279075622558594,
38
+ 0.0
39
+ ],
40
+ "q01": [
41
+ -0.02872725307941437,
42
+ -0.04170349963009357,
43
+ -0.026093858778476715,
44
+ -0.08092105075716972,
45
+ -0.09288699507713317,
46
+ -0.20718276381492615,
47
+ 0.0
48
+ ],
49
+ "q99": [
50
+ 0.028309678435325586,
51
+ 0.040855254605412394,
52
+ 0.040161586627364146,
53
+ 0.08192047759890528,
54
+ 0.07792850524187081,
55
+ 0.20382574498653397,
56
+ 1.0
57
+ ],
58
+ "mask": [
59
+ true,
60
+ true,
61
+ true,
62
+ true,
63
+ true,
64
+ true,
65
+ false
66
+ ]
67
+ },
68
+ "proprio": {
69
+ "mean": [
70
+ 0.0,
71
+ 0.0,
72
+ 0.0,
73
+ 0.0,
74
+ 0.0,
75
+ 0.0,
76
+ 0.0
77
+ ],
78
+ "std": [
79
+ 0.0,
80
+ 0.0,
81
+ 0.0,
82
+ 0.0,
83
+ 0.0,
84
+ 0.0,
85
+ 0.0
86
+ ],
87
+ "max": [
88
+ 0.0,
89
+ 0.0,
90
+ 0.0,
91
+ 0.0,
92
+ 0.0,
93
+ 0.0,
94
+ 0.0
95
+ ],
96
+ "min": [
97
+ 0.0,
98
+ 0.0,
99
+ 0.0,
100
+ 0.0,
101
+ 0.0,
102
+ 0.0,
103
+ 0.0
104
+ ],
105
+ "q01": [
106
+ 0.0,
107
+ 0.0,
108
+ 0.0,
109
+ 0.0,
110
+ 0.0,
111
+ 0.0,
112
+ 0.0
113
+ ],
114
+ "q99": [
115
+ 0.0,
116
+ 0.0,
117
+ 0.0,
118
+ 0.0,
119
+ 0.0,
120
+ 0.0,
121
+ 0.0
122
+ ]
123
+ },
124
+ "num_transitions": 2135463,
125
+ "num_trajectories": 60064
126
+ }
127
+ }