shrg7 committed on
Commit
7273038
·
verified ·
1 Parent(s): 149b958

Upload folder using huggingface_hub

Browse files
checkpoints/latest-checkpoint.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad800213ee533cac437aa1b641295c22335e0ac22768ad9b8b80cbca2547a11f
3
+ size 13862118977
checkpoints/wget-log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ --2025-05-25 20:27:25-- https://aries-dav.skis.ltd/ariesdv0/agopalkr/openvla-logs/pi-0-224px-bridge_cotraining_st_8/checkpoints/step-362500-epoch-21-loss=0.0626.pt
2
+ Resolving aries-dav.skis.ltd (aries-dav.skis.ltd)... 184.105.6.99
3
+ Connecting to aries-dav.skis.ltd (aries-dav.skis.ltd)|184.105.6.99|:443... connected.
4
+ HTTP request sent, awaiting response... 401 Unauthorized
5
+ Authentication selected: Basic realm="Restricted"
6
+ Reusing existing connection to aries-dav.skis.ltd:443.
7
+ HTTP request sent, awaiting response... 200 OK
8
+ Length: 13862118977 (13G) [application/zip]
9
+ Saving to: ‘step-362500-epoch-21-loss=0.0626.pt’
10
+
11
+
12
+
13
+ 2025-05-25 21:08:50 (5.32 MB/s) - ‘step-362500-epoch-21-loss=0.0626.pt’ saved [13862118977/13862118977]
14
+
config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "final-hf/paligemma-3b-pt-224-main",
3
+ "architectures": [
4
+ "PaliGemmaForConditionalGeneration"
5
+ ],
6
+ "bos_token_id": 2,
7
+ "eos_token_id": 1,
8
+ "hidden_size": 2048,
9
+ "ignore_index": -100,
10
+ "image_token_index": 257152,
11
+ "model_type": "paligemma",
12
+ "pad_token_id": 0,
13
+ "projection_dim": 2048,
14
+ "text_config": {
15
+ "hidden_size": 2048,
16
+ "intermediate_size": 16384,
17
+ "model_type": "gemma",
18
+ "num_attention_heads": 8,
19
+ "num_hidden_layers": 18,
20
+ "num_image_tokens": 256,
21
+ "num_key_value_heads": 1,
22
+ "torch_dtype": "float32",
23
+ "vocab_size": 257216
24
+ },
25
+ "torch_dtype": "float32",
26
+ "transformers_version": "4.41.0.dev0",
27
+ "vision_config": {
28
+ "hidden_size": 1152,
29
+ "intermediate_size": 4304,
30
+ "model_type": "siglip_vision_model",
31
+ "num_attention_heads": 16,
32
+ "num_hidden_layers": 27,
33
+ "num_image_tokens": 256,
34
+ "patch_size": 14,
35
+ "projection_dim": 2048,
36
+ "projector_hidden_act": "gelu_fast",
37
+ "vision_use_head": false
38
+ },
39
+ "vocab_size": 257216,
40
+ "model": {
41
+ "llm_backbone_id": "gemma-2b",
42
+ "llm_max_length": 2048,
43
+ "model_id": "paligemma-224px+3b",
44
+ "type": "paligemma-224px+3b",
45
+ "vision_backbone_id": "siglip-vit-so400m",
46
+ "arch_specifier": "linear",
47
+ "image_resize_strategy": "resize-naive",
48
+ "reduce_in_full_precision": false,
49
+ "align_epochs": 1,
50
+ "align_global_batch_size": 256,
51
+ "align_learning_rate": 0.001,
52
+ "align_lr_scheduler_type": "linear-warmup+cosine-decay",
53
+ "align_max_grad_norm": 1.0,
54
+ "align_max_steps": null,
55
+ "align_per_device_batch_size": 16,
56
+ "align_train_strategy": "fsdp-shard-grad-op",
57
+ "align_warmup_ratio": 0.03,
58
+ "align_weight_decay": 0.0,
59
+ "enable_gradient_checkpointing": true,
60
+ "enable_mixed_precision_training": true
61
+ }
62
+ }
config.yaml ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ co_training: true
2
+ data_root_dir: data
3
+ hf_token: ./hf-token
4
+ image_aug: false
5
+ is_resume: true
6
+ local_rank: 0
7
+ pretrained_checkpoint: /ariesdv0/agopalkr/openvla-logs/pi-0-224px-bridge_cotraining_st_8/checkpoints/step-285000-epoch-17-loss=0.0839.pt
8
+ resume_epoch: 17
9
+ resume_step: 285000
10
+ run_id: pi-0-224px-bridge_cotraining_st_8
11
+ run_id_note: null
12
+ run_root_dir: logs
13
+ save_interval: 2500
14
+ seed: 7
15
+ trackers:
16
+ - jsonl
17
+ - wandb
18
+ vla:
19
+ base_vlm: paligemma-224px+3b
20
+ co_training_data_mix: all+trace
21
+ data_mix: bridge
22
+ enable_gradient_checkpointing: true
23
+ enable_mixed_precision_training: true
24
+ enable_tf32: true
25
+ epochs: 50
26
+ expected_world_size: 8
27
+ freeze_llm_backbone: false
28
+ freeze_vision_backbone: false
29
+ global_batch_size: 128
30
+ learning_rate: 2.0e-05
31
+ lr_scheduler_type: linear-warmup+cosine-decay
32
+ max_grad_norm: 1.0
33
+ max_steps: null
34
+ per_device_batch_size: 16
35
+ reduce_in_full_precision: true
36
+ shuffle_buffer_size: 256000
37
+ train_strategy: fsdp-full-shard
38
+ type: pi-0-224px-bridge_cotraining_st_8
39
+ unfreeze_last_llm_layer: false
40
+ vla_id: pi-0-224px-bridge_cotraining_st_8
41
+ warmup_ratio: 0.0
42
+ weight_decay: 0.0
43
+ wandb_entity: SU-Lab-openvla
44
+ wandb_project: pi-0-cotraining
dataset_statistics.json ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bridge_dataset": {
3
+ "action": {
4
+ "mean": [
5
+ 0.00023341714404523373,
6
+ 0.0001300422882195562,
7
+ -0.00012762560800183564,
8
+ -0.00015565801004413515,
9
+ -0.0004039341874886304,
10
+ 0.0002355830802116543,
11
+ 0.5764582753181458
12
+ ],
13
+ "std": [
14
+ 0.00976572372019291,
15
+ 0.013689479790627956,
16
+ 0.012667072005569935,
17
+ 0.028534546494483948,
18
+ 0.03063802234828472,
19
+ 0.07691710442304611,
20
+ 0.49736642837524414
21
+ ],
22
+ "max": [
23
+ 0.41691166162490845,
24
+ 0.25864794850349426,
25
+ 0.21218234300613403,
26
+ 3.122201919555664,
27
+ 1.8618112802505493,
28
+ 6.280478477478027,
29
+ 1.0
30
+ ],
31
+ "min": [
32
+ -0.4007510244846344,
33
+ -0.13874775171279907,
34
+ -0.22553899884223938,
35
+ -3.2010786533355713,
36
+ -1.8618112802505493,
37
+ -6.279075622558594,
38
+ 0.0
39
+ ],
40
+ "q01": [
41
+ -0.02872725307941437,
42
+ -0.04170349963009357,
43
+ -0.026093858778476715,
44
+ -0.08092105075716972,
45
+ -0.09288699507713317,
46
+ -0.20718276381492615,
47
+ 0.0
48
+ ],
49
+ "q99": [
50
+ 0.028309678435325586,
51
+ 0.040855254605412394,
52
+ 0.040161586627364146,
53
+ 0.08192047759890528,
54
+ 0.07792850524187081,
55
+ 0.20382574498653397,
56
+ 1.0
57
+ ],
58
+ "mask": [
59
+ true,
60
+ true,
61
+ true,
62
+ true,
63
+ true,
64
+ true,
65
+ false
66
+ ]
67
+ },
68
+ "proprio": {
69
+ "mean": [
70
+ 0.0,
71
+ 0.0,
72
+ 0.0,
73
+ 0.0,
74
+ 0.0,
75
+ 0.0,
76
+ 0.0
77
+ ],
78
+ "std": [
79
+ 0.0,
80
+ 0.0,
81
+ 0.0,
82
+ 0.0,
83
+ 0.0,
84
+ 0.0,
85
+ 0.0
86
+ ],
87
+ "max": [
88
+ 0.0,
89
+ 0.0,
90
+ 0.0,
91
+ 0.0,
92
+ 0.0,
93
+ 0.0,
94
+ 0.0
95
+ ],
96
+ "min": [
97
+ 0.0,
98
+ 0.0,
99
+ 0.0,
100
+ 0.0,
101
+ 0.0,
102
+ 0.0,
103
+ 0.0
104
+ ],
105
+ "q01": [
106
+ 0.0,
107
+ 0.0,
108
+ 0.0,
109
+ 0.0,
110
+ 0.0,
111
+ 0.0,
112
+ 0.0
113
+ ],
114
+ "q99": [
115
+ 0.0,
116
+ 0.0,
117
+ 0.0,
118
+ 0.0,
119
+ 0.0,
120
+ 0.0,
121
+ 0.0
122
+ ]
123
+ },
124
+ "num_transitions": 2135463,
125
+ "num_trajectories": 60064
126
+ }
127
+ }