hywslxh committed on
Commit
ce462d5
·
verified ·
1 Parent(s): 0012f0d

Update configs/kosmos_ph_oxe-pretrain.json

Browse files
Files changed (1) hide show
  1. configs/kosmos_ph_oxe-pretrain.json +31 -51
configs/kosmos_ph_oxe-pretrain.json CHANGED
@@ -1,18 +1,13 @@
1
  {
 
2
  "parent": null,
 
3
  "model": "kosmos",
 
4
  "seq_len": 1,
5
  "image_size": 224,
6
- "image_mean": [
7
- 0.48145466,
8
- 0.4578275,
9
- 0.40821073
10
- ],
11
- "image_std": [
12
- 0.26862954,
13
- 0.26130258,
14
- 0.27577711
15
- ],
16
  "window_size": 16,
17
  "fwd_pred_next_n": 10,
18
  "arm_gripper_loss_ratio": 0.01,
@@ -23,11 +18,11 @@
23
  "num_workers": 16,
24
  "data_scale": 1,
25
  "optimizer": "adam",
26
- "learning_rate": 2e-05,
27
- "min_lr_scale": 0.01,
28
  "weight_decay": 0,
29
- "warmup_epochs": 0,
30
- "warmup_steps": 5000,
31
  "warmup_ratio": null,
32
  "use_hand_rgb": true,
33
  "use_time_causal_attn": false,
@@ -37,16 +32,16 @@
37
  "use_vision_resampler": false,
38
  "vision_masked_ratio": 0.9,
39
  "use_tube_mask": false,
40
- "output_root": "/mnt/bn/robotics-data-lxh-lq-v2/checkpoints/video_pretrain_manipulation/kosmos/calvin_finetune",
41
- "log_root": "/mnt/bn/robotics-data-lxh-lq-v2/logs/video_pretrain_manipulation/kosmos/calvin_finetune",
42
- "cache_root": "/mnt/bn/robotics-data-lxh-lq-v2/cache/video_pretrain_manipulation/kosmos",
43
  "model_load_path": null,
44
  "model_load_source": "torch",
45
  "resume": null,
46
- "model_path": "/mnt/bn/robotics-data-lxh-lq/LLaVA/kosmos-2-patch14-224",
47
- "model_config": "/mnt/bn/robotics-data-lxh-lq/LLaVA/kosmos-2-patch14-224/config.json",
48
  "train_setup": {
49
- "precision": "16-mixed",
50
  "predict_action": true,
51
  "predict_forward": false,
52
  "predict_forward_hand": false,
@@ -61,7 +56,7 @@
61
  "mm_use_im_patch_token": false,
62
  "gradient_checkpointing": false,
63
  "lora_enable": false,
64
- "mm_projector_lr": 0.0001,
65
  "lora_r": 64,
66
  "lora_alpha": 16,
67
  "lora_dropout": 0.05,
@@ -91,31 +86,33 @@
91
  "fwd_head": null,
92
  "tokenizer": {
93
  "type": "AutoProcessor",
94
- "pretrained_model_name_or_path": "/mnt/bn/robotics-data-lxh-lq/LLaVA/kosmos-2-patch14-224",
95
  "tokenizer_type": "kosmos",
96
  "max_text_len": 256,
97
  "additional_special_tokens": null
98
  },
 
 
 
 
 
99
  "trainer": {
100
  "accelerator": "gpu",
101
  "strategy": "deepspeed_stage_2",
102
- "precision": "16-mixed",
103
- "logger": [
104
- "tensorboard"
105
- ],
106
  "gradient_clip_val": 1.0,
107
  "use_distributed_sampler": false,
108
  "log_every_n_steps": 10,
109
  "max_epochs": 5,
110
  "val_check_interval": 10000,
111
  "check_val_every_n_epoch": null,
112
- "max_steps": 100000,
113
- "accumulate_grad_batches": 1,
114
- "limit_val_batches": 1000
115
  },
116
  "train_dataset": {
117
  "type": "OpenVLADataset",
118
- "data_root_dir": "/mnt/bn/robotics-data-lxh-lq/openvla/datasets/open-x-embodiment",
119
  "model_name": "kosmos",
120
  "image_aug": true,
121
  "mode": "train",
@@ -127,7 +124,7 @@
127
  },
128
  "val_dataset": {
129
  "type": "OpenVLADataset",
130
- "data_root_dir": "/mnt/bn/robotics-data-lxh-lq/openvla/datasets/open-x-embodiment",
131
  "model_name": "kosmos",
132
  "mode": "train",
133
  "data_mix": "rt_1",
@@ -136,24 +133,7 @@
136
  "shuffle_buffer_size": 10000,
137
  "train": false
138
  },
139
- "raw_config_path": "configs/kosmos/oxe_new/finetune_kosmos_cont-lstm-post_full-ft_text_vision_wd=0_hist=16_act=10_use-hand_aug-shift_act-norm_lr-2e-5_oxe.json",
140
- "config": "configs/kosmos/oxe_new/finetune_kosmos_cont-lstm-post_full-ft_text_vision_wd=0_hist=16_act=10_use-hand_aug-shift_act-norm_lr-2e-5_oxe.json",
141
- "gpus": 8,
142
- "num_nodes": 4,
143
- "log_dir": "/mnt/bn/robotics-data-lxh-lq-v2/logs/video_pretrain_manipulation/kosmos/calvin_finetune/2024-11-21/01-16",
144
- "output_dir": "/mnt/bn/robotics-data-lxh-lq-v2/checkpoints/video_pretrain_manipulation/kosmos/calvin_finetune/2024-11-21/01-16",
145
- "data_dir": null,
146
- "annotation_file": null,
147
- "data_subfolder": null,
148
- "task_num": null,
149
- "exp_name": "01-16",
150
- "use_multi_modal_emb": false,
151
- "no_video_pretrained_model": false,
152
- "finetune": false,
153
- "llm": {
154
- "type": null,
155
- "n_embd": null,
156
- "n_layer": null,
157
- "n_head": null
158
- }
159
  }
 
1
  {
2
+ "robovlm_name": "RoboKosMos",
3
  "parent": null,
4
+ "task_name": "calvin_finetune",
5
  "model": "kosmos",
6
+ "model_url": "https://huggingface.co/microsoft/kosmos-2-patch14-224",
7
  "seq_len": 1,
8
  "image_size": 224,
9
+ "image_mean": [0.48145466, 0.4578275, 0.40821073],
10
+ "image_std": [0.26862954, 0.26130258, 0.27577711],
 
 
 
 
 
 
 
 
11
  "window_size": 16,
12
  "fwd_pred_next_n": 10,
13
  "arm_gripper_loss_ratio": 0.01,
 
18
  "num_workers": 16,
19
  "data_scale": 1,
20
  "optimizer": "adam",
21
+ "learning_rate": 2e-5,
22
+ "min_lr_scale": 1e-2,
23
  "weight_decay": 0,
24
+ "warmup_epochs": 0.25,
25
+ "warmup_steps": 0,
26
  "warmup_ratio": null,
27
  "use_hand_rgb": true,
28
  "use_time_causal_attn": false,
 
32
  "use_vision_resampler": false,
33
  "vision_masked_ratio": 0.9,
34
  "use_tube_mask": false,
35
+ "output_root": "runs/checkpoints",
36
+ "log_root": "runs/logs",
37
+ "cache_root": "runs/cache",
38
  "model_load_path": null,
39
  "model_load_source": "torch",
40
  "resume": null,
41
+ "model_path": ".vlms/kosmos-2-patch14-224",
42
+ "model_config": ".vlms/kosmos-2-patch14-224/config.json",
43
  "train_setup": {
44
+ "precision": "bf16",
45
  "predict_action": true,
46
  "predict_forward": false,
47
  "predict_forward_hand": false,
 
56
  "mm_use_im_patch_token": false,
57
  "gradient_checkpointing": false,
58
  "lora_enable": false,
59
+ "mm_projector_lr": 1e-4,
60
  "lora_r": 64,
61
  "lora_alpha": 16,
62
  "lora_dropout": 0.05,
 
86
  "fwd_head": null,
87
  "tokenizer": {
88
  "type": "AutoProcessor",
89
+ "pretrained_model_name_or_path": ".vlms/kosmos-2-patch14-224",
90
  "tokenizer_type": "kosmos",
91
  "max_text_len": 256,
92
  "additional_special_tokens": null
93
  },
94
+ "vlm": {
95
+ "type": "AutoModelForVision2Seq",
96
+ "name": "kosmos",
97
+ "pretrained_model_name_or_path": ".vlms/kosmos-2-patch14-224"
98
+ },
99
  "trainer": {
100
  "accelerator": "gpu",
101
  "strategy": "deepspeed_stage_2",
102
+ "precision": "16",
103
+ "logger": ["tensorboard"],
 
 
104
  "gradient_clip_val": 1.0,
105
  "use_distributed_sampler": false,
106
  "log_every_n_steps": 10,
107
  "max_epochs": 5,
108
  "val_check_interval": 10000,
109
  "check_val_every_n_epoch": null,
110
+ "max_steps": 50000,
111
+ "accumulate_grad_batches": 1
 
112
  },
113
  "train_dataset": {
114
  "type": "OpenVLADataset",
115
+ "data_root_dir": "datasets/open-x-embodiment",
116
  "model_name": "kosmos",
117
  "image_aug": true,
118
  "mode": "train",
 
124
  },
125
  "val_dataset": {
126
  "type": "OpenVLADataset",
127
+ "data_root_dir": "datasets/open-x-embodiment",
128
  "model_name": "kosmos",
129
  "mode": "train",
130
  "data_mix": "rt_1",
 
133
  "shuffle_buffer_size": 10000,
134
  "train": false
135
  },
136
+ "norm_action": true,
137
+ "norm_min": -0.65,
138
+ "norm_max": 0.65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  }