rrmura commited on
Commit
b6e6bfc
·
verified ·
1 Parent(s): e9fc8f2

Upload folder using huggingface_hub

Browse files
merging-checkpoints/checkpoints/Merged_LoRA_Task33_FIXED/checkpoints/pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:403eb538b1d29fb278e2fa8e083861ab849cb0595fc76e3ce9f9b2d06971700b
3
+ size 8146424955
merging-checkpoints/checkpoints/Merged_LoRA_Task33_FIXED/config.json ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "run_id": "1025_libero4in1_qwenfast",
3
+ "run_root_dir": "./results/Checkpoints",
4
+ "seed": 42,
5
+ "trackers": [
6
+ "jsonl",
7
+ "wandb"
8
+ ],
9
+ "wandb_entity": "jinhuiye",
10
+ "wandb_project": "StarVLA_Libero",
11
+ "is_debug": false,
12
+ "framework": {
13
+ "name": "QwenFast",
14
+ "qwenvl": {
15
+ "base_vlm": "./playground/Pretrained_models/Qwen2.5-VL-3B-Instruct-Action",
16
+ "attn_implementation": "flash_attention_2",
17
+ "vl_hidden_dim": 2048
18
+ },
19
+ "dino": {
20
+ "dino_backbone": "dinov2_vits14"
21
+ },
22
+ "action_model": {
23
+ "action_model_type": "DiT-B",
24
+ "action_hidden_dim": 1024,
25
+ "add_pos_embed": true,
26
+ "max_seq_len": 1024,
27
+ "action_dim": 7,
28
+ "state_dim": 7,
29
+ "future_action_window_size": 7,
30
+ "action_horizon": 8,
31
+ "past_action_window_size": 0,
32
+ "repeated_diffusion_steps": 8,
33
+ "noise_beta_alpha": 1.5,
34
+ "noise_beta_beta": 1.0,
35
+ "noise_s": 0.999,
36
+ "num_timestep_buckets": 1000,
37
+ "num_inference_timesteps": 4,
38
+ "num_target_vision_tokens": 32,
39
+ "diffusion_model_cfg": {
40
+ "cross_attention_dim": 2048,
41
+ "dropout": 0.2,
42
+ "final_dropout": true,
43
+ "interleave_self_attention": true,
44
+ "norm_type": "ada_norm",
45
+ "num_layers": 16,
46
+ "output_dim": 1024,
47
+ "positional_embeddings": null
48
+ }
49
+ },
50
+ "reduce_in_full_precision": true
51
+ },
52
+ "datasets": {
53
+ "vlm_data": {
54
+ "dataset_py": "vlm_datasets",
55
+ "dataformat": "llava_json",
56
+ "dataset_use": "asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en",
57
+ "eval_dataset": "aokvqa_cauldron_llava_format",
58
+ "data_flatten": false,
59
+ "base_interval": 2,
60
+ "max_pixels": 12845056,
61
+ "min_pixels": 3136,
62
+ "model_max_length": 2048,
63
+ "model_type": "qwen2.5vl",
64
+ "per_device_batch_size": 3
65
+ },
66
+ "vla_data": {
67
+ "dataset_py": "lerobot_datasets",
68
+ "data_root_dir": "playground/Datasets/LEROBOT_LIBERO_DATA",
69
+ "data_mix": "libero_all",
70
+ "action_type": "delta_qpos",
71
+ "CoT_prompt": "Your task is {instruction}. To identify the key objects for your task. Locate their bounding boxes in [x1,y1,x2,y2] format.",
72
+ "CoT_answer": "bbox",
73
+ "default_image_resolution": [
74
+ 3,
75
+ 224,
76
+ 224
77
+ ],
78
+ "per_device_batch_size": 16,
79
+ "load_all_data_for_training": true,
80
+ "obs": [
81
+ "image_0"
82
+ ]
83
+ }
84
+ },
85
+ "trainer": {
86
+ "epochs": 100,
87
+ "max_train_steps": 80000,
88
+ "num_warmup_steps": 5000,
89
+ "save_interval": 10000,
90
+ "eval_interval": 1000,
91
+ "learning_rate": {
92
+ "base": 3e-05,
93
+ "qwen_vl_interface": 1e-05,
94
+ "action_model": 0.0001
95
+ },
96
+ "lr_scheduler_type": "cosine_with_min_lr",
97
+ "scheduler_specific_kwargs": {
98
+ "min_lr": 1e-06
99
+ },
100
+ "freeze_modules": true,
101
+ "loss_scale": {
102
+ "vla": 1.0,
103
+ "vlm": 0.1
104
+ },
105
+ "max_grad_norm": 1.0,
106
+ "warmup_ratio": 0.1,
107
+ "weight_decay": 0.0,
108
+ "logging_frequency": 10,
109
+ "gradient_clipping": 1.0,
110
+ "gradient_accumulation_steps": 1,
111
+ "optimizer": {
112
+ "name": "AdamW",
113
+ "betas": [
114
+ 0.9,
115
+ 0.95
116
+ ],
117
+ "eps": 1e-08,
118
+ "weight_decay": 1e-08
119
+ },
120
+ "is_resume": false,
121
+ "resume_epoch": null,
122
+ "resume_step": null,
123
+ "enable_gradient_checkpointing": true,
124
+ "enable_mixed_precision_training": true
125
+ },
126
+ "is_resume": false,
127
+ "output_dir": "./results/Checkpoints/1025_libero4in1_qwenfast"
128
+ }
merging-checkpoints/checkpoints/Merged_LoRA_Task33_FIXED/config.yaml ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ datasets:
2
+ vla_data:
3
+ CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
4
+ Locate their bounding boxes in [x1,y1,x2,y2] format.
5
+ data_mix: libero_90_task_33
6
+ data_root_dir: playground/Datasets/LEROBOT_LIBERO_DATA
7
+ dataset_py: lerobot_datasets
8
+ per_device_batch_size: 1
9
+ sequential_step_sampling: false
10
+ video_backend: torchvision_av
11
+ framework:
12
+ action_model:
13
+ action_dim: 7
14
+ future_action_window_size: 7
15
+ past_action_window_size: 0
16
+ name: QwenFast
17
+ qwenvl:
18
+ base_vlm: playground/Pretrained_models/Qwen2.5-VL-3B-Instruct-Action
19
+ lora_config:
20
+ lora_alpha: 32
21
+ lora_dropout: 0.05
22
+ r: 16
23
+ target_modules:
24
+ - q_proj
25
+ - v_proj
26
+ - k_proj
27
+ - o_proj
28
+ output_dir: ./results/Checkpoints/finetune_lora_task33_2000step
29
+ run_id: finetune_lora_task33_2000step
30
+ run_root_dir: ./results/Checkpoints
31
+ seed: 42
32
+ trainer:
33
+ eval_interval: 100
34
+ freeze_modules: qwen_vl_interface.model.model.visual,dino_encoder
35
+ gradient_accumulation_steps: 1
36
+ gradient_clipping: 1.0
37
+ is_resume: true
38
+ learning_rate:
39
+ action_model: 0.0001
40
+ base: 2.5e-05
41
+ qwen_vl_interface: 1.0e-05
42
+ logging_frequency: 100
43
+ lr_scheduler_type: cosine_with_min_lr
44
+ max_train_steps: 2000
45
+ num_warmup_steps: 5000
46
+ optimizer:
47
+ betas:
48
+ - 0.9
49
+ - 0.95
50
+ eps: 1.0e-08
51
+ weight_decay: 1.0e-08
52
+ pretrained_checkpoint: /content/starVLA_r/results/Checkpoints/Qwen2.5-VL-FAST-LIBERO-4in1/checkpoints/steps_30000_pytorch_model.pt
53
+ save_interval: 500
54
+ scheduler_specific_kwargs:
55
+ min_lr: 1.0e-06
56
+ use_lora: true
57
+ wandb_entity: michellelin9102-usc
58
+ wandb_project: starVLA_Libero
merging-checkpoints/checkpoints/Merged_LoRA_Task33_FIXED/dataset_statistics.json ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "franka": {
3
+ "action": {
4
+ "mean": [
5
+ 0.07237596483901143,
6
+ 0.08987006871029735,
7
+ -0.10144743137061596,
8
+ -0.00045383188989944756,
9
+ 0.006273590726777911,
10
+ -0.003878799732774496,
11
+ 0.524486355483532
12
+ ],
13
+ "std": [
14
+ 0.3498823308902479,
15
+ 0.37794140366375184,
16
+ 0.460084266976933,
17
+ 0.0403885784928603,
18
+ 0.06616144248501059,
19
+ 0.07763074391911857,
20
+ 0.4994683356809767
21
+ ],
22
+ "max": [
23
+ 0.9375,
24
+ 0.9375,
25
+ 0.9375,
26
+ 0.3557142913341522,
27
+ 0.375,
28
+ 0.375,
29
+ 1.0
30
+ ],
31
+ "min": [
32
+ -0.9375,
33
+ -0.9375,
34
+ -0.9375,
35
+ -0.2582142949104309,
36
+ -0.375,
37
+ -0.3675000071525574,
38
+ 0.0
39
+ ],
40
+ "q01": [
41
+ -0.8785714507102966,
42
+ -0.8758928775787354,
43
+ -0.9375,
44
+ -0.1510714292526245,
45
+ -0.20678570866584778,
46
+ -0.2742857038974762,
47
+ 0.0
48
+ ],
49
+ "q99": [
50
+ 0.9375,
51
+ 0.9107142686843872,
52
+ 0.9375,
53
+ 0.20357142388820648,
54
+ 0.26357144117355347,
55
+ 0.375,
56
+ 1.0
57
+ ],
58
+ "mask": [
59
+ true,
60
+ true,
61
+ true,
62
+ true,
63
+ true,
64
+ true,
65
+ false
66
+ ]
67
+ },
68
+ "state": {
69
+ "mean": [
70
+ -0.04889854742214084,
71
+ 0.03689368185587227,
72
+ 0.7890402488410473,
73
+ 2.9771945476531982,
74
+ -0.1417286954820156,
75
+ -0.11769362539052963,
76
+ 0.026436020154505968,
77
+ -0.02665513101965189
78
+ ],
79
+ "std": [
80
+ 0.10639013941746686,
81
+ 0.15115733130675715,
82
+ 0.38406895599530033,
83
+ 0.3530238395244304,
84
+ 0.8227341427331599,
85
+ 0.32357567121520087,
86
+ 0.014583991652936385,
87
+ 0.014467005007200339
88
+ ],
89
+ "max": [
90
+ 0.21031762659549713,
91
+ 0.39128610491752625,
92
+ 1.3660105466842651,
93
+ 3.6714255809783936,
94
+ 3.560650587081909,
95
+ 1.386339545249939,
96
+ 0.04233968257904053,
97
+ 0.0013633022317662835
98
+ ],
99
+ "min": [
100
+ -0.4828203022480011,
101
+ -0.3255046010017395,
102
+ 0.008128180168569088,
103
+ 0.35277295112609863,
104
+ -3.641430377960205,
105
+ -1.842738389968872,
106
+ -0.0013586411951109767,
107
+ -0.042040832340717316
108
+ ],
109
+ "q01": [
110
+ -0.42401049643754957,
111
+ -0.2838300323486328,
112
+ 0.009925739830359817,
113
+ 1.3085840785503386,
114
+ -2.886677579879761,
115
+ -1.1599004411697387,
116
+ 0.001503719249740243,
117
+ -0.040336399003863335
118
+ ],
119
+ "q99": [
120
+ 0.1530261474847791,
121
+ 0.3629165390133857,
122
+ 1.2910678112506866,
123
+ 3.303542451858519,
124
+ 2.7496529006957933,
125
+ 0.6893712210655194,
126
+ 0.040610933862626555,
127
+ -0.0015016929572448147
128
+ ]
129
+ },
130
+ "num_transitions": 272104,
131
+ "num_trajectories": 1693
132
+ }
133
+ }