Weiww99 committed on
Commit
3e0c7c4
·
verified ·
1 Parent(s): 1cfed69

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. .gitattributes +1 -0
  2. README.md +48 -0
  3. assets/Teaser.png +3 -0
  4. config.json +42 -0
  5. lingbotvla_cli.yaml +143 -0
  6. model.safetensors +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ assets/Teaser.png filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # A Pragmatic VLA Foundation Model
2
+ <p align="center">
3
+ <img src="assets/Teaser.png" width="100%">
4
+ </p>
5
+
6
+
7
+ **LingBot-VLA** focuses on being **Pragmatic**:
8
+ - **Large-scale Pre-training Data**: 20,000 hours of real-world
9
+ data from 9 popular dual-arm robot configurations.
10
+ - **Strong Performance**: Achieves clear superiority over competitors on simulation and real-world benchmarks.
11
+ - **Training Efficiency**: Represents a 1.5–2.8× speedup (depending on the underlying VLM base model) over existing VLA-oriented codebases.
12
+
13
+ ---
14
+
15
+ ## Model Sources
16
+ - Repository: https://github.com/robbyant/lingbot-vla
17
+ - Paper: A Pragmatic VLA Foundation Model
18
+ - Project Page: https://technology.robbyant.com/lingbot-vla
19
+
20
+ ## Related Models
21
+
22
+ | Model Name | Huggingface | ModelScope | Description |
23
+ | :--- | :---: | :---: | :---: |
24
+ | LingBot-VLA-4B &nbsp; | [🤗 lingbot-vla-4b](https://huggingface.co/robbyant/lingbot-vla-4b) | [🤖 lingbot-vla-4b](https://modelscope.cn/models/Robbyant/lingbot-vla-4b) | LingBot-VLA *w/o* Depth|
25
+ | LingBot-VLA-4B-Depth | [🤗 lingbot-vla-4b-depth](https://huggingface.co/robbyant/lingbot-vla-4b-depth) | [🤖 lingbot-vla-4b-depth](https://modelscope.cn/models/Robbyant/lingbot-vla-4b-depth) | LingBot-VLA *w/* Depth |
26
+ | LingBot-VLA-4B-Posttrain-Robotwin &nbsp; | [🤗 lingbot-vla-4b-posttrain-robotwin](https://huggingface.co/robbyant/lingbot-vla-4b-posttrain-robotwin) | [🤖 lingbot-vla-4b-posttrain-robotwin](https://modelscope.cn/models/Robbyant/lingbot-vla-4b-posttrain-robotwin) | LingBot-VLA-Posttrain-Robotwin *w/o* Depth|
27
+ | LingBot-VLA-4B-Depth-Posttrain-Robotwin | [🤗 lingbot-vla-4b-depth-posttrain-robotwin](https://huggingface.co/robbyant/lingbot-vla-4b-depth-posttrain-robotwin) | [🤖 lingbot-vla-4b-depth-posttrain-robotwin](https://modelscope.cn/models/Robbyant/lingbot-vla-4b-depth-posttrain-robotwin) | LingBot-VLA-Posttrain-Robotwin *w/* Depth |
28
+
29
+
30
+ ---
31
+
32
+ ## Citation
33
+ ```bibtex
34
+ @article{wu2026pragmatic,
35
+ title={A Pragmatic VLA Foundation Model},
36
+ author={Wei Wu and Fan Lu and Yunnan Wang and Shuai Yang and Shi Liu and Fangjing Wang and Shuailei Ma and He Sun and Yong Wang and Zhenqi Qiu and Houlong Xiong and Ziyu Wang and Shuai Zhou and Yiyu Ren and Kejia Zhang and Hui Yu and Jingmei Zhao and Qian Zhu and Ran Cheng and Yong-Lu Li and Yongtao Huang and Xing Zhu and Yujun Shen and Kecheng Zheng},
37
+ journal={arXiv preprint arXiv:2601.18692},
38
+ year={2026}
39
+ }
40
+ ```
41
+
42
+ ---
43
+
44
+ ## License Agreement
45
+ This project is licensed under the [Apache-2.0 License](LICENSE).
46
+
47
+ ## Acknowledgement
48
+ This codebase is built on the [VeOmni](https://arxiv.org/abs/2508.02317) project. Thanks for their excellent work!
assets/Teaser.png ADDED

Git LFS Details

  • SHA256: 41a092d8c4c12d3746a65407615437a3855700d113543c959d0a14c6dab3cd1e
  • Pointer size: 132 Bytes
  • Size of remote file: 3.21 MB
config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "type": "pi0",
3
+ "n_obs_steps": 1,
4
+ "normalization_mapping": {
5
+ "VISUAL": "IDENTITY",
6
+ "STATE": "MEAN_STD",
7
+ "ACTION": "MEAN_STD"
8
+ },
9
+ "input_features": {},
10
+ "output_features": {},
11
+ "device": "cpu",
12
+ "use_amp": false,
13
+ "chunk_size": 50,
14
+ "n_action_steps": 50,
15
+ "max_state_dim": 75,
16
+ "max_action_dim": 75,
17
+ "resize_imgs_with_padding": [
18
+ 224,
19
+ 224
20
+ ],
21
+ "empty_cameras": 0,
22
+ "adapt_to_pi_aloha": false,
23
+ "use_delta_joint_actions_aloha": false,
24
+ "tokenizer_max_length": 24,
25
+ "proj_width": 768,
26
+ "num_steps": 10,
27
+ "use_cache": true,
28
+ "attention_implementation": "flex",
29
+ "freeze_vision_encoder": false,
30
+ "train_expert_only": false,
31
+ "train_state_proj": true,
32
+ "optimizer_lr": 2.5e-05,
33
+ "optimizer_betas": [
34
+ 0.9,
35
+ 0.95
36
+ ],
37
+ "optimizer_eps": 1e-08,
38
+ "optimizer_weight_decay": 1e-10,
39
+ "scheduler_warmup_steps": 1000,
40
+ "scheduler_decay_steps": 30000,
41
+ "scheduler_decay_lr": 2.5e-06
42
+ }
lingbotvla_cli.yaml ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data:
2
+ cameras:
3
+ - camera_top
4
+ - camera_wrist_left
5
+ - camera_wrist_right
6
+ chat_template: default
7
+ data_name: multi
8
+ data_root: null
9
+ data_tag: default
10
+ data_type: conversation
11
+ dataloader_type: native
12
+ datasets_type: vla
13
+ drop_last: true
14
+ image_keys: images
15
+ joints:
16
+ - '{''arm.position'': 14}'
17
+ - '{''effector.position'': 2}'
18
+ max_seq_len: 64
19
+ norm_type: bounds_99_woclip
20
+ num_workers: 20
21
+ pin_memory: true
22
+ prefetch_factor: 4
23
+ robot_config_root: ./configs/robot_configs
24
+ source_name: null
25
+ text_keys: messages
26
+ train_path: ./assets/training_data/robotwin_all_tasks_with_aug.txt
27
+ train_size: 8000000
28
+ model:
29
+ adanorm_time: true
30
+ attn_implementation: flash_attention_2
31
+ basic_modules: []
32
+ decoders: {}
33
+ depth_incremental_training: false
34
+ encode_target: false
35
+ encoders: {}
36
+ final_norm_adanorm: false
37
+ force_use_huggingface: false
38
+ incremental_training: false
39
+ input_encoder: encoder
40
+ moe_implementation: null
41
+ nosplit_gate_liner: false
42
+ old_adanorm: true
43
+ output_encoder: decoder
44
+ post_training: true
45
+ separate_time_proj: false
46
+ split_gate_liner: false
47
+ use_lm_head: false
48
+ vlm_repo_id: null
49
+ vocab_size: 0
50
+ train:
51
+ action_dim: 14
52
+ activation_gpu_limit: 0.0
53
+ align_params: {}
54
+ bsz_warmup_init_mbtoken: 200
55
+ bsz_warmup_ratio: 0
56
+ chunk_size: 50
57
+ ckpt_manager: dcp
58
+ context_parallel_size: 1
59
+ data_parallel_mode: fsdp2
60
+ data_parallel_replicate_size: 1
61
+ data_parallel_shard_size: 8
62
+ decayed_max_grad_norm: 1.0
63
+ dyn_bsz: true
64
+ dyn_bsz_buffer_size: 200
65
+ dyn_bsz_margin: 0
66
+ ema_rate: 0.9999
67
+ embodiment_name: null
68
+ empty_cache_steps: 500
69
+ enable_activation_offload: false
70
+ enable_expert_vision: false
71
+ enable_forward_prefetch: true
72
+ enable_fp32: true
73
+ enable_fsdp_offload: false
74
+ enable_full_determinism: false
75
+ enable_full_shard: false
76
+ enable_gradient_checkpointing: true
77
+ enable_manual_eager: false
78
+ enable_mixed_precision: true
79
+ enable_profiling: false
80
+ enable_reentrant: false
81
+ enable_resume: true
82
+ expert_parallel_size: 1
83
+ expert_vision_path: null
84
+ expert_vision_type: null
85
+ freeze_vision_encoder: false
86
+ freeze_vit: false
87
+ global_batch_size: 256
88
+ ignore_depth: false
89
+ init_device: cuda
90
+ interact_layer_end: null
91
+ interact_layer_start: null
92
+ load_checkpoint_path: null
93
+ lr: 0.0001
94
+ lr_decay_ratio: 1.0
95
+ lr_decay_style: constant
96
+ lr_min: 1.0e-07
97
+ lr_start: 0.0
98
+ lr_warmup_ratio: 0
99
+ max_action_dim: 75
100
+ max_grad_norm: 1.0
101
+ max_state_dim: 75
102
+ max_steps: 220000
103
+ micro_batch_size: 32
104
+ module_fsdp_enable: true
105
+ my_tokenizer_max_length: 72
106
+ norm_qkv: false
107
+ num_train_epochs: 29000
108
+ optimizer: adamw
109
+ pipeline_parallel_size: 1
110
+ pre_train: false
111
+ profile_end_step: 2
112
+ profile_profile_memory: true
113
+ profile_record_shapes: true
114
+ profile_start_step: 1
115
+ profile_trace_dir: ./trace
116
+ profile_with_stack: true
117
+ qwenvl_bos: true
118
+ resume_dataloader_state: true
119
+ rmpad: false
120
+ rmpad_with_pos_ids: false
121
+ save_epochs: 29000
122
+ save_hf_weights: true
123
+ save_steps: 10000
124
+ seed: 42
125
+ skip_max_norm: false
126
+ stable_train_steps: 100000
127
+ tensor_parallel_size: 1
128
+ tokenizer_max_length: 24
129
+ ulysses_parallel_size: 1
130
+ use_compile: true
131
+ use_doptim: false
132
+ use_ema: false
133
+ use_fast_action: false
134
+ use_ki: false
135
+ use_prompt: false
136
+ use_state: false
137
+ use_subtask: false
138
+ use_wandb: false
139
+ vit_lr: 1.0e-06
140
+ vlm_causal: false
141
+ wandb_name: pi0_libero
142
+ wandb_project: pi0_libero
143
+ weight_decay: 0
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12153da33b2712d1e17a35a334596f205bb95b6dcd6ad8725ef5d27d6d968752
3
+ size 16789932052