Weiww99 committed on
Commit
3e0c7c4
·
verified ·
1 Parent(s): 1cfed69

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. .gitattributes +1 -0
  2. README.md +48 -0
  3. assets/Teaser.png +3 -0
  4. config.json +42 -0
  5. lingbotvla_cli.yaml +143 -0
  6. model.safetensors +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ assets/Teaser.png filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # A Pragmatic VLA Foundation Model
2
+ <p align="center">
3
+ <img src="assets/Teaser.png" width="100%">
4
+ </p>
5
+
6
+
7
+ **LingBot-VLA** focuses on being **Pragmatic**:
8
+ - **Large-scale Pre-training Data**: 20,000 hours of real-world
9
+ data from 9 popular dual-arm robot configurations.
10
+ - **Strong Performance**: Achieves clear superiority over competitors on simulation and real-world benchmarks.
11
+ - **Training Efficiency**: Represents a 1.5–2.8× speedup (depending on the underlying VLM base model) over existing VLA-oriented codebases.
12
+
13
+ ---
14
+
15
+ ## Model Sources
16
+ - Repository: https://github.com/robbyant/lingbot-vla
17
+ - Paper: A Pragmatic VLA Foundation Model
18
+ - Project Page: https://technology.robbyant.com/lingbot-vla
19
+
20
+ ## Related Models
21
+
22
+ | Model Name | Huggingface | ModelScope | Description |
23
+ | :--- | :---: | :---: | :---: |
24
+ | LingBot-VLA-4B &nbsp; | [🤗 lingbot-vla-4b](https://huggingface.co/robbyant/lingbot-vla-4b) | [🤖 lingbot-vla-4b](https://modelscope.cn/models/Robbyant/lingbot-vla-4b) | LingBot-VLA *w/o* Depth|
25
+ | LingBot-VLA-4B-Depth | [🤗 lingbot-vla-4b-depth](https://huggingface.co/robbyant/lingbot-vla-4b-depth) | [🤖 lingbot-vla-4b-depth](https://modelscope.cn/models/Robbyant/lingbot-vla-4b-depth) | LingBot-VLA *w/* Depth |
26
+ | LingBot-VLA-4B-Posttrain-Robotwin &nbsp; | [🤗 lingbot-vla-4b-posttrain-robotwin](https://huggingface.co/robbyant/lingbot-vla-4b-posttrain-robotwin) | [🤖 lingbot-vla-4b-posttrain-robotwin](https://modelscope.cn/models/Robbyant/lingbot-vla-4b-posttrain-robotwin) | LingBot-VLA-Posttrain-Robotwin *w/o* Depth|
27
+ | LingBot-VLA-4B-Depth-Posttrain-Robotwin | [🤗 lingbot-vla-4b-depth-posttrain-robotwin](https://huggingface.co/robbyant/lingbot-vla-4b-depth-posttrain-robotwin) | [🤖 lingbot-vla-4b-depth-posttrain-robotwin](https://modelscope.cn/models/Robbyant/lingbot-vla-4b-depth-posttrain-robotwin) | LingBot-VLA-Posttrain-Robotwin *w/* Depth |
28
+
29
+
30
+ ---
31
+
32
+ ## Citation
33
+ ```bibtex
34
+ @article{wu2026pragmatic,
35
+ title={A Pragmatic VLA Foundation Model},
36
+ author={Wei Wu and Fan Lu and Yunnan Wang and Shuai Yang and Shi Liu and Fangjing Wang and Shuailei Ma and He Sun and Yong Wang and Zhenqi Qiu and Houlong Xiong and Ziyu Wang and Shuai Zhou and Yiyu Ren and Kejia Zhang and Hui Yu and Jingmei Zhao and Qian Zhu and Ran Cheng and Yong-Lu Li and Yongtao Huang and Xing Zhu and Yujun Shen and Kecheng Zheng},
37
+ journal={arXiv preprint arXiv:2601.18692},
38
+ year={2026}
39
+ }
40
+ ```
41
+
42
+ ---
43
+
44
+ ## License Agreement
45
+ This project is licensed under the [Apache-2.0 License](LICENSE).
46
+
47
+ ## Acknowledgement
48
+ This codebase is built on the [VeOmni](https://arxiv.org/abs/2508.02317) project. Thanks for their excellent work!
assets/Teaser.png ADDED

Git LFS Details

  • SHA256: 41a092d8c4c12d3746a65407615437a3855700d113543c959d0a14c6dab3cd1e
  • Pointer size: 132 Bytes
  • Size of remote file: 3.21 MB
config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "type": "pi0",
3
+ "n_obs_steps": 1,
4
+ "normalization_mapping": {
5
+ "VISUAL": "IDENTITY",
6
+ "STATE": "MEAN_STD",
7
+ "ACTION": "MEAN_STD"
8
+ },
9
+ "input_features": {},
10
+ "output_features": {},
11
+ "device": "cpu",
12
+ "use_amp": false,
13
+ "chunk_size": 50,
14
+ "n_action_steps": 50,
15
+ "max_state_dim": 75,
16
+ "max_action_dim": 75,
17
+ "resize_imgs_with_padding": [
18
+ 224,
19
+ 224
20
+ ],
21
+ "empty_cameras": 0,
22
+ "adapt_to_pi_aloha": false,
23
+ "use_delta_joint_actions_aloha": false,
24
+ "tokenizer_max_length": 24,
25
+ "proj_width": 768,
26
+ "num_steps": 10,
27
+ "use_cache": true,
28
+ "attention_implementation": "flex",
29
+ "freeze_vision_encoder": false,
30
+ "train_expert_only": false,
31
+ "train_state_proj": true,
32
+ "optimizer_lr": 2.5e-05,
33
+ "optimizer_betas": [
34
+ 0.9,
35
+ 0.95
36
+ ],
37
+ "optimizer_eps": 1e-08,
38
+ "optimizer_weight_decay": 1e-10,
39
+ "scheduler_warmup_steps": 1000,
40
+ "scheduler_decay_steps": 30000,
41
+ "scheduler_decay_lr": 2.5e-06
42
+ }
lingbotvla_cli.yaml ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data:
2
+ cameras:
3
+ - camera_top
4
+ - camera_wrist_left
5
+ - camera_wrist_right
6
+ chat_template: default
7
+ data_name: multi
8
+ data_root: null
9
+ data_tag: default
10
+ data_type: conversation
11
+ dataloader_type: native
12
+ datasets_type: vla
13
+ drop_last: true
14
+ image_keys: images
15
+ joints:
16
+ - '{''arm.position'': 14}'
17
+ - '{''effector.position'': 2}'
18
+ max_seq_len: 64
19
+ norm_type: bounds_99_woclip
20
+ num_workers: 20
21
+ pin_memory: true
22
+ prefetch_factor: 4
23
+ robot_config_root: ./configs/robot_configs
24
+ source_name: null
25
+ text_keys: messages
26
+ train_path: ./assets/training_data/robotwin_all_tasks_with_aug.txt
27
+ train_size: 8000000
28
+ model:
29
+ adanorm_time: true
30
+ attn_implementation: flash_attention_2
31
+ basic_modules: []
32
+ decoders: {}
33
+ depth_incremental_training: false
34
+ encode_target: false
35
+ encoders: {}
36
+ final_norm_adanorm: false
37
+ force_use_huggingface: false
38
+ incremental_training: false
39
+ input_encoder: encoder
40
+ moe_implementation: null
41
+ nosplit_gate_liner: false
42
+ old_adanorm: true
43
+ output_encoder: decoder
44
+ post_training: true
45
+ separate_time_proj: false
46
+ split_gate_liner: false
47
+ use_lm_head: false
48
+ vlm_repo_id: null
49
+ vocab_size: 0
50
+ train:
51
+ action_dim: 14
52
+ activation_gpu_limit: 0.0
53
+ align_params: {}
54
+ bsz_warmup_init_mbtoken: 200
55
+ bsz_warmup_ratio: 0
56
+ chunk_size: 50
57
+ ckpt_manager: dcp
58
+ context_parallel_size: 1
59
+ data_parallel_mode: fsdp2
60
+ data_parallel_replicate_size: 1
61
+ data_parallel_shard_size: 8
62
+ decayed_max_grad_norm: 1.0
63
+ dyn_bsz: true
64
+ dyn_bsz_buffer_size: 200
65
+ dyn_bsz_margin: 0
66
+ ema_rate: 0.9999
67
+ embodiment_name: null
68
+ empty_cache_steps: 500
69
+ enable_activation_offload: false
70
+ enable_expert_vision: false
71
+ enable_forward_prefetch: true
72
+ enable_fp32: true
73
+ enable_fsdp_offload: false
74
+ enable_full_determinism: false
75
+ enable_full_shard: false
76
+ enable_gradient_checkpointing: true
77
+ enable_manual_eager: false
78
+ enable_mixed_precision: true
79
+ enable_profiling: false
80
+ enable_reentrant: false
81
+ enable_resume: true
82
+ expert_parallel_size: 1
83
+ expert_vision_path: null
84
+ expert_vision_type: null
85
+ freeze_vision_encoder: false
86
+ freeze_vit: false
87
+ global_batch_size: 256
88
+ ignore_depth: false
89
+ init_device: cuda
90
+ interact_layer_end: null
91
+ interact_layer_start: null
92
+ load_checkpoint_path: null
93
+ lr: 0.0001
94
+ lr_decay_ratio: 1.0
95
+ lr_decay_style: constant
96
+ lr_min: 1.0e-07
97
+ lr_start: 0.0
98
+ lr_warmup_ratio: 0
99
+ max_action_dim: 75
100
+ max_grad_norm: 1.0
101
+ max_state_dim: 75
102
+ max_steps: 220000
103
+ micro_batch_size: 32
104
+ module_fsdp_enable: true
105
+ my_tokenizer_max_length: 72
106
+ norm_qkv: false
107
+ num_train_epochs: 29000
108
+ optimizer: adamw
109
+ pipeline_parallel_size: 1
110
+ pre_train: false
111
+ profile_end_step: 2
112
+ profile_profile_memory: true
113
+ profile_record_shapes: true
114
+ profile_start_step: 1
115
+ profile_trace_dir: ./trace
116
+ profile_with_stack: true
117
+ qwenvl_bos: true
118
+ resume_dataloader_state: true
119
+ rmpad: false
120
+ rmpad_with_pos_ids: false
121
+ save_epochs: 29000
122
+ save_hf_weights: true
123
+ save_steps: 10000
124
+ seed: 42
125
+ skip_max_norm: false
126
+ stable_train_steps: 100000
127
+ tensor_parallel_size: 1
128
+ tokenizer_max_length: 24
129
+ ulysses_parallel_size: 1
130
+ use_compile: true
131
+ use_doptim: false
132
+ use_ema: false
133
+ use_fast_action: false
134
+ use_ki: false
135
+ use_prompt: false
136
+ use_state: false
137
+ use_subtask: false
138
+ use_wandb: false
139
+ vit_lr: 1.0e-06
140
+ vlm_causal: false
141
+ wandb_name: pi0_libero
142
+ wandb_project: pi0_libero
143
+ weight_decay: 0
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12153da33b2712d1e17a35a334596f205bb95b6dcd6ad8725ef5d27d6d968752
3
+ size 16789932052