{
  "type": "lingbot_va",
  "n_obs_steps": 1,
  "input_features": {
    "observation.images.cam_high": {
      "type": "VISUAL",
      "shape": [3, 256, 256]
    },
    "observation.images.cam_left_wrist": {
      "type": "VISUAL",
      "shape": [3, 256, 256]
    },
    "observation.images.cam_right_wrist": {
      "type": "VISUAL",
      "shape": [3, 256, 256]
    }
  },
  "output_features": {
    "action": {
      "type": "ACTION",
      "shape": [14]
    }
  },
  "device": "cpu",
  "use_amp": false,
  "use_peft": false,
  "push_to_hub": true,
  "repo_id": null,
  "private": null,
  "tags": null,
  "license": null,
  "pretrained_path": null,
  "patch_size": [1, 2, 2],
  "num_attention_heads": 24,
  "attention_head_dim": 128,
  "in_channels": 48,
  "out_channels": 48,
  "action_dim": 30,
  "text_dim": 4096,
  "freq_dim": 256,
  "ffn_dim": 14336,
  "num_layers": 30,
  "cross_attn_norm": true,
  "eps": 1e-06,
  "rope_max_seq_len": 1024,
  "attn_mode": "torch",
  "wan_pretrained_path": "robbyant/lingbot-va-base",
  "dtype": "bfloat16",
  "text_encoder_device": "cpu",
  "obs_cam_keys": [
    "observation.images.cam_high",
    "observation.images.cam_left_wrist",
    "observation.images.cam_right_wrist"
  ],
  "image_hflip": false,
  "height": 256,
  "width": 320,
  "action_per_frame": 16,
  "frame_chunk_size": 2,
  "attn_window": 72,
  "num_inference_steps": 25,
  "video_exec_step": -1,
  "action_num_inference_steps": 50,
  "guidance_scale": 5.0,
  "action_guidance_scale": 1.0,
  "snr_shift": 5.0,
  "action_snr_shift": 1.0,
  "max_sequence_length": 512,
  "used_action_channel_ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
  "save_predicted_video": false,
  "normalization_mapping": {
    "VISUAL": "IDENTITY",
    "STATE": "IDENTITY",
    "ACTION": "IDENTITY"
  },
  "optimizer_lr": 1e-05,
  "optimizer_betas": [0.9, 0.95],
  "optimizer_eps": 1e-08,
  "optimizer_weight_decay": 0.0001,
  "optimizer_grad_clip_norm": 1.0,
  "scheduler_warmup_steps": 1000
}