lingbot_va_base / config.json
pepijn223's picture
pepijn223 HF Staff
Upload folder using huggingface_hub
f605e84 verified
Raw
History Blame Contribute Delete
2.59 kB
{
  "type": "lingbot_va",
  "n_obs_steps": 1,
  "input_features": {
    "observation.images.cam_high": {
      "type": "VISUAL",
      "shape": [3, 256, 256]
    },
    "observation.images.cam_left_wrist": {
      "type": "VISUAL",
      "shape": [3, 256, 256]
    },
    "observation.images.cam_right_wrist": {
      "type": "VISUAL",
      "shape": [3, 256, 256]
    }
  },
  "output_features": {
    "action": {
      "type": "ACTION",
      "shape": [14]
    }
  },
  "device": "cpu",
  "use_amp": false,
  "use_peft": false,
  "push_to_hub": true,
  "repo_id": null,
  "private": null,
  "tags": null,
  "license": null,
  "pretrained_path": null,
  "patch_size": [1, 2, 2],
  "num_attention_heads": 24,
  "attention_head_dim": 128,
  "in_channels": 48,
  "out_channels": 48,
  "action_dim": 30,
  "text_dim": 4096,
  "freq_dim": 256,
  "ffn_dim": 14336,
  "num_layers": 30,
  "cross_attn_norm": true,
  "eps": 1e-06,
  "rope_max_seq_len": 1024,
  "attn_mode": "torch",
  "wan_pretrained_path": "robbyant/lingbot-va-base",
  "dtype": "bfloat16",
  "text_encoder_device": "cpu",
  "obs_cam_keys": [
    "observation.images.cam_high",
    "observation.images.cam_left_wrist",
    "observation.images.cam_right_wrist"
  ],
  "image_hflip": false,
  "height": 256,
  "width": 320,
  "action_per_frame": 16,
  "frame_chunk_size": 2,
  "attn_window": 72,
  "num_inference_steps": 25,
  "video_exec_step": -1,
  "action_num_inference_steps": 50,
  "guidance_scale": 5.0,
  "action_guidance_scale": 1.0,
  "snr_shift": 5.0,
  "action_snr_shift": 1.0,
  "max_sequence_length": 512,
  "used_action_channel_ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
  "save_predicted_video": false,
  "normalization_mapping": {
    "VISUAL": "IDENTITY",
    "STATE": "IDENTITY",
    "ACTION": "IDENTITY"
  },
  "optimizer_lr": 1e-05,
  "optimizer_betas": [0.9, 0.95],
  "optimizer_eps": 1e-08,
  "optimizer_weight_decay": 0.0001,
  "optimizer_grad_clip_norm": 1.0,
  "scheduler_warmup_steps": 1000
}