vla-flow-taskfix-5k / config.json
yusei926's picture
5K checkpoint (taskfix, chunk_size=8, loss~0.46)
26a94ef verified
{
"type": "vila_chunk",
"n_obs_steps": 2,
"input_features": {
"observation.state": {
"type": "STATE",
"shape": [
7
]
},
"observation.images.top": {
"type": "VISUAL",
"shape": [
3,
384,
384
]
},
"observation.images.up": {
"type": "VISUAL",
"shape": [
3,
384,
384
]
},
"observation.images.side": {
"type": "VISUAL",
"shape": [
3,
384,
384
]
},
"observation.images.wrist": {
"type": "VISUAL",
"shape": [
3,
384,
384
]
},
"observation.images.image": {
"type": "VISUAL",
"shape": [
3,
384,
384
]
},
"observation.image": {
"type": "VISUAL",
"shape": [
3,
384,
384
]
},
"observation.images.exterior_image_1_left": {
"type": "VISUAL",
"shape": [
3,
384,
384
]
},
"observation.images.exterior_1_left": {
"type": "VISUAL",
"shape": [
3,
384,
384
]
},
"observation.images.wrist_left": {
"type": "VISUAL",
"shape": [
3,
384,
384
]
}
},
"output_features": {
"action": {
"type": "ACTION",
"shape": [
8,
7
]
}
},
"device": "cuda",
"use_amp": false,
"push_to_hub": true,
"repo_id": "FT-LLM-2026-RAMEN/v4-8b-decay2m-ipt_v3_1-instruct4-add-tokens-nvila-stage3-vla-flow-taskfix-20260302-1717",
"private": null,
"tags": null,
"license": null,
"pretrained_path": null,
"chunk_size": 8,
"n_action_steps": 4,
"normalization_mapping": {
"VISUAL": "IDENTITY",
"STATE": "MEAN_STD",
"ACTION": "MEAN_STD"
},
"temporal_ensemble_mode": "exponential",
"temporal_ensemble_decay": 0.1,
"temporal_ensemble_every_step": true,
"max_state_dim": 14,
"max_action_dim": 14,
"state_dim": 7,
"action_dim": 7,
"resize_imgs_with_padding": [
448,
448
],
"preprocess_images_in_policy": true,
"image_key": null,
"image_keys": [
"observation.images.top",
"observation.images.up",
"observation.images.side",
"observation.images.wrist",
"observation.images.image",
"observation.image",
"observation.images.exterior_image_1_left",
"observation.images.exterior_1_left",
"observation.images.wrist_left"
],
"auto_detect_image_key": true,
"vila_model_name": "FT-LLM-2026-RAMEN/v4-8b-decay2m-ipt_v3_1-instruct4-add-tokens-nvila-stage3",
"llm_jp_vila_path": "/home/suzuki/ft_llm_2026_ramen/vla/lerobot_policy_vila/../eval/simpler_env/a100/vla_inference/llm-jp-VILA",
"llm_hidden_size": 4096,
"hidden_state_layer": -1,
"load_in_4bit": false,
"bnb_4bit_quant_type": "nf4",
"bnb_4bit_use_double_quant": true,
"use_flash_attention": true,
"action_head_type": "flow",
"action_head_hidden_dim": 256,
"action_head_num_layers": 2,
"action_head_dropout": 0.1,
"input_scale": 0.015625,
"policy_d_model": 1024,
"vision_target_grid": 12,
"resampler_num_latents": 32,
"resampler_layers": 2,
"history_mixer": "gru",
"text_encoder_layers": 2,
"flow_steps": 10,
"flow_depth": 8,
"flow_nhead": 8,
"flow_dropout": 0.1,
"flow_noise_scale": 1.0,
"use_state_concat": true,
"optimizer_lr": 0.0001,
"optimizer_betas": [
0.9,
0.95
],
"optimizer_eps": 1e-08,
"optimizer_weight_decay": 1e-10,
"optimizer_grad_clip_norm": 10.0,
"use_lr_scheduler": true,
"num_warmup_steps": 500,
"num_decay_steps": 29500,
"decay_lr": 1e-06,
"freeze_vision_encoder": true,
"freeze_llm_backbone": true,
"train_action_head": true,
"train_state_proj": true,
"vision_unfreeze_last_n_blocks": 0,
"vision_lora_r": 0,
"vision_lora_alpha": 16,
"vision_lora_dropout": 0.0,
"vision_lora_target_modules": [
"q_proj",
"k_proj",
"v_proj",
"out_proj",
"fc1",
"fc2"
],
"action_clip_min": null,
"action_clip_max": null,
"simplerenv_gripper_conversion": true,
"simplerenv_gripper_index": -1,
"simplerenv_gripper_invert": false,
"simplerenv_gripper_binarize": true,
"simplerenv_euler_to_axis_angle": true,
"simplerenv_rotation_start_index": 3,
"prompt_template": "<image>\n\u30ed\u30dc\u30c3\u30c8\u306e\u52d5\u4f5c\u3092\u4e88\u6e2c\u3057\u3066\u304f\u3060\u3055\u3044\u3002",
"conversation_template": "llmjp_v3"
}