pi0_ecot_tr_long / config.json
matCercola18's picture
Upload folder using huggingface_hub
8eee81d verified
{
"type": "pi0_ecot",
"n_obs_steps": 1,
"normalization_mapping": {
"VISUAL": "IDENTITY",
"STATE": "IDENTITY",
"ACTION": "IDENTITY"
},
"input_features": {
"observation.images.top": {
"type": "VISUAL",
"shape": [
3,
224,
224
]
},
"observation.state": {
"type": "STATE",
"shape": [
7
]
}
},
"output_features": {
"action": {
"type": "ACTION",
"shape": [
7
]
}
},
"device": "cpu",
"use_amp": true,
"chunk_size": 4,
"n_action_steps": 4,
"max_state_dim": 32,
"max_action_dim": 32,
"resize_imgs_with_padding": [
224,
224
],
"empty_cameras": 0,
"adapt_to_pi_aloha": false,
"use_delta_joint_actions_aloha": false,
"tokenizer_max_length": 72,
"proj_width": 1024,
"num_steps": 10,
"use_cache": true,
"attention_implementation": "eager",
"freeze_vision_encoder": true,
"train_expert_only": true,
"train_state_proj": true,
"paligemma_pretrained_path": null,
"optimizer_lr": 0.0003,
"optimizer_betas": [
0.9,
0.999
],
"optimizer_eps": 1e-08,
"optimizer_weight_decay": 0.0,
"scheduler_warmup_steps": 500,
"scheduler_decay_steps": 30000,
"scheduler_decay_lr": 2.5e-05,
"tokenizer_pretrained": "google/paligemma-3b-pt-224",
"max_move_tokens": 8,
"num_z_tokens": 8,
"ecot_source": "future_action",
"ecot_embedding_mode": "online",
"qwen_offline_z_key": "observation.ecot.qwen_hidden",
"vjepa_offline_z_key": "observation.ecot.vjepa_hidden",
"ecot_vla_offline_z_key": "observation.ecot.openvla_hidden",
"offline_z_is_projected": false,
"offline_z_sidecar_path": null,
"offline_z_sidecar_index_key": "index",
"offline_z_sidecar_z_key": "z",
"z_projection_path": "runs/autoencoder/checkpoint.pt",
"gt_z_projection_path": null,
"vjepa_z_projection_path": "runs/vjepa_autoencoder/checkpoint.pt",
"ecot_vla_z_projection_path": "runs/ecot_vla_autoencoder/checkpoint.pt",
"gt_move_joiner": ". ",
"gt_move_prompt_prefix": "Move: ",
"qwen_base_model_id": "Qwen/Qwen3-VL-4B-Instruct",
"qwen_model_id": "StarVLA/Qwen3VL-OFT-Bridge-RT-1",
"qwen_checkpoint_file": "checkpoints/steps_5000_pytorch_model.pt",
"vjepa_model_id": "facebook/vjepa2-vitl-fpc64-256",
"vjepa_clip_key": "observation.images.image_0_future",
"vjepa_clip_frames": 16,
"ecot_vla_model_id": "Embodied-CoT/ecot-openvla-7b-bridge",
"ecot_vla_prompt_template": "Task: {instruction}\nReasoning:",
"ecot_vla_text_max_length": 256,
"online_ecot_llm_hidden_dim": 4096,
"future_action_z_projection_path": "runs/future_action_autoencoder_transformer/checkpoint.pt",
"future_action_horizon": 50,
"future_action_key": "future_action",
"future_action_pad_mask_key": "future_action_pad_mask",
"enable_cross_masking": true,
"cross_mask_prob_a_only": 0.05,
"cross_mask_prob_a_only_start": 0.0,
"cross_mask_prob_a_only_end": 0.05,
"cross_mask_prob_a_only_curriculum_steps": 15000,
"cross_mask_prob_z_only": 0.050000000000000044,
"cross_mask_prob_z_only_start": 0.95,
"cross_mask_prob_z_only_end": 0.05,
"cross_mask_prob_z_only_curriculum_steps": 15000,
"z_loss_weight": 1.0,
"z_noise_alpha": 2.0,
"train_full_denoise_every": 50,
"train_full_denoise_loss_weight": 0.1,
"train_full_denoise_grad_last_k": 1,
"train_full_denoise_on_z_only": false
}