| load_config_path: null |
| model: |
| model_type: Gr00tN1d6 |
| model_dtype: bfloat16 |
| model_name: nvidia/Eagle-Block2A-2B-v2 |
| backbone_model_type: eagle |
| model_revision: null |
| tune_top_llm_layers: 4 |
| backbone_embedding_dim: 2048 |
| tune_llm: false |
| tune_visual: true |
| select_layer: 16 |
| reproject_vision: false |
| use_flash_attention: true |
| load_bf16: false |
| collator_overwrite_image_inputs: false |
| eagle_collator: true |
| backbone_trainable_params_fp32: true |
| image_crop_size: null |
| image_target_size: null |
| shortest_image_edge: 256 |
| crop_fraction: 0.95 |
| random_rotation_angle: null |
| color_jitter_params: |
| brightness: 0.3 |
| contrast: 0.4 |
| saturation: 0.5 |
| hue: 0.08 |
| use_albumentations_transforms: true |
| formalize_language: true |
| apply_sincos_state_encoding: false |
| use_relative_action: true |
| max_state_dim: 29 |
| max_action_dim: 29 |
| action_horizon: 16 |
| hidden_size: 1024 |
| input_embedding_dim: 1536 |
| add_pos_embed: true |
| attn_dropout: 0.2 |
| use_vlln: true |
| max_seq_len: 1024 |
| use_alternate_vl_dit: true |
| attend_text_every_n_blocks: 2 |
| diffusion_model_cfg: |
| positional_embeddings: null |
| num_layers: 32 |
| num_attention_heads: 32 |
| attention_head_dim: 48 |
| norm_type: ada_norm |
| dropout: 0.2 |
| final_dropout: true |
| output_dim: 1024 |
| interleave_self_attention: true |
| num_inference_timesteps: 4 |
| noise_beta_alpha: 1.5 |
| noise_beta_beta: 1.0 |
| noise_s: 0.999 |
| num_timestep_buckets: 1000 |
| tune_projector: true |
| tune_diffusion_model: true |
| tune_vlln: true |
| state_dropout_prob: 0.0 |
| state_additive_noise_scale: 0.0 |
| max_num_embodiments: 32 |
| data: |
| datasets: |
| - dataset_paths: |
| - /datasets/orca-sim-pick-and-place-mimic/stage1_3_cosmos/lerobot |
| - /datasets/orca-sim-pick-and-place-mimic/stage1_5_cosmos/lerobot |
| - /datasets/orca-sim-pick-and-place-mimic/stage1_7_cosmos/lerobot |
| - /datasets/orca-sim-pick-and-place-mimic/stage1_8_cosmos/lerobot |
| embodiment_tag: new_embodiment |
| mix_ratio: 1.0 |
| dataset_type: physical_embodiment |
| val_dataset_path: null |
| modality_configs: |
| new_embodiment: |
| video: |
| delta_indices: |
| - 0 |
| modality_keys: |
| - ego_view |
| sin_cos_embedding_keys: null |
| mean_std_embedding_keys: null |
| action_configs: null |
| state: |
| delta_indices: |
| - 0 |
| modality_keys: |
| - left_arm |
| - right_arm |
| - left_hand |
| - right_hand |
| - waist |
| sin_cos_embedding_keys: null |
| mean_std_embedding_keys: null |
| action_configs: null |
| action: |
| delta_indices: |
| - 0 |
| - 1 |
| - 2 |
| - 3 |
| - 4 |
| - 5 |
| - 6 |
| - 7 |
| - 8 |
| - 9 |
| - 10 |
| - 11 |
| - 12 |
| - 13 |
| - 14 |
| - 15 |
| modality_keys: |
| - left_arm |
| - right_arm |
| - left_hand |
| - right_hand |
| - base_height_command |
| - navigate_command |
| sin_cos_embedding_keys: null |
| mean_std_embedding_keys: null |
| action_configs: |
| - rep: ABSOLUTE |
| type: NON_EEF |
| format: DEFAULT |
| state_key: null |
| - rep: ABSOLUTE |
| type: NON_EEF |
| format: DEFAULT |
| state_key: null |
| - rep: ABSOLUTE |
| type: NON_EEF |
| format: DEFAULT |
| state_key: null |
| - rep: ABSOLUTE |
| type: NON_EEF |
| format: DEFAULT |
| state_key: null |
| - rep: ABSOLUTE |
| type: NON_EEF |
| format: DEFAULT |
| state_key: null |
| - rep: ABSOLUTE |
| type: NON_EEF |
| format: DEFAULT |
| state_key: null |
| language: |
| delta_indices: |
| - 0 |
| modality_keys: |
| - annotation.human.task_description |
| sin_cos_embedding_keys: null |
| mean_std_embedding_keys: null |
| action_configs: null |
| download_cache: false |
| shard_size: 1024 |
| episode_sampling_rate: 0.1 |
| num_shards_per_epoch: 100000 |
| override_pretraining_statistics: false |
| mode: single_turn |
| random_chop: 0.0 |
| mock_dataset_mode: false |
| shuffle: true |
| seed: 42 |
| multiprocessing_context: fork |
| allow_padding: false |
| subsample_ratio: 1.0 |
| image_crop_size: |
| - 244 |
| - 244 |
| image_target_size: |
| - 224 |
| - 224 |
| video_backend: torchcodec |
| training: |
| output_dir: /models/ORCA-GROOT-N1.6-Sim-Pick-Place |
| experiment_name: null |
| max_steps: 100000 |
| global_batch_size: 32 |
| batch_size: null |
| gradient_accumulation_steps: 1 |
| learning_rate: 0.0001 |
| lr_scheduler_type: cosine |
| weight_decay: 1.0e-05 |
| warmup_ratio: 0.05 |
| warmup_steps: 0 |
| max_grad_norm: 1.0 |
| optim: adamw_torch |
| start_from_checkpoint: nvidia/GR00T-N1.6-3B |
| tf32: true |
| fp16: false |
| bf16: true |
| eval_bf16: true |
| logging_steps: 10 |
| save_steps: 10000 |
| save_total_limit: 9 |
| save_vl_model: false |
| upload_checkpoints: false |
| upload_every: 1000 |
| upload_last_n_checkpoints: 5 |
| max_concurrent_uploads: 2 |
| eval_strategy: 'no' |
| eval_steps: 500 |
| eval_set_split_ratio: 0.1 |
| eval_batch_size: 2 |
| save_best_eval_metric_name: '' |
| save_best_eval_metric_greater_is_better: true |
| deepspeed_stage: 2 |
| gradient_checkpointing: false |
| transformers_trust_remote_code: true |
| transformers_local_files_only: false |
| transformers_cache_dir: null |
| transformers_access_token: null |
| use_ddp: false |
| ddp_bucket_cap_mb: 100 |
| num_gpus: 1 |
| dataloader_num_workers: 8 |
| remove_unused_columns: false |
| use_wandb: true |
| wandb_project: finetune-gr00t-n1d6 |
| enable_profiling: false |
| max_retries: 3 |
| assert_loss_less_than: null |
| add_rl_callback: false |
| enable_open_loop_eval: false |
| open_loop_eval_traj_ids: |
| - 0 |
| open_loop_eval_steps_per_traj: 100 |
| open_loop_eval_plot_indices: null |
| max_steps: 100000 |
| save_steps: 10000 |
|
|