# Null here. Presumably this means no external config file is loaded and all
# settings come from this file — confirm against the consuming loader.
load_config_path: null
# Model settings for model_type Gr00tN1d6.
# NOTE(review): key meanings are defined by the consuming code, which is not
# visible from this file. The comments below only group keys by name and
# record relationships between values that can be read off this file.
model:
  model_type: Gr00tN1d6
  model_dtype: bfloat16
  # Backbone-related keys (grouping inferred from key names — confirm).
  model_name: nvidia/Eagle-Block2A-2B-v2
  backbone_model_type: eagle
  model_revision: null
  tune_top_llm_layers: 4
  backbone_embedding_dim: 2048
  tune_llm: false
  tune_visual: false
  select_layer: 16
  reproject_vision: false
  use_flash_attention: true
  # NOTE(review): false although model_dtype above is bfloat16 — confirm the
  # two switches are independent.
  load_bf16: false
  collator_overwrite_image_inputs: false
  eagle_collator: true
  backbone_trainable_params_fp32: true
  # Image preprocessing / augmentation keys.
  # NOTE(review): image_crop_size and image_target_size are null here, while
  # the data section of this file sets keys with the same names — confirm
  # which copy the consumer reads.
  image_crop_size: null
  image_target_size: null
  shortest_image_edge: 256
  crop_fraction: 0.95
  random_rotation_angle: null
  color_jitter_params:
    brightness: 0.3
    contrast: 0.4
    saturation: 0.5
    hue: 0.08
  use_albumentations_transforms: true
  extra_augmentation_config: null
  # Language / state / action handling.
  formalize_language: true
  apply_sincos_state_encoding: false
  use_relative_action: true
  max_state_dim: 29
  max_action_dim: 29
  # NOTE(review): 16 here, while the unitree_g1 action delta_indices in the
  # data section list 30 steps (0-29) — confirm the mismatch is intended.
  action_horizon: 16
  # Action-head dimensions and attention layout.
  hidden_size: 1024
  input_embedding_dim: 1536
  add_pos_embed: true
  attn_dropout: 0.2
  use_vlln: true
  max_seq_len: 1024
  use_alternate_vl_dit: true
  attend_text_every_n_blocks: 2
  # 32 heads x 48 dims per head = 1536, the same number as input_embedding_dim
  # above; output_dim equals hidden_size (1024). Presumably these must stay
  # in step when edited — TODO confirm.
  diffusion_model_cfg:
    positional_embeddings: null
    num_layers: 32
    num_attention_heads: 32
    attention_head_dim: 48
    norm_type: ada_norm
    dropout: 0.2
    final_dropout: true
    output_dim: 1024
    interleave_self_attention: true
  # Inference steps and noise-schedule parameters (meaning defined by the
  # consumer; not derivable from this file).
  num_inference_timesteps: 4
  noise_beta_alpha: 1.5
  noise_beta_beta: 1.0
  noise_s: 0.999
  num_timestep_buckets: 1000
  # Components marked trainable here, in contrast to tune_llm / tune_visual
  # (both false) above.
  tune_projector: true
  tune_diffusion_model: true
  tune_vlln: true
  state_dropout_prob: 0.0
  state_additive_noise_scale: 0.0
  max_num_embodiments: 32
# Dataset selection, per-embodiment modality layout and sampling settings.
# NOTE(review): key semantics come from the consuming code, which is not
# visible here. Scalar leaf lists are written in flow style; they load to the
# same values as the equivalent block lists.
data:
  datasets:
  - dataset_paths:
    - ./datasets/push_block_mujoco
    embodiment_tag: unitree_g1
    mix_ratio: 1.0
    dataset_type: physical_embodiment
    val_dataset_path: null
  modality_configs:
    # This key equals datasets[0].embodiment_tag; presumably configs are
    # looked up by embodiment tag — confirm.
    unitree_g1:
      video:
        delta_indices: [0]
        modality_keys: [ego_view]
        sin_cos_embedding_keys: null
        mean_std_embedding_keys: null
        action_configs: null
      state:
        delta_indices: [0]
        modality_keys: [left_leg, right_leg, waist, left_arm, right_arm,
                        left_hand, right_hand]
        sin_cos_embedding_keys: null
        mean_std_embedding_keys: null
        action_configs: null
      action:
        # 30 consecutive indices, 0 through 29.
        # NOTE(review): model.action_horizon in this file is 16 — confirm the
        # two are meant to differ.
        delta_indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
                        10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
                        20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
        modality_keys: [left_arm, right_arm, left_hand, right_hand, waist,
                        base_height_command, navigate_command]
        sin_cos_embedding_keys: null
        mean_std_embedding_keys: null
        # Seven entries, the same count as the modality_keys above.
        # Presumably paired with them by position (the two RELATIVE entries
        # would then be left_arm and right_arm) — TODO confirm.
        action_configs:
        - {rep: RELATIVE, type: NON_EEF, format: DEFAULT, state_key: null}
        - {rep: RELATIVE, type: NON_EEF, format: DEFAULT, state_key: null}
        - {rep: ABSOLUTE, type: NON_EEF, format: DEFAULT, state_key: null}
        - {rep: ABSOLUTE, type: NON_EEF, format: DEFAULT, state_key: null}
        - {rep: ABSOLUTE, type: NON_EEF, format: DEFAULT, state_key: null}
        - {rep: ABSOLUTE, type: NON_EEF, format: DEFAULT, state_key: null}
        - {rep: ABSOLUTE, type: NON_EEF, format: DEFAULT, state_key: null}
      language:
        delta_indices: [0]
        modality_keys: [annotation.human.task_description]
        sin_cos_embedding_keys: null
        mean_std_embedding_keys: null
        action_configs: null
  # Loading / sharding / sampling settings.
  download_cache: false
  shard_size: 1024
  episode_sampling_rate: 0.1
  num_shards_per_epoch: 100000
  override_pretraining_statistics: false
  mode: single_turn
  random_chop: 0.0
  mock_dataset_mode: false
  shuffle: true
  seed: 42
  multiprocessing_context: fork
  allow_padding: false
  subsample_ratio: 1.0
  # NOTE(review): crop size is 244 while target size is 224 — confirm 244 is
  # intended and not a transposition of 224. The model section of this file
  # leaves both of these keys null.
  image_crop_size: [244, 244]
  image_target_size: [224, 224]
  video_backend: torchcodec
# Training-run settings.
# NOTE(review): key meanings are defined by the consuming trainer, which is
# not visible from this file; notes below are hedged where they go beyond
# the literal values.
training:
  output_dir: ./outputs/push_block_mujoco
  experiment_name: null
  # Schedule and batch sizing.
  max_steps: 10000
  # NOTE(review): batch_size is null; presumably it is derived from
  # global_batch_size, num_gpus (8, below) and gradient_accumulation_steps,
  # i.e. 512 / 8 / 1 = 64 per GPU — confirm.
  global_batch_size: 512
  batch_size: null
  gradient_accumulation_steps: 1
  # Optimizer and learning-rate schedule.
  learning_rate: 0.0001
  lr_scheduler_type: cosine
  # Keep the "1.0e-05" spelling: YAML 1.1 loaders such as PyYAML read a
  # dot-less form like 1e-5 as a string rather than a float.
  weight_decay: 1.0e-05
  # NOTE(review): warmup_ratio and warmup_steps are both set (0.05 and 0);
  # confirm which one the trainer honours.
  warmup_ratio: 0.05
  warmup_steps: 0
  max_grad_norm: 1.0
  optim: adamw_torch
  start_from_checkpoint: nvidia/GR00T-N1.6-3B
  # Numeric precision flags.
  tf32: true
  fp16: false
  bf16: true
  eval_bf16: true
  # Logging and checkpointing. With max_steps 10000 and save_steps 1000 there
  # are 10 save points against save_total_limit 5; presumably older
  # checkpoints are pruned — confirm.
  logging_steps: 10
  save_steps: 1000
  save_total_limit: 5
  save_vl_model: false
  # upload_checkpoints is false, so the remaining upload_* keys are
  # presumably inert in this run.
  upload_checkpoints: false
  upload_every: 1000
  upload_last_n_checkpoints: 5
  max_concurrent_uploads: 2
  # Evaluation. The value is quoted so it loads as the string "no" rather
  # than a YAML 1.1 boolean. With evaluation off, the eval_* keys below are
  # presumably unused — confirm.
  eval_strategy: 'no'
  eval_steps: 500
  eval_set_split_ratio: 0.1
  eval_batch_size: 2
  save_best_eval_metric_name: ''
  save_best_eval_metric_greater_is_better: true
  # Distributed execution.
  deepspeed_stage: 2
  gradient_checkpointing: false
  # NOTE(review): trust_remote_code is true — presumably the backbone named
  # in model.model_name ships custom code; confirm the source is trusted.
  transformers_trust_remote_code: true
  transformers_local_files_only: false
  transformers_cache_dir: null
  transformers_access_token: null
  use_ddp: false
  ddp_bucket_cap_mb: 100
  num_gpus: 8
  dataloader_num_workers: 6
  remove_unused_columns: false
  # Experiment tracking and diagnostics.
  use_wandb: true
  wandb_project: finetune-gr00t-n1d6
  enable_profiling: false
  max_retries: 3
  assert_loss_less_than: null
  add_rl_callback: false
  # enable_open_loop_eval is false, so the open_loop_eval_* keys below are
  # presumably unused in this run.
  enable_open_loop_eval: false
  open_loop_eval_traj_ids:
  - 0
  open_loop_eval_steps_per_traj: 100
  open_loop_eval_plot_indices: null
# Root-level keys (siblings of model / data / training). They carry the same
# values as training.max_steps and training.save_steps in this file.
# NOTE(review): it is not visible from here which copy the consumer reads —
# keep both in sync when editing, or confirm that one can be dropped.
max_steps: 10000
save_steps: 1000