{
  "type": "gr00t_n1d6",
  "n_obs_steps": 1,
  "input_features": {
    "observation.state": {
      "type": "STATE",
      "shape": [
        6
      ]
    },
    "observation.images.front": {
      "type": "VISUAL",
      "shape": [
        3,
        480,
        640
      ]
    },
    "observation.images.wrist": {
      "type": "VISUAL",
      "shape": [
        3,
        480,
        640
      ]
    }
  },
  "output_features": {
    "action": {
      "type": "ACTION",
      "shape": [
        6
      ]
    }
  },
  "device": "cuda",
  "use_amp": false,
  "use_peft": false,
  "push_to_hub": false,
  "repo_id": null,
  "private": null,
  "tags": null,
  "license": null,
  "pretrained_path": null,
  "chunk_size": 40,
  "n_action_steps": 40,
  "max_state_dim": 29,
  "max_action_dim": 29,
  "normalization_mapping": {
    "VISUAL": "IDENTITY",
    "STATE": "MEAN_STD",
    "ACTION": "MEAN_STD"
  },
  "image_size": [
    224,
    224
  ],
  "base_model_path": "nvidia/GR00T-N1.6-3B",
  "tokenizer_assets_repo": "nvidia/Eagle-Block2A-2B-v2",
  "backbone_model_type": "eagle",
  "model_revision": null,
  "backbone_embedding_dim": 2048,
  "select_layer": 16,
  "reproject_vision": false,
  "use_flash_attention": true,
  "load_bf16": true,
  "eagle_collator": false,
  "backbone_trainable_params_fp32": true,
  "embodiment_tag": "new_embodiment",
  "tune_top_llm_layers": 4,
  "tune_llm": false,
  "tune_visual": false,
  "tune_projector": true,
  "tune_diffusion_model": true,
  "tune_vlln": true,
  "image_crop_size": null,
  "image_target_size": null,
  "shortest_image_edge": 256,
  "crop_fraction": 0.95,
  "random_rotation_angle": null,
  "color_jitter_params": null,
  "use_albumentations_transforms": true,
  "formalize_language": true,
  "lora_rank": 0,
  "lora_alpha": 16,
  "lora_dropout": 0.1,
  "lora_full_model": false,
  "action_horizon": 16,
  "hidden_size": 1024,
  "input_embedding_dim": 1536,
  "add_pos_embed": true,
  "attn_dropout": 0.2,
  "use_vlln": true,
  "max_seq_len": 1024,
  "use_alternate_vl_dit": true,
  "attend_text_every_n_blocks": 2,
  "diffusion_model_cfg": {
    "positional_embeddings": null,
    "num_layers": 32,
    "num_attention_heads": 32,
    "attention_head_dim": 48,
    "norm_type": "ada_norm",
    "dropout": 0.2,
    "final_dropout": true,
    "output_dim": 1024,
    "interleave_self_attention": true
  },
  "num_inference_timesteps": 4,
  "noise_beta_alpha": 1.5,
  "noise_beta_beta": 1.0,
  "noise_s": 0.999,
  "num_timestep_buckets": 1000,
  "state_dropout_prob": 0.0,
  "state_additive_noise_scale": 0.0,
  "apply_sincos_state_encoding": false,
  "use_relative_action": true,
  "max_num_embodiments": 32,
  "optimizer_lr": 0.0001,
  "optimizer_betas": [
    0.95,
    0.999
  ],
  "optimizer_eps": 1e-08,
  "optimizer_weight_decay": 1e-05,
  "warmup_ratio": 0.05,
  "use_bf16": true,
  "video_backend": "decord",
  "balance_dataset_weights": true,
  "balance_trajectory_weights": true,
  "dataset_paths": null,
  "output_dir": "./tmp/gr00t_n16",
  "save_steps": 1000,
  "max_steps": 10000,
  "batch_size": 32,
  "dataloader_num_workers": 8,
  "report_to": "wandb",
  "resume": false
}