{
  "action_horizon": 40,
  "add_pos_embed": true,
  "apply_sincos_state_encoding": false,
  "architectures": [
    "Gr00tN1d7"
  ],
  "attn_dropout": 0.2,
  "attn_implementation": null,
  "backbone_embedding_dim": 2048,
  "backbone_trainable_params_fp32": true,
  "color_jitter_params": {
    "brightness": 0.3,
    "contrast": 0.4,
    "hue": 0.08,
    "saturation": 0.5
  },
  "crop_fraction": 0.95,
  "diffusion_model_cfg": {
    "attention_head_dim": 48,
    "dropout": 0.2,
    "final_dropout": true,
    "interleave_self_attention": true,
    "norm_type": "ada_norm",
    "num_attention_heads": 32,
    "num_layers": 32,
    "output_dim": 1024,
    "positional_embeddings": null
  },
  "dtype": "bfloat16",
  "exclude_state": false,
  "formalize_language": true,
  "hidden_size": 1024,
  "image_crop_size": [
    230,
    230
  ],
  "image_target_size": [
    256,
    256
  ],
  "letter_box_transform": false,
  "load_bf16": false,
  "max_action_dim": 132,
  "max_num_embodiments": 32,
  "max_seq_len": 1024,
  "max_state_dim": 132,
  "model_dtype": "bfloat16",
  "model_name": "nvidia/Cosmos-Reason2-2B",
  "model_type": "Gr00tN1d7",
  "noise_beta_alpha": 1.5,
  "noise_beta_beta": 1.0,
  "noise_s": 0.999,
  "num_inference_timesteps": 4,
  "num_timestep_buckets": 1000,
  "random_history_crop": true,
  "random_rotation_angle": 0,
  "reproject_vision": false,
  "rtc_ramp_rate": 6.0,
  "select_layer": 16,
  "shortest_image_edge": 256,
  "state_dropout_prob": 0.2,
  "state_gaussian_noise_std": 0.0,
  "transformers_version": "4.57.3",
  "tune_diffusion_model": true,
  "tune_linear": true,
  "tune_llm": false,
  "tune_projector": true,
  "tune_top_llm_layers": 0,
  "tune_visual": false,
  "tune_vlln": true,
  "use_albumentations": true,
  "use_alternate_vl_dit": true,
  "use_flash_attention": true,
  "use_future_tokens": false,
  "use_mean_std": false,
  "use_percentiles": true,
  "use_vl_self_attention": true,
  "use_vlln": true,
  "vl_self_attention_cfg": {
    "attention_head_dim": 64,
    "dropout": 0.2,
    "final_dropout": true,
    "num_attention_heads": 32,
    "num_layers": 4,
    "positional_embeddings": null
  }
}
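
For reference, a minimal sketch of reading a few of these fields with Python's standard json module (assumptions: the object above is saved as "config.json" in the model directory, and the interpretive comments are inferred from the flag names, not confirmed by the source):

import json

# Assumption: the configuration above has been saved as "config.json".
with open("config.json") as f:
    cfg = json.load(f)

# Identify the model class and its VLM backbone.
print(cfg["model_type"])  # Gr00tN1d7
print(cfg["model_name"])  # nvidia/Cosmos-Reason2-2B

# The policy predicts a chunk of 40 actions per call, with action and
# state vectors capped at 132 dimensions across up to 32 embodiments.
print(cfg["action_horizon"], cfg["max_action_dim"], cfg["max_state_dim"])

# The tune_* flag names suggest the language and vision towers stay
# frozen while the diffusion action head and projector are fine-tuned.
print(cfg["tune_llm"], cfg["tune_visual"], cfg["tune_diffusion_model"])
# False False True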