Upload folder using huggingface_hub

Browse files

Files changed (15) hide show

config.json +70 -0
embodiment_id.json +11 -0
experiment_cfg/conf.yaml +227 -0
experiment_cfg/config.yaml +263 -0
experiment_cfg/dataset_statistics.json +413 -0
experiment_cfg/final_model_config.json +54 -0
experiment_cfg/final_processor_config.json +0 -0
model-00001-of-00002.safetensors +3 -0
model-00002-of-00002.safetensors +3 -0
model.safetensors.index.json +0 -0
processor_config.json +478 -0
statistics.json +0 -0
trainer_state.json +94 -0
training_args.bin +3 -0
wandb_config.json +1 -0

config.json ADDED Viewed

	@@ -0,0 +1,70 @@

+{
+  "action_horizon": 50,
+  "add_pos_embed": true,
+  "apply_sincos_state_encoding": true,
+  "architectures": [
+    "Gr00tN1d6"
+  ],
+  "attn_dropout": 0.2,
+  "attn_implementation": null,
+  "backbone_embedding_dim": 2048,
+  "backbone_model_type": "eagle",
+  "backbone_trainable_params_fp32": true,
+  "collator_overwrite_image_inputs": false,
+  "color_jitter_params": {
+    "brightness": 0.1,
+    "contrast": 0.1,
+    "hue": 0.1,
+    "saturation": 0.1
+  },
+  "crop_fraction": 0.95,
+  "diffusion_model_cfg": {
+    "attention_head_dim": 48,
+    "dropout": 0.2,
+    "final_dropout": true,
+    "interleave_self_attention": true,
+    "norm_type": "ada_norm",
+    "num_attention_heads": 32,
+    "num_layers": 32,
+    "output_dim": 1024,
+    "positional_embeddings": null
+  },
+  "eagle_collator": true,
+  "formalize_language": true,
+  "gemma_collator": false,
+  "hidden_size": 1024,
+  "image_crop_size": null,
+  "image_target_size": null,
+  "input_embedding_dim": 1536,
+  "load_bf16": true,
+  "max_action_dim": 128,
+  "max_num_embodiments": 32,
+  "max_seq_len": 1024,
+  "max_state_dim": 128,
+  "model_dtype": "bfloat16",
+  "model_name": "nvidia/Eagle-Block2A-2B-v2",
+  "model_type": "Gr00tN1d6",
+  "noise_beta_alpha": 1.5,
+  "noise_beta_beta": 1.0,
+  "noise_s": 0.999,
+  "num_inference_timesteps": 4,
+  "num_timestep_buckets": 1000,
+  "random_rotation_angle": null,
+  "reproject_vision": false,
+  "select_layer": 16,
+  "shortest_image_edge": 256,
+  "state_dropout_prob": 0.0,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.3",
+  "tune_diffusion_model": true,
+  "tune_llm": false,
+  "tune_projector": true,
+  "tune_top_llm_layers": 4,
+  "tune_visual": false,
+  "tune_vlln": true,
+  "use_albumentations_transforms": true,
+  "use_alternate_vl_dit": true,
+  "use_flash_attention": true,
+  "use_relative_action": true,
+  "use_vlln": true
+}

embodiment_id.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+  "robocasa_panda_omron": 13,
+  "gr1": 20,
+  "behavior_r1_pro": 24,
+  "unitree_g1": 8,
+  "oxe_google": 0,
+  "oxe_widowx": 1,
+  "libero_panda": 2,
+  "oxe_droid": 16,
+  "new_embodiment": 10
+}

experiment_cfg/conf.yaml ADDED Viewed

	@@ -0,0 +1,227 @@

+load_config_path: null
+model:
+  model_type: Gr00tN1d6
+  model_dtype: bfloat16
+  model_name: nvidia/Eagle-Block2A-2B-v2
+  backbone_model_type: eagle
+  model_revision: null
+  tune_top_llm_layers: 4
+  backbone_embedding_dim: 2048
+  tune_llm: false
+  tune_visual: false
+  select_layer: 16
+  reproject_vision: false
+  use_flash_attention: true
+  load_bf16: false
+  collator_overwrite_image_inputs: false
+  eagle_collator: true
+  backbone_trainable_params_fp32: true
+  image_crop_size: null
+  image_target_size: null
+  shortest_image_edge: 256
+  crop_fraction: 0.95
+  random_rotation_angle: null
+  color_jitter_params:
+    brightness: 0.3
+    contrast: 0.4
+    saturation: 0.5
+    hue: 0.08
+  use_albumentations_transforms: true
+  extra_augmentation_config: null
+  formalize_language: true
+  apply_sincos_state_encoding: false
+  use_relative_action: true
+  max_state_dim: 29
+  max_action_dim: 29
+  action_horizon: 16
+  hidden_size: 1024
+  input_embedding_dim: 1536
+  add_pos_embed: true
+  attn_dropout: 0.2
+  use_vlln: true
+  max_seq_len: 1024
+  use_alternate_vl_dit: true
+  attend_text_every_n_blocks: 2
+  diffusion_model_cfg:
+    positional_embeddings: null
+    num_layers: 32
+    num_attention_heads: 32
+    attention_head_dim: 48
+    norm_type: ada_norm
+    dropout: 0.2
+    final_dropout: true
+    output_dim: 1024
+    interleave_self_attention: true
+  num_inference_timesteps: 4
+  noise_beta_alpha: 1.5
+  noise_beta_beta: 1.0
+  noise_s: 0.999
+  num_timestep_buckets: 1000
+  tune_projector: true
+  tune_diffusion_model: true
+  tune_vlln: true
+  state_dropout_prob: 0.0
+  state_additive_noise_scale: 0.0
+  max_num_embodiments: 32
+data:
+  datasets:
+  - dataset_paths:
+    - /data/datasets/Dongkkka/PATs_upload_test_lerobot
+    embodiment_tag: new_embodiment
+    mix_ratio: 1.0
+    dataset_type: physical_embodiment
+    val_dataset_path: null
+  modality_configs:
+    new_embodiment:
+      video:
+        delta_indices:
+        - 0
+        modality_keys:
+        - cam_left_head
+        sin_cos_embedding_keys: null
+        mean_std_embedding_keys: null
+        action_configs: null
+      state:
+        delta_indices:
+        - 0
+        modality_keys:
+        - arm_left
+        - arm_right
+        - head
+        - lift
+        - other
+        sin_cos_embedding_keys: null
+        mean_std_embedding_keys: null
+        action_configs: null
+      action:
+        delta_indices:
+        - 0
+        - 1
+        - 2
+        - 3
+        - 4
+        - 5
+        - 6
+        - 7
+        - 8
+        - 9
+        - 10
+        - 11
+        - 12
+        - 13
+        - 14
+        - 15
+        modality_keys:
+        - arm_left
+        - arm_right
+        - head
+        - lift
+        - other
+        sin_cos_embedding_keys: null
+        mean_std_embedding_keys: null
+        action_configs:
+        - rep: ABSOLUTE
+          type: NON_EEF
+          format: DEFAULT
+          state_key: null
+        - rep: ABSOLUTE
+          type: NON_EEF
+          format: DEFAULT
+          state_key: null
+        - rep: ABSOLUTE
+          type: NON_EEF
+          format: DEFAULT
+          state_key: null
+        - rep: ABSOLUTE
+          type: NON_EEF
+          format: DEFAULT
+          state_key: null
+        - rep: ABSOLUTE
+          type: NON_EEF
+          format: DEFAULT
+          state_key: null
+      language:
+        delta_indices:
+        - 0
+        modality_keys:
+        - annotation.human.task_description
+        sin_cos_embedding_keys: null
+        mean_std_embedding_keys: null
+        action_configs: null
+  download_cache: false
+  shard_size: 1024
+  episode_sampling_rate: 0.1
+  num_shards_per_epoch: 100000
+  override_pretraining_statistics: false
+  mode: single_turn
+  random_chop: 0.0
+  mock_dataset_mode: false
+  shuffle: true
+  seed: 42
+  multiprocessing_context: fork
+  allow_padding: false
+  subsample_ratio: 1.0
+  image_crop_size:
+  - 244
+  - 244
+  image_target_size:
+  - 224
+  - 224
+  video_backend: torchcodec
+training:
+  output_dir: /data/checkpoints/PATs_upload_test_model
+  experiment_name: null
+  max_steps: 100
+  global_batch_size: 48
+  batch_size: null
+  gradient_accumulation_steps: 1
+  learning_rate: 0.0001
+  lr_scheduler_type: cosine
+  weight_decay: 1.0e-05
+  warmup_ratio: 0.05
+  warmup_steps: 0
+  max_grad_norm: 1.0
+  optim: adamw_torch
+  start_from_checkpoint: nvidia/GR00T-N1.6-3B
+  tf32: true
+  fp16: false
+  bf16: true
+  eval_bf16: true
+  logging_steps: 10
+  save_steps: 100
+  save_total_limit: 10
+  save_vl_model: false
+  upload_checkpoints: false
+  upload_every: 1000
+  upload_last_n_checkpoints: 5
+  max_concurrent_uploads: 2
+  eval_strategy: 'no'
+  eval_steps: 500
+  eval_set_split_ratio: 0.1
+  eval_batch_size: 2
+  save_best_eval_metric_name: ''
+  save_best_eval_metric_greater_is_better: true
+  deepspeed_stage: 2
+  gradient_checkpointing: false
+  transformers_trust_remote_code: true
+  transformers_local_files_only: false
+  transformers_cache_dir: null
+  transformers_access_token: null
+  use_ddp: false
+  ddp_bucket_cap_mb: 100
+  num_gpus: 1
+  dataloader_num_workers: 8
+  remove_unused_columns: false
+  use_wandb: false
+  wandb_project: finetune-gr00t-n1d6
+  enable_profiling: false
+  max_retries: 3
+  assert_loss_less_than: null
+  add_rl_callback: false
+  enable_open_loop_eval: false
+  open_loop_eval_traj_ids:
+  - 0
+  open_loop_eval_steps_per_traj: 100
+  open_loop_eval_plot_indices: null
+max_steps: 100
+save_steps: 100

experiment_cfg/config.yaml ADDED Viewed

	@@ -0,0 +1,263 @@

+!!python/object:gr00t.configs.base_config.Config
+data: !!python/object:gr00t.configs.data.data_config.DataConfig
+  allow_padding: false
+  datasets:
+  - !!python/object:gr00t.configs.data.data_config.SingleDatasetConfig
+    dataset_paths:
+    - /data/datasets/Dongkkka/PATs_upload_test_lerobot
+    dataset_type: physical_embodiment
+    embodiment_tag: new_embodiment
+    mix_ratio: 1.0
+    val_dataset_path: null
+  download_cache: false
+  episode_sampling_rate: 0.1
+  image_crop_size:
+  - 244
+  - 244
+  image_target_size:
+  - 224
+  - 224
+  mock_dataset_mode: false
+  modality_configs:
+    new_embodiment:
+      action: !!python/object:gr00t.data.types.ModalityConfig
+        action_configs:
+        - !!python/object:gr00t.data.types.ActionConfig
+          format: &id001 !!python/object/apply:gr00t.data.types.ActionFormat
+          - default
+          rep: &id002 !!python/object/apply:gr00t.data.types.ActionRepresentation
+          - absolute
+          state_key: null
+          type: &id003 !!python/object/apply:gr00t.data.types.ActionType
+          - non_eef
+        - !!python/object:gr00t.data.types.ActionConfig
+          format: *id001
+          rep: *id002
+          state_key: null
+          type: *id003
+        - !!python/object:gr00t.data.types.ActionConfig
+          format: *id001
+          rep: *id002
+          state_key: null
+          type: *id003
+        - !!python/object:gr00t.data.types.ActionConfig
+          format: *id001
+          rep: *id002
+          state_key: null
+          type: *id003
+        - !!python/object:gr00t.data.types.ActionConfig
+          format: *id001
+          rep: *id002
+          state_key: null
+          type: *id003
+        delta_indices:
+        - 0
+        - 1
+        - 2
+        - 3
+        - 4
+        - 5
+        - 6
+        - 7
+        - 8
+        - 9
+        - 10
+        - 11
+        - 12
+        - 13
+        - 14
+        - 15
+        mean_std_embedding_keys: null
+        modality_keys:
+        - arm_left
+        - arm_right
+        - head
+        - lift
+        - other
+        sin_cos_embedding_keys: null
+      language: !!python/object:gr00t.data.types.ModalityConfig
+        action_configs: null
+        delta_indices:
+        - 0
+        mean_std_embedding_keys: null
+        modality_keys:
+        - annotation.human.task_description
+        sin_cos_embedding_keys: null
+      state: !!python/object:gr00t.data.types.ModalityConfig
+        action_configs: null
+        delta_indices:
+        - 0
+        mean_std_embedding_keys: null
+        modality_keys:
+        - arm_left
+        - arm_right
+        - head
+        - lift
+        - other
+        sin_cos_embedding_keys: null
+      video: !!python/object:gr00t.data.types.ModalityConfig
+        action_configs: null
+        delta_indices:
+        - 0
+        mean_std_embedding_keys: null
+        modality_keys:
+        - cam_left_head
+        sin_cos_embedding_keys: null
+  mode: single_turn
+  multiprocessing_context: fork
+  num_shards_per_epoch: 100000
+  override_pretraining_statistics: false
+  random_chop: 0.0
+  seed: 42
+  shard_size: 1024
+  shuffle: true
+  subsample_ratio: 1.0
+  video_backend: torchcodec
+load_config_path: null
+model: !!python/object:gr00t.configs.model.gr00t_n1d6.Gr00tN1d6Config
+  _attn_implementation_autoset: false
+  _attn_implementation_internal: null
+  _commit_hash: null
+  _name_or_path: ''
+  add_cross_attention: false
+  architectures: null
+  backbone_model_type: eagle
+  backbone_trainable_params_fp32: true
+  bad_words_ids: null
+  begin_suppress_tokens: null
+  bos_token_id: null
+  chunk_size_feed_forward: 0
+  color_jitter_params:
+    brightness: 0.3
+    contrast: 0.4
+    hue: 0.08
+    saturation: 0.5
+  cross_attention_hidden_size: null
+  decoder_start_token_id: null
+  diffusion_model_cfg:
+    attention_head_dim: 48
+    dropout: 0.2
+    final_dropout: true
+    interleave_self_attention: true
+    norm_type: ada_norm
+    num_attention_heads: 32
+    num_layers: 32
+    output_dim: 1024
+    positional_embeddings: null
+  diversity_penalty: 0.0
+  do_sample: false
+  eagle_collator: true
+  early_stopping: false
+  encoder_no_repeat_ngram_size: 0
+  eos_token_id: null
+  exponential_decay_length_penalty: null
+  extra_augmentation_config: null
+  finetuning_task: null
+  forced_bos_token_id: null
+  forced_eos_token_id: null
+  id2label:
+    0: LABEL_0
+    1: LABEL_1
+  is_decoder: false
+  is_encoder_decoder: false
+  label2id:
+    LABEL_0: 0
+    LABEL_1: 1
+  length_penalty: 1.0
+  load_bf16: false
+  max_length: 20
+  min_length: 0
+  model_name: nvidia/Eagle-Block2A-2B-v2
+  no_repeat_ngram_size: 0
+  num_beam_groups: 1
+  num_beams: 1
+  num_return_sequences: 1
+  output_attentions: false
+  output_hidden_states: false
+  output_scores: false
+  pad_token_id: null
+  prefix: null
+  problem_type: null
+  pruned_heads: {}
+  random_rotation_angle: null
+  remove_invalid_values: false
+  repetition_penalty: 1.0
+  reproject_vision: false
+  return_dict: true
+  return_dict_in_generate: false
+  sep_token_id: null
+  state_dropout_prob: 0.0
+  suppress_tokens: null
+  task_specific_params: null
+  temperature: 1.0
+  tf_legacy_loss: false
+  tie_encoder_decoder: false
+  tie_word_embeddings: true
+  tokenizer_class: null
+  top_k: 50
+  top_p: 1.0
+  torch_dtype: null
+  torchscript: false
+  transformers_version: null
+  tune_diffusion_model: true
+  tune_llm: false
+  tune_projector: true
+  tune_visual: false
+  typical_p: 1.0
+  use_bfloat16: false
+  use_relative_action: true
+training: !!python/object:gr00t.configs.training.training_config.TrainingConfig
+  add_rl_callback: false
+  assert_loss_less_than: null
+  batch_size: null
+  bf16: true
+  dataloader_num_workers: 8
+  ddp_bucket_cap_mb: 100
+  deepspeed_stage: 2
+  enable_open_loop_eval: false
+  enable_profiling: false
+  eval_batch_size: 2
+  eval_bf16: true
+  eval_set_split_ratio: 0.1
+  eval_steps: 500
+  eval_strategy: 'no'
+  experiment_name: null
+  fp16: false
+  global_batch_size: 48
+  gradient_accumulation_steps: 1
+  gradient_checkpointing: false
+  learning_rate: 0.0001
+  logging_steps: 10
+  lr_scheduler_type: cosine
+  max_concurrent_uploads: 2
+  max_grad_norm: 1.0
+  max_retries: 3
+  max_steps: 100
+  num_gpus: 1
+  open_loop_eval_plot_indices: null
+  open_loop_eval_steps_per_traj: 100
+  open_loop_eval_traj_ids:
+  - 0
+  optim: adamw_torch
+  output_dir: /data/checkpoints/PATs_upload_test_model
+  remove_unused_columns: false
+  save_best_eval_metric_greater_is_better: true
+  save_best_eval_metric_name: ''
+  save_steps: 100
+  save_total_limit: 10
+  save_vl_model: false
+  start_from_checkpoint: nvidia/GR00T-N1.6-3B
+  tf32: true
+  transformers_access_token: null
+  transformers_cache_dir: null
+  transformers_local_files_only: false
+  transformers_trust_remote_code: true
+  upload_checkpoints: false
+  upload_every: 1000
+  upload_last_n_checkpoints: 5
+  use_ddp: false
+  use_wandb: false
+  wandb_project: finetune-gr00t-n1d6
+  warmup_ratio: 0.05
+  warmup_steps: 0
+  weight_decay: 1.0e-05

experiment_cfg/dataset_statistics.json ADDED Viewed

	@@ -0,0 +1,413 @@

+{
+  "new_embodiment": {
+    "state": {
+      "arm_left": {
+        "min": [
+          0.0261855311691761,
+          0.04987834393978119,
+          -0.2664572596549988,
+          -0.3596585690975189,
+          0.3398486375808716,
+          -0.2888677716255188,
+          0.0007343311444856226,
+          0.1365242898464203
+        ],
+        "max": [
+          0.14100639522075653,
+          0.09169130772352219,
+          -0.2527233362197876,
+          -0.2703401446342468,
+          0.3994581699371338,
+          -0.18811637163162231,
+          0.07764258980751038,
+          0.2208932340145111
+        ],
+        "mean": [
+          0.07342984527349472,
+          0.07221090793609619,
+          -0.25541064143180847,
+          -0.3170183598995209,
+          0.3570297360420227,
+          -0.252654492855072,
+          0.047983720898628235,
+          0.16140522062778473
+        ],
+        "std": [
+          0.02547214739024639,
+          0.007564172148704529,
+          0.0024449343327422116,
+          0.021817393600940704,
+          0.01599450968205918,
+          0.030162282288074493,
+          0.019700631499290466,
+          0.03007184900343418
+        ],
+        "q01": [
+          0.026755980625748636,
+          0.05169419512152672,
+          -0.2658288013935089,
+          -0.35693241119384767,
+          0.3405748975276947,
+          -0.28847227334976194,
+          0.0020259265135973694,
+          0.1365242898464203
+        ],
+        "q99": [
+          0.13866083860397338,
+          0.09100484818220138,
+          -0.2530445146560669,
+          -0.27110089898109435,
+          0.39925684690475466,
+          -0.18824484288692475,
+          0.07562038660049437,
+          0.2208932340145111
+        ]
+      },
+      "arm_right": {
+        "min": [
+          0.023009711876511574,
+          -0.08837167173624039,
+          0.24233300983905792,
+          -0.3249882161617279,
+          -0.5430291891098022,
+          -0.20141886174678802,
+          -0.21628637611865997,
+          0.07669904083013535
+        ],
+        "max": [
+          0.18333467841148376,
+          -0.040890175849199295,
+          0.25923076272010803,
+          -0.29035380482673645,
+          -0.5261434316635132,
+          -0.12771588563919067,
+          -0.1844722479581833,
+          0.07976700365543365
+        ],
+        "mean": [
+          0.09076610952615738,
+          -0.07737327367067337,
+          0.2503284811973572,
+          -0.31020236015319824,
+          -0.5314948558807373,
+          -0.15467683970928192,
+          -0.1939942091703415,
+          0.07828317582607269
+        ],
+        "std": [
+          0.03285541012883186,
+          0.007657118607312427,
+          0.007399852853268136,
+          0.0069493041373787905,
+          0.004396171309053799,
+          0.017841925844550133,
+          0.011914669536054098,
+          0.0002946005261036449
+        ],
+        "q01": [
+          0.026532114669680595,
+          -0.08812719404697418,
+          0.24233300983905792,
+          -0.3234734082221985,
+          -0.5429851007461548,
+          -0.19559118151664734,
+          -0.21628637611865997,
+          0.0782330185174942
+        ],
+        "q99": [
+          0.17690824568271638,
+          -0.04201333686709405,
+          0.25923076272010803,
+          -0.2913810956478119,
+          -0.5261635589599609,
+          -0.12910940766334533,
+          -0.18474199175834655,
+          0.07976700365543365
+        ]
+      },
+      "head": {
+        "min": [
+          -2.0694557179012918e-13,
+          -0.0015339808305725455
+        ],
+        "max": [
+          -2.0694557179012918e-13,
+          0.0015339808305725455
+        ],
+        "mean": [
+          -2.0694557179012918e-13,
+          1.2471388799895067e-05
+        ],
+        "std": [
+          0.0,
+          0.0002109108172589913
+        ],
+        "q01": [
+          -2.0694557179012918e-13,
+          -2.0694557179012918e-13
+        ],
+        "q99": [
+          -2.0694557179012918e-13,
+          0.0015339808305725455
+        ]
+      },
+      "lift": {
+        "min": [
+          0.0
+        ],
+        "max": [
+          0.0
+        ],
+        "mean": [
+          0.0
+        ],
+        "std": [
+          0.0
+        ],
+        "q01": [
+          0.0
+        ],
+        "q99": [
+          0.0
+        ]
+      },
+      "other": {
+        "min": [
+          -0.0023815890308469534,
+          -1.3601596947410144e-05,
+          -0.0026437826454639435
+        ],
+        "max": [
+          0.0023931083269417286,
+          2.1567169824265875e-05,
+          0.004167238250374794
+        ],
+        "mean": [
+          1.2018665529467398e-06,
+          -2.14848228097253e-08,
+          -4.104913386981934e-06
+        ],
+        "std": [
+          0.0004975512274540961,
+          4.607368282449897e-06,
+          0.0008911995682865381
+        ],
+        "q01": [
+          -0.0015624607214704154,
+          -1.3387292019615416e-05,
+          -0.002579548256471753
+        ],
+        "q99": [
+          0.0017830528365448096,
+          1.714260106382424e-05,
+          0.003313816646113988
+        ]
+      }
+    },
+    "action": {
+      "arm_left": {
+        "min": [
+          0.02454369328916073,
+          0.0475534051656723,
+          -0.26691266894340515,
+          -0.3666214048862457,
+          0.33900976181030273,
+          -0.28904861211776733,
+          0.0015339808305725455,
+          0.13712233304977417
+        ],
+        "max": [
+          0.1426602154970169,
+          0.0920388475060463,
+          -0.251572847366333,
+          -0.2684466242790222,
+          0.40957286953926086,
+          -0.1878058910369873,
+          0.0782330185174942,
+          0.221491277217865
+        ],
+        "mean": [
+          0.07346080243587494,
+          0.07216790318489075,
+          -0.2554052174091339,
+          -0.3169855773448944,
+          0.35689324140548706,
+          -0.2531152665615082,
+          0.048434652388095856,
+          0.16206307709217072
+        ],
+        "std": [
+          0.02742091380059719,
+          0.007938423193991184,
+          0.0025356656406072945,
+          0.023799823597073482,
+          0.01636500656604767,
+          0.030521018430590574,
+          0.019570723176002502,
+          0.029494687914848328
+        ],
+        "q01": [
+          0.025586799383163453,
+          0.05166447162628174,
+          -0.26691266894340515,
+          -0.3609763693809509,
+          0.33900976181030273,
+          -0.28904861211776733,
+          0.0015339808305725455,
+          0.13712233304977417
+        ],
+        "q99": [
+          0.1426602154970169,
+          0.0920388475060463,
+          -0.25310683250427246,
+          -0.26998060941696167,
+          0.4013507390022278,
+          -0.1878058910369873,
+          0.0756559309363365,
+          0.221491277217865
+        ]
+      },
+      "arm_right": {
+        "min": [
+          0.019941750913858414,
+          -0.09050486981868744,
+          0.24236896634101868,
+          -0.3267379105091095,
+          -0.5430291891098022,
+          -0.20928162336349487,
+          -0.21629129350185394,
+          0.07975145429372787
+        ],
+        "max": [
+          0.1871456503868103,
+          -0.03988350182771683,
+          0.2592427432537079,
+          -0.2899223566055298,
+          -0.526155412197113,
+          -0.1264466643333435,
+          -0.1840776950120926,
+          0.07975145429372787
+        ],
+        "mean": [
+          0.09057962149381638,
+          -0.07741815596818924,
+          0.25054508447647095,
+          -0.31016814708709717,
+          -0.5314047336578369,
+          -0.1541411280632019,
+          -0.19378866255283356,
+          0.07975157350301743
+        ],
+        "std": [
+          0.03508458286523819,
+          0.007914146408438683,
+          0.00744145084172528,
+          0.0075515802018344255,
+          0.004423194099218413,
+          0.017863281071186066,
+          0.011763404123485052,
+          1.1920928955078125e-07
+        ],
+        "q01": [
+          0.022518837824463844,
+          -0.08897088468074799,
+          0.24236896634101868,
+          -0.32520392537117004,
+          -0.5430291891098022,
+          -0.19590531289577484,
+          -0.21629129350185394,
+          0.07975145429372787
+        ],
+        "q99": [
+          0.18456857025623322,
+          -0.03988350182771683,
+          0.2592427432537079,
+          -0.2899223566055298,
+          -0.526155412197113,
+          -0.12853287398815158,
+          -0.18512080490589142,
+          0.07975145429372787
+        ]
+      },
+      "head": {
+        "min": [
+          -2.0694557179012918e-13,
+          -2.0694557179012918e-13
+        ],
+        "max": [
+          -2.0694557179012918e-13,
+          -2.0694557179012918e-13
+        ],
+        "mean": [
+          -2.0694557179012918e-13,
+          -2.0694557179012918e-13
+        ],
+        "std": [
+          0.0,
+          0.0
+        ],
+        "q01": [
+          -2.0694557179012918e-13,
+          -2.0694557179012918e-13
+        ],
+        "q99": [
+          -2.0694557179012918e-13,
+          -2.0694557179012918e-13
+        ]
+      },
+      "lift": {
+        "min": [
+          0.0
+        ],
+        "max": [
+          0.0
+        ],
+        "mean": [
+          0.0
+        ],
+        "std": [
+          0.0
+        ],
+        "q01": [
+          0.0
+        ],
+        "q99": [
+          0.0
+        ]
+      },
+      "other": {
+        "min": [
+          0.0,
+          0.0,
+          0.0
+        ],
+        "max": [
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    },
+    "relative_action": {}
+  }
+}

experiment_cfg/final_model_config.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "model_type": "Gr00tN1d6",
+  "model_dtype": "bfloat16",
+  "model_name": "nvidia/Eagle-Block2A-2B-v2",
+  "backbone_model_type": "eagle",
+  "model_revision": null,
+  "tune_top_llm_layers": 4,
+  "backbone_embedding_dim": 2048,
+  "tune_llm": false,
+  "tune_visual": false,
+  "select_layer": 16,
+  "reproject_vision": false,
+  "use_flash_attention": true,
+  "load_bf16": true,
+  "collator_overwrite_image_inputs": false,
+  "eagle_collator": true,
+  "backbone_trainable_params_fp32": true,
+  "extra_augmentation_config": null,
+  "apply_sincos_state_encoding": true,
+  "use_relative_action": true,
+  "max_state_dim": 128,
+  "max_action_dim": 128,
+  "action_horizon": 50,
+  "hidden_size": 1024,
+  "input_embedding_dim": 1536,
+  "add_pos_embed": true,
+  "attn_dropout": 0.2,
+  "use_vlln": true,
+  "max_seq_len": 1024,
+  "use_alternate_vl_dit": true,
+  "attend_text_every_n_blocks": 2,
+  "diffusion_model_cfg": {
+    "attention_head_dim": 48,
+    "dropout": 0.2,
+    "final_dropout": true,
+    "interleave_self_attention": true,
+    "norm_type": "ada_norm",
+    "num_attention_heads": 32,
+    "num_layers": 32,
+    "output_dim": 1024,
+    "positional_embeddings": null
+  },
+  "num_inference_timesteps": 4,
+  "noise_beta_alpha": 1.5,
+  "noise_beta_beta": 1.0,
+  "noise_s": 0.999,
+  "num_timestep_buckets": 1000,
+  "tune_projector": true,
+  "tune_diffusion_model": true,
+  "tune_vlln": true,
+  "state_dropout_prob": 0.0,
+  "state_additive_noise_scale": 0.0,
+  "max_num_embodiments": 32
+}

experiment_cfg/final_processor_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

model-00001-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b3cccf9061f4d9fc43fccc44c9d1ed1d5b4518fa205138f721504127766b77fe
+size 4990120184

model-00002-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:57b3f3127cd667325d6cbcb4a2b7f3dd34b14a7f530748849c3509c6c3b9e3bf
+size 4823190320

model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

processor_config.json ADDED Viewed

	@@ -0,0 +1,478 @@

+{
+  "processor_class": "Gr00tN1d6Processor",
+  "processor_kwargs": {
+    "modality_configs": {
+      "behavior_r1_pro": {
+        "video": {
+          "delta_indices": [
+            0
+          ],
+          "modality_keys": [
+            "observation.images.rgb.head_256_256",
+            "observation.images.rgb.left_wrist_256_256",
+            "observation.images.rgb.right_wrist_256_256"
+          ],
+          "sin_cos_embedding_keys": null,
+          "mean_std_embedding_keys": null,
+          "action_configs": null
+        },
+        "state": {
+          "delta_indices": [
+            0
+          ],
+          "modality_keys": [
+            "robot_pos",
+            "robot_ori_cos",
+            "robot_ori_sin",
+            "robot_2d_ori",
+            "robot_2d_ori_cos",
+            "robot_2d_ori_sin",
+            "robot_lin_vel",
+            "robot_ang_vel",
+            "arm_left_qpos",
+            "arm_left_qpos_sin",
+            "arm_left_qpos_cos",
+            "eef_left_pos",
+            "eef_left_quat",
+            "gripper_left_qpos",
+            "arm_right_qpos",
+            "arm_right_qpos_sin",
+            "arm_right_qpos_cos",
+            "eef_right_pos",
+            "eef_right_quat",
+            "gripper_right_qpos",
+            "trunk_qpos"
+          ],
+          "sin_cos_embedding_keys": null,
+          "mean_std_embedding_keys": null,
+          "action_configs": null
+        },
+        "action": {
+          "delta_indices": [
+            0,
+            1,
+            2,
+            3,
+            4,
+            5,
+            6,
+            7,
+            8,
+            9,
+            10,
+            11,
+            12,
+            13,
+            14,
+            15,
+            16,
+            17,
+            18,
+            19,
+            20,
+            21,
+            22,
+            23,
+            24,
+            25,
+            26,
+            27,
+            28,
+            29,
+            30,
+            31
+          ],
+          "modality_keys": [
+            "base",
+            "torso",
+            "left_arm",
+            "left_gripper",
+            "right_arm",
+            "right_gripper"
+          ],
+          "sin_cos_embedding_keys": null,
+          "mean_std_embedding_keys": null,
+          "action_configs": [
+            {
+              "rep": "ABSOLUTE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            },
+            {
+              "rep": "RELATIVE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": "trunk_qpos"
+            },
+            {
+              "rep": "RELATIVE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": "arm_left_qpos"
+            },
+            {
+              "rep": "ABSOLUTE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            },
+            {
+              "rep": "RELATIVE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": "arm_right_qpos"
+            },
+            {
+              "rep": "ABSOLUTE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            }
+          ]
+        },
+        "language": {
+          "delta_indices": [
+            0
+          ],
+          "modality_keys": [
+            "annotation.human.coarse_action"
+          ],
+          "sin_cos_embedding_keys": null,
+          "mean_std_embedding_keys": null,
+          "action_configs": null
+        }
+      },
+      "gr1": {
+        "video": {
+          "delta_indices": [
+            0
+          ],
+          "modality_keys": [
+            "ego_view_bg_crop_pad_res256_freq20"
+          ],
+          "sin_cos_embedding_keys": null,
+          "mean_std_embedding_keys": null,
+          "action_configs": null
+        },
+        "state": {
+          "delta_indices": [
+            0
+          ],
+          "modality_keys": [
+            "left_arm",
+            "right_arm",
+            "left_hand",
+            "right_hand",
+            "waist"
+          ],
+          "sin_cos_embedding_keys": [
+            "left_arm",
+            "right_arm",
+            "left_hand",
+            "right_hand",
+            "waist"
+          ],
+          "mean_std_embedding_keys": null,
+          "action_configs": null
+        },
+        "action": {
+          "delta_indices": [
+            0,
+            1,
+            2,
+            3,
+            4,
+            5,
+            6,
+            7,
+            8,
+            9,
+            10,
+            11,
+            12,
+            13,
+            14,
+            15
+          ],
+          "modality_keys": [
+            "left_arm",
+            "right_arm",
+            "left_hand",
+            "right_hand",
+            "waist"
+          ],
+          "sin_cos_embedding_keys": null,
+          "mean_std_embedding_keys": null,
+          "action_configs": [
+            {
+              "rep": "RELATIVE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            },
+            {
+              "rep": "RELATIVE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            },
+            {
+              "rep": "RELATIVE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            },
+            {
+              "rep": "RELATIVE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            },
+            {
+              "rep": "ABSOLUTE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            }
+          ]
+        },
+        "language": {
+          "delta_indices": [
+            0
+          ],
+          "modality_keys": [
+            "task"
+          ],
+          "sin_cos_embedding_keys": null,
+          "mean_std_embedding_keys": null,
+          "action_configs": null
+        }
+      },
+      "robocasa_panda_omron": {
+        "video": {
+          "delta_indices": [
+            0
+          ],
+          "modality_keys": [
+            "res256_image_side_0",
+            "res256_image_side_1",
+            "res256_image_wrist_0"
+          ],
+          "sin_cos_embedding_keys": null,
+          "mean_std_embedding_keys": null,
+          "action_configs": null
+        },
+        "state": {
+          "delta_indices": [
+            0
+          ],
+          "modality_keys": [
+            "end_effector_position_relative",
+            "end_effector_rotation_relative",
+            "gripper_qpos",
+            "base_position",
+            "base_rotation"
+          ],
+          "sin_cos_embedding_keys": null,
+          "mean_std_embedding_keys": null,
+          "action_configs": null
+        },
+        "action": {
+          "delta_indices": [
+            0,
+            1,
+            2,
+            3,
+            4,
+            5,
+            6,
+            7,
+            8,
+            9,
+            10,
+            11,
+            12,
+            13,
+            14,
+            15
+          ],
+          "modality_keys": [
+            "end_effector_position",
+            "end_effector_rotation",
+            "gripper_close",
+            "base_motion",
+            "control_mode"
+          ],
+          "sin_cos_embedding_keys": null,
+          "mean_std_embedding_keys": null,
+          "action_configs": [
+            {
+              "rep": "ABSOLUTE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            },
+            {
+              "rep": "ABSOLUTE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            },
+            {
+              "rep": "ABSOLUTE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            },
+            {
+              "rep": "ABSOLUTE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            },
+            {
+              "rep": "ABSOLUTE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            }
+          ]
+        },
+        "language": {
+          "delta_indices": [
+            0
+          ],
+          "modality_keys": [
+            "annotation.human.action.task_description"
+          ],
+          "sin_cos_embedding_keys": null,
+          "mean_std_embedding_keys": null,
+          "action_configs": null
+        }
+      },
+      "new_embodiment": {
+        "video": {
+          "delta_indices": [
+            0
+          ],
+          "modality_keys": [
+            "cam_left_head"
+          ],
+          "sin_cos_embedding_keys": null,
+          "mean_std_embedding_keys": null,
+          "action_configs": null
+        },
+        "state": {
+          "delta_indices": [
+            0
+          ],
+          "modality_keys": [
+            "arm_left",
+            "arm_right",
+            "head",
+            "lift",
+            "other"
+          ],
+          "sin_cos_embedding_keys": null,
+          "mean_std_embedding_keys": null,
+          "action_configs": null
+        },
+        "action": {
+          "delta_indices": [
+            0,
+            1,
+            2,
+            3,
+            4,
+            5,
+            6,
+            7,
+            8,
+            9,
+            10,
+            11,
+            12,
+            13,
+            14,
+            15
+          ],
+          "modality_keys": [
+            "arm_left",
+            "arm_right",
+            "head",
+            "lift",
+            "other"
+          ],
+          "sin_cos_embedding_keys": null,
+          "mean_std_embedding_keys": null,
+          "action_configs": [
+            {
+              "rep": "ABSOLUTE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            },
+            {
+              "rep": "ABSOLUTE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            },
+            {
+              "rep": "ABSOLUTE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            },
+            {
+              "rep": "ABSOLUTE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            },
+            {
+              "rep": "ABSOLUTE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            }
+          ]
+        },
+        "language": {
+          "delta_indices": [
+            0
+          ],
+          "modality_keys": [
+            "annotation.human.task_description"
+          ],
+          "sin_cos_embedding_keys": null,
+          "mean_std_embedding_keys": null,
+          "action_configs": null
+        }
+      }
+    },
+    "image_crop_size": null,
+    "image_target_size": null,
+    "use_albumentations": true,
+    "random_rotation_angle": null,
+    "color_jitter_params": {
+      "brightness": 0.3,
+      "contrast": 0.4,
+      "saturation": 0.5,
+      "hue": 0.08
+    },
+    "shortest_image_edge": 256,
+    "crop_fraction": 0.95,
+    "model_name": "nvidia/Eagle-Block2A-2B-v2",
+    "model_type": "eagle",
+    "formalize_language": true,
+    "max_state_dim": 128,
+    "max_action_dim": 128,
+    "max_action_horizon": 50,
+    "use_percentiles": false,
+    "clip_outliers": true,
+    "apply_sincos_state_encoding": true,
+    "use_relative_action": true
+  }
+}

statistics.json ADDED Viewed

The diff for this file is too large to render. See raw diff

trainer_state.json ADDED Viewed

	@@ -0,0 +1,94 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 100,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "grad_norm": 0.29096701741218567,
+      "learning_rate": 9.956320346634876e-05,
+      "loss": 1.2375,
+      "step": 10
+    },
+    {
+      "grad_norm": 0.16030608117580414,
+      "learning_rate": 9.473646649103818e-05,
+      "loss": 1.1952,
+      "step": 20
+    },
+    {
+      "grad_norm": 0.13103239238262177,
+      "learning_rate": 8.506183921362443e-05,
+      "loss": 1.2002,
+      "step": 30
+    },
+    {
+      "grad_norm": 0.11081837117671967,
+      "learning_rate": 7.158771761692464e-05,
+      "loss": 1.191,
+      "step": 40
+    },
+    {
+      "grad_norm": 0.15927913784980774,
+      "learning_rate": 5.577423184847932e-05,
+      "loss": 1.194,
+      "step": 50
+    },
+    {
+      "grad_norm": 0.2992898225784302,
+      "learning_rate": 3.933501846281267e-05,
+      "loss": 1.1546,
+      "step": 60
+    },
+    {
+      "grad_norm": 0.402879923582077,
+      "learning_rate": 2.405152131093926e-05,
+      "loss": 1.1042,
+      "step": 70
+    },
+    {
+      "grad_norm": 0.3662746846675873,
+      "learning_rate": 1.157994445715706e-05,
+      "loss": 1.091,
+      "step": 80
+    },
+    {
+      "grad_norm": 0.31836211681365967,
+      "learning_rate": 3.271776770026963e-06,
+      "loss": 1.0746,
+      "step": 90
+    },
+    {
+      "grad_norm": 0.2885839641094208,
+      "learning_rate": 2.7337132953697554e-08,
+      "loss": 1.0781,
+      "step": 100
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 100,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 9223372036854775807,
+  "save_steps": 100,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 48,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:80455889e43f93dca11660a5a2c7b0b0c42781f793878da53d76a14431b3bb9f
+size 5777

wandb_config.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"project": "finetune-gr00t-n1d6", "run_id": "PATs_upload_test_model"}