binhpham
/

molmoact2-leslider

@@ -1,123 +1,153 @@
 {
-    "type": "molmoact2",
-    "n_obs_steps": 1,
-    "input_features": {
-        "observation.state": {
-            "type": "STATE",
-            "shape": [
-                6
-            ]
-        },
-        "observation.images.arm_camera": {
-            "type": "VISUAL",
-            "shape": [
-                3,
-                480,
-                640
-            ]
-        }
     },
-    "output_features": {
-        "action": {
-            "type": "ACTION",
-            "shape": [
-                6
-            ]
-        }
-    },
-    "device": "cuda",
-    "use_amp": false,
-    "use_peft": false,
-    "push_to_hub": false,
-    "repo_id": null,
-    "private": null,
-    "tags": null,
-    "license": null,
-    "pretrained_path": "/home/binhpham/outputs/molmoact2-leslider_steps10000_bs8_lr1e-5_chunk30/checkpoints/005000/pretrained_model",
-    "checkpoint_path": "allenai/MolmoAct2-SO100_101",
-    "checkpoint_revision": null,
-    "checkpoint_force_download": false,
-    "trust_remote_code": true,
-    "chunk_size": 30,
-    "n_action_steps": 30,
-    "action_mode": "continuous",
-    "inference_action_mode": null,
-    "discrete_action_tokenizer": "allenai/MolmoAct2-FAST-Tokenizer",
-    "discrete_generation_max_steps": null,
-    "norm_tag": null,
-    "setup_type": "single so100/so101 robotic arm in molmoact2",
-    "control_mode": "absolute joint pose",
-    "image_keys": [
-        "observation.images.arm_camera"
-    ],
-    "normalize_language": true,
-    "add_setup_tokens": true,
-    "add_control_tokens": true,
-    "normalize_gripper": false,
-    "num_state_tokens": 256,
-    "max_sequence_length": null,
-    "expected_max_action_dim": 32,
-    "num_flow_timesteps": 8,
-    "flow_matching_cutoff": 1.0,
-    "flow_matching_time_offset": 0.001,
-    "flow_matching_time_scale": 0.999,
-    "flow_matching_beta_alpha": 1.0,
-    "flow_matching_beta_beta": 1.5,
-    "num_inference_steps": null,
-    "mask_action_dim_padding": true,
-    "enable_inference_cuda_graph": true,
-    "per_episode_seed": false,
-    "eval_seed": null,
-    "rtc_config": null,
-    "enable_lora_vlm": false,
-    "lora_rank": 64,
-    "lora_alpha": 16,
-    "lora_dropout": 0.05,
-    "lora_bias": "none",
-    "enable_lora_action_expert": false,
-    "enable_knowledge_insulation": false,
-    "freeze_embedding": true,
-    "train_action_expert_only": false,
-    "gradient_checkpointing": true,
-    "model_dtype": "bfloat16",
-    "softmax_auxiliary_loss": true,
-    "softmax_auxiliary_loss_scale": 0.0001,
-    "discrete_loss_token_weighting": "root_subsegments_root_tokens",
-    "optimizer_lr": 1e-05,
-    "optimizer_vit_lr": 5e-06,
-    "optimizer_connector_lr": 5e-06,
-    "optimizer_action_expert_lr": 5e-05,
-    "optimizer_betas": [
-        0.9,
-        0.95
     ],
-    "optimizer_eps": 1e-06,
-    "optimizer_weight_decay": 0.0,
-    "optimizer_grad_clip_norm": 1.0,
-    "scheduler_warmup_steps": 200,
-    "scheduler_decay_steps": null,
-    "scheduler_decay_lr": 1e-06,
-    "normalization_mapping": {
-        "ACTION": "MEAN_STD",
-        "STATE": "MEAN_STD",
-        "VISUAL": "IDENTITY"
-    },
-    "dataset_feature_names": {
-        "action": [
-            "shoulder_pan.pos",
-            "shoulder_lift.pos",
-            "elbow_flex.pos",
-            "wrist_flex.pos",
-            "wrist_roll.pos",
-            "gripper.pos"
-        ],
-        "observation.state": [
-            "shoulder_pan.pos",
-            "shoulder_lift.pos",
-            "elbow_flex.pos",
-            "wrist_flex.pos",
-            "wrist_roll.pos",
-            "gripper.pos"
-        ]
-    }
-}

 {
+  "action_end_token_id": 151933,
+  "action_expert_config": {
+    "attn_dropout": 0.0,
+    "causal_attn": false,
+    "context_layer_norm": true,
+    "dropout": 0.0,
+    "ffn_multiple_of": 256,
+    "hidden_size": 768,
+    "mlp_ratio": 4.0,
+    "model_type": "molmoact2_action_expert",
+    "num_heads": 8,
+    "num_layers": 36,
+    "qk_norm": true,
+    "qk_norm_eps": 1e-06,
+    "rope": true,
+    "timestep_embed_dim": 256
+  },
+  "action_expert_depth_gate": false,
+  "action_expert_depth_gate_init_bias": -4.0,
+  "action_expert_depth_gate_per_layer": false,
+  "action_mode": "both",
+  "max_action_horizon": 30,
+  "action_output_token_id": 151931,
+  "action_start_token_id": 151932,
+  "action_token_start_id": 151934,
+  "adapter_config": {
+    "attention_dropout": 0.0,
+    "attn_implementation": "sdpa",
+    "float32_attention": true,
+    "head_dim": 72,
+    "hidden_act": "silu",
+    "hidden_size": 1152,
+    "image_feature_dropout": 0.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 9728,
+    "model_type": "molmoact2",
+    "num_attention_heads": 16,
+    "num_key_value_heads": 16,
+    "pooling_attention_mask": true,
+    "residual_dropout": 0.0,
+    "text_hidden_size": 2560,
+    "vit_layers": [
+      -3,
+      -9
+    ]
+  },
+  "add_action_expert": true,
+  "add_control_tokens": true,
+  "add_setup_tokens": true,
+  "architectures": [
+    "MolmoAct2ForConditionalGeneration"
+  ],
+  "auto_map": {
+    "AutoConfig": "configuration_molmoact2.MolmoAct2Config",
+    "AutoModelForImageTextToText": "modeling_molmoact2.MolmoAct2ForConditionalGeneration"
+  },
+  "depth_end_token_id": null,
+  "depth_mode": 2,
+  "depth_output_token_id": null,
+  "depth_start_token_id": null,
+  "depth_token_start_id": null,
+  "dtype": "float32",
+  "enable_depth_reasoning": false,
+  "flow_matching_beta_alpha": 1.0,
+  "flow_matching_beta_beta": 1.5,
+  "flow_matching_cutoff": 1.0,
+  "flow_matching_num_steps": 10,
+  "flow_matching_time_offset": 0.001,
+  "flow_matching_time_scale": 0.999,
+  "frame_end_token_id": 154632,
+  "frame_start_token_id": 154631,
+  "image_col_id": 154627,
+  "image_end_token_id": 154625,
+  "image_high_res_id": 154626,
+  "image_low_res_id": 154630,
+  "image_patch_id": 154626,
+  "image_start_token_id": 154624,
+  "initializer_range": 0.02,
+  "low_res_image_start_token_id": 154628,
+  "mask_action_dim_padding": true,
+  "max_action_dim": 32,
+  "model_type": "molmoact2",
+  "n_obs_steps": 1,
+  "norm_stats_filename": "norm_stats.json",
+  "num_action_tokens": 2048,
+  "num_depth_codes": 100,
+  "num_depth_tokens": 0,
+  "num_state_tokens": 256,
+  "state_end_token_id": 151674,
+  "state_format": "discrete",
+  "state_start_token_id": 151673,
+  "state_token_start_id": 151675,
+  "text_config": {
+    "additional_vocab_size": 128,
+    "attention_dropout": 0.0,
+    "attn_implementation": "sdpa",
+    "embedding_dropout": 0.0,
+    "head_dim": 128,
+    "hidden_act": "silu",
+    "hidden_size": 2560,
+    "initializer_range": 0.02,
+    "intermediate_size": 9728,
+    "layer_norm_eps": 1e-06,
+    "max_position_embeddings": 16384,
+    "model_type": "molmoact2_text",
+    "norm_after": false,
+    "num_attention_heads": 32,
+    "num_hidden_layers": 36,
+    "num_key_value_heads": 8,
+    "qk_norm_type": "qwen3",
+    "qkv_bias": false,
+    "residual_dropout": 0.0,
+    "rope_parameters": {
+      "rope_theta": 5000000.0,
+      "rope_type": "default"
     },
+    "rope_scaling_layers": null,
+    "rope_theta": 5000000.0,
+    "tie_word_embeddings": false,
+    "use_cache": true,
+    "use_qk_norm": true,
+    "vocab_size": 154624
+  },
+  "tie_word_embeddings": false,
+  "transformers_version": "5.3.0",
+  "use_frame_special_tokens": true,
+  "vit_config": {
+    "attention_dropout": 0.0,
+    "attn_implementation": "sdpa",
+    "float32_attention": true,
+    "head_dim": 72,
+    "hidden_act": "gelu_pytorch_tanh",
+    "hidden_size": 1152,
+    "image_default_input_size": [
+      378,
+      378
     ],
+    "image_num_pos": 729,
+    "image_patch_size": 14,
+    "initializer_range": 0.02,
+    "intermediate_size": 4304,
+    "layer_norm_eps": 1e-06,
+    "model_type": "molmoact2",
+    "num_attention_heads": 16,
+    "num_hidden_layers": 27,
+    "num_key_value_heads": 16,
+    "residual_dropout": 0.0
+  },
+  "bos_token_id": 151645,
+  "eos_token_id": 151645,
+  "pad_token_id": 151643
+}

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5129ddfec4817fbcfd58a41dcc5543b4eafbbe0ba6b867012c6dc955c31bfe34
-size 10884573720

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d34a2c69ef43bb3f346bacffff8866e8e6f6653f4cfa493aee26b75afdead19
+size 10884565968