huiwon
/

alinvla_gr1_robot_vqa_v2_7647

Model card | Files and versions

huiwon committed on Jan 15

Commit

5e66157

·

verified ·

1 Parent(s): 898de24

Add config.json

Files changed (1) hide show

config.json +103 -0

config.json ADDED Viewed

	@@ -0,0 +1,103 @@

+{
+  "action_dim": 32,
+  "action_head_cfg": {
+    "action_dim": 32,
+    "action_horizon": 16,
+    "add_pos_embed": true,
+    "attn_type": "joint_attn_v2",
+    "backbone_embedding_dim": 4096,
+    "base_freq": 50.0,
+    "dct_loss_weight": 0.0,
+    "diffusion_model_cfg": {
+      "attention_head_dim": 64,
+      "cross_attention_dim": 4096,
+      "depth": 4,
+      "depth_single_blocks": 8,
+      "direct_visual_conditioning": null,
+      "disable_time_token_pos_emb": false,
+      "dropout": 0.2,
+      "final_dropout": true,
+      "interleave_self_attention": true,
+      "meta_queries_as_modality": false,
+      "norm_type": "ada_norm",
+      "num_attention_heads": 24,
+      "num_layers": 16,
+      "num_temb_tokens": 1,
+      "output_dim": 1024,
+      "positional_embeddings": "rope_sa_only",
+      "rope_theta": 10000.0,
+      "state_as_modality": false,
+      "temb_type": "additional_token",
+      "use_alternate_vl_conditioning": false,
+      "use_swiglu": true
+    },
+    "discretize_timesteps": false,
+    "ff_loss_weight": 0.0,
+    "flow_matching_loss_weight": 1.0,
+    "hidden_size": 1024,
+    "input_embedding_dim": 1536,
+    "max_action_dim": 32,
+    "max_num_embodiments": 32,
+    "max_state_dim": 64,
+    "model_dct": false,
+    "model_dtype": "float32",
+    "noise_beta_alpha": 1.5,
+    "noise_beta_beta": 1.0,
+    "noise_s": 0.999,
+    "num_inference_timesteps": 4,
+    "num_target_vision_tokens": 32,
+    "num_timestep_buckets": 1000,
+    "post_norm": "none",
+    "pre_norm": "layer_norm",
+    "qk_rmsnorm": false,
+    "remove_bias": false,
+    "tune_diffusion_model": true,
+    "tune_projector": true,
+    "use_future_tokens": false,
+    "use_qknorm": true,
+    "use_rmsnorm": true,
+    "use_vlln": false,
+    "vl_self_attention_cfg": {
+      "attention_head_dim": 64,
+      "dropout": 0.2,
+      "final_dropout": true,
+      "num_attention_heads": 64,
+      "num_layers": 4,
+      "positional_embeddings": null
+    },
+    "x_prediction": false
+  },
+  "action_horizon": 16,
+  "architectures": [
+    "GR00T_N1_5"
+  ],
+  "attn_implementation": null,
+  "backbone_cfg": {
+    "load_bf16": false,
+    "meta_queries_mode": "full",
+    "n_meta_queries": 4,
+    "project_to_dim": null,
+    "qwen_path": "/fsx/alinvla/AlinVLA-VLM/checkpoints/robot_vqa_v2/checkpoint-7647",
+    "reproject_vision": false,
+    "select_layer": 18,
+    "tune_llm": false,
+    "tune_visual": false,
+    "use_causal_mask": true,
+    "use_flash_attention": true,
+    "use_meta_queries": false
+  },
+  "backbone_model_type": "qwen3_vl_8b",
+  "compute_dtype": "bfloat16",
+  "dtype": "bfloat16",
+  "hidden_size": 2048,
+  "lap_cfg": {},
+  "max_action_dim": 32,
+  "model_dtype": "float32",
+  "model_type": "gr00t_n1_5",
+  "transformers_version": "5.0.0.dev0",
+  "tune_diffusion_model": true,
+  "tune_llm": false,
+  "tune_projector": true,
+  "tune_visual": false,
+  "use_cache": false
+}