{ "action_chunk_size": 60, "action_config": { "_attn_implementation_autoset": true, "action_chunk_size": 60, "action_dim": 26, "architectures": [ "ActionExpertModel" ], "attn_implementation": "eager", "auto_map": { "AutoConfig": "configuration_action_expert.ActionExpertConfig", "AutoModel": "modeling_action_expert.ActionExpertModel" }, "bias": false, "dtype": "bfloat16", "head_dim": 64, "hidden_act": "silu", "hidden_size": 1024, "initializer_range": 0.02, "input_hidden_size": 2048, "intermediate_size": 4096, "max_position_embeddings": 32768, "model_type": "action_expert", "num_attention_heads": 16, "num_hidden_layers": 24, "num_key_value_heads": 8, "pad_token_id": 2, "rms_norm_eps": 1e-05, "rope_scaling": { "factor": 2.0, "type": "dynamic" }, "rope_theta": 1000000, "state_dim": 26, "state_token_num": 3, "use_bfloat16": true, "use_cache": true, "use_flash_attn": false }, "architectures": [ "GO1Model" ], "auto_map": { "AutoConfig": "configuration_go1.GO1ModelConfig", "AutoModel": "modeling_go1.GO1Model", "AutoModelForCausalLM": "modeling_internlm2_go1.py.InternLM2ForCausalLMGO1" }, "bos_token_id": 1, "downsample_ratio": 0.5, "dtype": "bfloat16", "dynamic_image_size": false, "eos_token_id": 2, "flow_matching": { "flow_matching_final_weight": 10.0, "flow_matching_weight": 1.0, "num_steps": 10, "rng": 42 }, "force_image_size": 448, "img_context_token_id": 92546, "information_fusion_config": { "action_chunk_size": 60, "action_dim": 26, "attn_implementation": "eager", "bias": true, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "input_hidden_size": 2048, "intermediate_size": 11008, "max_position_embeddings": 2048, "model_type": "information_fusion", "num_attention_heads": 16, "num_hidden_layers": 12, "num_key_value_heads": 16, "rms_norm_eps": 1e-06, "rope_scaling": null, "rope_theta": 10000, "state_dim": 26, "use_cache": false }, "initializer_range": 0.02, "latent_planner_config": { "action_dim": 1, "attn_implementation": "eager", "bias": false, "head_dim": 64, "hidden_act": "silu", "hidden_size": 1024, "initializer_range": 0.02, "input_hidden_size": 2048, "intermediate_size": 2048, "max_position_embeddings": 2048, "model_type": "intermidiate_action_expert", "num_attention_heads": 16, "num_hidden_layers": 24, "num_key_value_heads": 8, "rms_norm_eps": 1e-06, "rope_scaling": null, "rope_theta": 10000, "state_token_num": 0, "use_cache": true, "vocab_size": 32 }, "latent_planning": false, "llm_config": { "_attn_implementation_autoset": true, "architectures": [ "InternLM2ForCausalLMGO1" ], "attn_implementation": "flash_attention_2", "auto_map": { "AutoConfig": "configuration_internlm2.InternLM2Config", "AutoModel": "modeling_internlm2_go1.InternLM2ForCausalLMGO1", "AutoModelForCausalLM": "modeling_internlm2_go1.py.InternLM2ForCausalLMGO1" }, "bias": false, "dtype": "bfloat16", "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "max_position_embeddings": 32768, "model_type": "internlm2", "num_attention_heads": 16, "num_hidden_layers": 24, "num_key_value_heads": 8, "pad_token_id": 2, "rms_norm_eps": 1e-05, "rope_scaling": { "factor": 2.0, "type": "dynamic" }, "rope_theta": 1000000, "use_bfloat16": true, "use_cache": true, "vocab_size": 92553 }, "max_dynamic_patch": 6, "min_dynamic_patch": 1, "model_type": "go1", "noise_scheduler_config": { "beta_schedule": "squaredcos_cap_v2", "clip_sample": false, "num_inference_timesteps": 5, "num_train_timesteps": 1000, "prediction_type": "sample" }, "norm": true, "output_attentions": false, 
"pad2square": false, "pad_token_id": 2, "ps_version": "v2", "select_layer": -1, "template": "internlm2-chat", "transformers_version": null, "use_backbone_lora": 0, "use_llm_lora": 0, "use_thumbnail": false, "vision_config": { "_attn_implementation_autoset": true, "architectures": [ "InternVisionModel" ], "attention_dropout": 0.0, "drop_path_rate": 0.1, "dropout": 0.0, "dtype": "bfloat16", "hidden_act": "gelu", "hidden_size": 1024, "image_size": 448, "initializer_factor": 1.0, "initializer_range": 0.02, "intermediate_size": 4096, "layer_norm_eps": 1e-06, "model_type": "intern_vit_6b", "norm_type": "layer_norm", "num_attention_heads": 16, "num_channels": 3, "num_hidden_layers": 24, "patch_size": 14, "qk_normalization": false, "qkv_bias": true, "use_bfloat16": true, "use_flash_attn": true } }