{
"adapter_config": {
"attention_dropout": 0.0,
"float32_attention": true,
"head_dim": 72,
"hidden_act": "silu",
"hidden_size": 1152,
"image_feature_dropout": 0.0,
"image_padding_embed": null,
"initializer_range": 0.02,
"intermediate_size": 11008,
"model_type": "",
"num_attention_heads": 16,
"num_key_value_heads": 16,
"residual_dropout": 0.0,
"text_hidden_size": 2048,
"vit_layers": [
-3,
-9
]
},
"architectures": [
"MolmoActForActionReasoning"
],
"auto_map": {
"AutoConfig": "configuration_molmoact.MolmoActConfig",
"AutoModelForImageTextToText": "modeling_molmoact.MolmoActForActionReasoning"
},
"image_patch_id": 151938,
"initializer_range": 0.02,
"llm_config": {
"additional_vocab_size": 128,
"attention_dropout": 0.0,
"embedding_dropout": 0.0,
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 2048,
"initializer_range": 0.02,
"intermediate_size": 11008,
"layer_norm_eps": 1e-06,
"max_position_embeddings": 4096,
"model_type": "molmoact_llm",
"norm_after": false,
"num_attention_heads": 16,
"num_hidden_layers": 36,
"num_key_value_heads": 2,
"qk_norm_type": "olmo",
"qkv_bias": true,
"residual_dropout": 0.0,
"rope_scaling": null,
"rope_theta": 1000000.0,
"use_cache": true,
"use_qk_norm": false,
"vocab_size": 151936
},
"model_type": "molmoact",
"n_action_bins": 256,
"norm_stats": {
"norm_stats": {
"1023": {
"action": {
"max": [
0.04367176070809364,
0.022778866812586784,
0.01708386093378067,
0.028339775279164314,
0.04235227778553963,
0.06355218589305878,
1.0
],
"mean": [
0.00020232585666235536,
-0.0004970032023265958,
-0.000430602376582101,
-0.00015123763296287507,
4.535434709396213e-05,
-0.00013272084470372647,
0.44365325570106506
],
"min": [
-0.023753266781568527,
-0.027570929378271103,
-0.01796768791973591,
-0.04000410810112953,
-0.0816362202167511,
-0.040603041648864746,
0.0
],
"q01": [
-0.006955103948712349,
-0.014739521779119968,
-0.007455943152308464,
-0.00743744894862175,
-0.007610292173922062,
-0.008179591968655586,
0.0
],
"q99": [
0.007702148985117674,
0.010903404094278812,
0.009033547714352608,
0.0069551002234220505,
0.006484407931566238,
0.00784524530172348,
1.0
],
"std": [
0.0027527061756700277,
0.004528957884758711,
0.0034716646187007427,
0.00254280143417418,
0.002804982475936413,
0.0028773832600563765,
0.4968259632587433
]
},
"num_entries": 114035
}
}
},
"torch_dtype": "float32",
"transformers_version": "4.52.3",
"use_cache": true,
"vit_config": {
"attention_dropout": 0.0,
"float32_attention": true,
"head_dim": 72,
"hidden_act": "gelu_pytorch_tanh",
"hidden_size": 1152,
"image_default_input_size": [
378,
378
],
"image_num_pos": 729,
"image_patch_size": 14,
"initializer_range": 0.02,
"intermediate_size": 4304,
"layer_norm_eps": 1e-06,
"model_type": "molmoact_vit",
"num_attention_heads": 16,
"num_hidden_layers": 27,
"num_key_value_heads": 16,
"patch_bias": true,
"pre_layernorm": false,
"residual_dropout": 0.0,
"use_cls_token": false
}
}