{
  "adapter_config": {
    "attention_dropout": 0.0,
    "float32_attention": true,
    "head_dim": 72,
    "hidden_act": "silu",
    "hidden_size": 1152,
    "image_feature_dropout": 0.0,
    "image_padding_embed": null,
    "initializer_range": 0.02,
    "intermediate_size": 11008,
    "model_type": "",
    "num_attention_heads": 16,
    "num_key_value_heads": 16,
    "residual_dropout": 0.0,
    "text_hidden_size": 2048,
    "vit_layers": [
      -3,
      -9
    ]
  },
  "architectures": [
    "MolmoActForActionReasoning"
  ],
  "auto_map": {
    "AutoConfig": "configuration_molmoact.MolmoActConfig",
    "AutoModelForImageTextToText": "modeling_molmoact.MolmoActForActionReasoning"
  },
  "image_patch_id": 151938,
  "initializer_range": 0.02,
  "llm_config": {
    "additional_vocab_size": 128,
    "attention_dropout": 0.0,
    "embedding_dropout": 0.0,
    "head_dim": 128,
    "hidden_act": "silu",
    "hidden_size": 2048,
    "initializer_range": 0.02,
    "intermediate_size": 11008,
    "layer_norm_eps": 1e-06,
    "max_position_embeddings": 4096,
    "model_type": "molmoact_llm",
    "norm_after": false,
    "num_attention_heads": 16,
    "num_hidden_layers": 36,
    "num_key_value_heads": 2,
    "qk_norm_type": "olmo",
    "qkv_bias": true,
    "residual_dropout": 0.0,
    "rope_scaling": null,
    "rope_theta": 1000000.0,
    "use_cache": true,
    "use_qk_norm": false,
    "vocab_size": 151936
  },
  "model_type": "molmoact",
  "n_action_bins": 256,
  "norm_stats": {
    "norm_stats": {
      "1023": {
        "action": {
          "max": [
            0.04367176070809364,
            0.022778866812586784,
            0.01708386093378067,
            0.028339775279164314,
            0.04235227778553963,
            0.06355218589305878,
            1.0
          ],
          "mean": [
            0.00020232585666235536,
            -0.0004970032023265958,
            -0.000430602376582101,
            -0.00015123763296287507,
            4.535434709396213e-05,
            -0.00013272084470372647,
            0.44365325570106506
          ],
          "min": [
            -0.023753266781568527,
            -0.027570929378271103,
            -0.01796768791973591,
            -0.04000410810112953,
            -0.0816362202167511,
            -0.040603041648864746,
            0.0
          ],
          "q01": [
            -0.006955103948712349,
            -0.014739521779119968,
            -0.007455943152308464,
            -0.00743744894862175,
            -0.007610292173922062,
            -0.008179591968655586,
            0.0
          ],
          "q99": [
            0.007702148985117674,
            0.010903404094278812,
            0.009033547714352608,
            0.0069551002234220505,
            0.006484407931566238,
            0.00784524530172348,
            1.0
          ],
          "std": [
            0.0027527061756700277,
            0.004528957884758711,
            0.0034716646187007427,
            0.00254280143417418,
            0.002804982475936413,
            0.0028773832600563765,
            0.4968259632587433
          ]
        },
        "num_entries": 114035
      }
    }
  },
  "torch_dtype": "float32",
  "transformers_version": "4.52.3",
  "use_cache": true,
  "vit_config": {
    "attention_dropout": 0.0,
    "float32_attention": true,
    "head_dim": 72,
    "hidden_act": "gelu_pytorch_tanh",
    "hidden_size": 1152,
    "image_default_input_size": [
      378,
      378
    ],
    "image_num_pos": 729,
    "image_patch_size": 14,
    "initializer_range": 0.02,
    "intermediate_size": 4304,
    "layer_norm_eps": 1e-06,
    "model_type": "molmoact_vit",
    "num_attention_heads": 16,
    "num_hidden_layers": 27,
    "num_key_value_heads": 16,
    "patch_bias": true,
    "pre_layernorm": false,
    "residual_dropout": 0.0,
    "use_cls_token": false
  }
}
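
The `auto_map` entries route `AutoConfig` and `AutoModelForImageTextToText` to the custom `configuration_molmoact.MolmoActConfig` and `modeling_molmoact.MolmoActForActionReasoning` classes shipped alongside this config, so loading requires `trust_remote_code=True`. Below is a minimal sketch of loading the checkpoint and turning discrete action bins back into continuous actions with `n_action_bins` and `norm_stats`. The repo id is a placeholder, and the quantile-based de-normalization is an assumption (the common OpenVLA-style scheme); the repo's `modeling_molmoact.py` is the authoritative decode path.

```python
import numpy as np
from transformers import AutoConfig, AutoModelForImageTextToText

MODEL_ID = "allenai/MolmoAct-example"  # placeholder repo id (assumption)

# auto_map dispatches to the custom classes, hence trust_remote_code=True.
config = AutoConfig.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForImageTextToText.from_pretrained(MODEL_ID, trust_remote_code=True)

def decode_action(bin_ids: np.ndarray, stats: dict, n_bins: int = 256) -> np.ndarray:
    """Hypothetical helper: map discrete action-bin ids to continuous values.

    Assumes uniform bins over [-1, 1] and q01/q99 de-normalization
    (OpenVLA-style; not confirmed by this config alone).
    """
    edges = np.linspace(-1.0, 1.0, n_bins + 1)      # bin edges over [-1, 1]
    centers = 0.5 * (edges[:-1] + edges[1:])        # one center per bin
    normalized = centers[bin_ids]                   # per-dimension lookup
    q01 = np.asarray(stats["q01"])
    q99 = np.asarray(stats["q99"])
    return 0.5 * (normalized + 1.0) * (q99 - q01) + q01

# Assumes the custom config exposes norm_stats as a plain attribute.
stats = config.norm_stats["norm_stats"]["1023"]["action"]
action = decode_action(
    np.array([128, 64, 200, 10, 255, 0, 1]),        # example 7-dim action tokens
    stats,
    n_bins=config.n_action_bins,
)
```

The seven action dimensions line up with the seven-element statistics arrays above (six delta-pose dimensions plus a binary gripper channel, judging by the 0.0/1.0 min/max of the last entry).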