mlx-community
/

MolmoPoint-8B-4bit

Image-Text-to-Text

4-bit precision

Model card Files Files and versions

MolmoPoint-8B-4bit / config.json

prince-canuma's picture

Upload folder using huggingface_hub

bfc9279 verified 3 months ago

history blame contribute delete

3.74 kB

	{
	"adapter_config": {
	"attention_dropout": 0.0,
	"attn_implementation": "sdpa",
	"float32_attention": true,
	"head_dim": 72,
	"hidden_act": "silu",
	"hidden_size": 1152,
	"image_feature_dropout": 0,
	"initializer_range": 0.02,
	"intermediate_size": 12288,
	"model_type": "molmo_point",
	"num_attention_heads": 16,
	"num_key_value_heads": 16,
	"pooling_attention_mask": true,
	"positional_embeddings": null,
	"residual_dropout": 0.0,
	"text_hidden_size": 4096,
	"vit_layers": [
	-3,
	-9
	]
	},
	"architectures": [
	"MolmoPointForConditionalGeneration"
	],
	"auto_map": {
	"AutoConfig": "configuration_molmo_point.MolmoPointConfig",
	"AutoModelForImageTextToText": "modeling_molmo_point.MolmoPointForConditionalGeneration"
	},
	"dtype": "float32",
	"embed_location": false,
	"embed_selected_vit_patch": "linear",
	"eos_token_id": 151645,
	"frame_end_token_id": 151944,
	"frame_start_token_id": 151943,
	"image_col_id": 151939,
	"image_end_token_id": 151937,
	"image_high_res_id": 151938,
	"image_non_indexable_patch_id": 151942,
	"image_patch_id": 151938,
	"image_start_token_id": 151936,
	"initializer_range": 0.02,
	"layer_norm_x": true,
	"location_token_id": 151949,
	"low_res_image_start_token_id": null,
	"mask_patches": "always",
	"mask_repeats": "inference",
	"mask_subpatches": "inference",
	"model_type": "molmo_point",
	"no_more_points_class": true,
	"norm_logits": true,
	"patch_embed_dim": 512,
	"patch_embedding_kind": "image_feature0",
	"patch_location": "3x3",
	"patch_token_id": 151947,
	"quantization": {
	"group_size": 64,
	"bits": 4,
	"mode": "affine"
	},
	"quantization_config": {
	"group_size": 64,
	"bits": 4,
	"mode": "affine"
	},
	"subpatch_token_id": 151948,
	"text_config": {
	"additional_vocab_size": 128,
	"attention_dropout": 0.0,
	"attn_implementation": "sdpa",
	"embedding_dropout": 0.0,
	"head_dim": 128,
	"hidden_act": "silu",
	"hidden_size": 4096,
	"initializer_range": 0.02,
	"intermediate_size": 12288,
	"layer_norm_eps": 1e-06,
	"max_position_embeddings": 37376,
	"model_type": "molmo2_text",
	"norm_after": false,
	"num_attention_heads": 32,
	"num_hidden_layers": 36,
	"num_key_value_heads": 8,
	"qk_norm_type": "qwen3",
	"qkv_bias": false,
	"residual_dropout": 0.0,
	"rope_scaling": null,
	"rope_scaling_layers": null,
	"rope_theta": 1000000.0,
	"use_cache": true,
	"use_qk_norm": true,
	"vocab_size": 151936
	},
	"tie_word_embeddings": false,
	"token_prediction_rotary": "one_d",
	"token_prediction_rotary_theta": 50000.0,
	"transformers_version": "4.57.1",
	"use_cache": true,
	"use_frame_special_tokens": true,
	"vision_config": {},
	"vit_config": {
	"attention_dropout": 0.0,
	"attn_implementation": "sdpa",
	"float32_attention": true,
	"head_dim": 72,
	"hidden_act": "gelu_pytorch_tanh",
	"hidden_size": 1152,
	"image_default_input_size": [
	378,
	378
	],
	"image_num_pos": 729,
	"image_patch_size": 14,
	"initializer_range": 0.02,
	"intermediate_size": 4304,
	"layer_norm_eps": 1e-06,
	"model_type": "molmo2",
	"num_attention_heads": 16,
	"num_hidden_layers": 27,
	"num_key_value_heads": 16,
	"residual_dropout": 0.0
	}
	}