simaai
/

Qwen3-VL-4B-Instruct-a16w4

Image-Text-to-Text

Model card Files Files and versions

Qwen3-VL-4B-Instruct-a16w4 / devkit /vlm_config.json

florianvoss's picture

Upload folder using huggingface_hub

0f4b5a1 verified about 2 months ago

history blame contribute delete

3.63 kB

	{
	"model_name": "Qwen3-VL-4B-Instruct",
	"model_type": "vlm-qwen3_vl",
	"vm_cfg": {
	"model_type": "qwen3_vl",
	"arch": "qwen3_vision_encoder",
	"image_size": [
	448,
	448
	],
	"patch_size": 16,
	"cls_embed": false,
	"hidden_size": 1024,
	"intermediate_size": 4096,
	"num_attention_heads": 16,
	"num_hidden_layers": 24,
	"hidden_act": "gelu_pytorch_tanh",
	"layer_norm_eps": 1e-06,
	"spatial_merge_size": 2,
	"temporal_patch_size": 2,
	"window_size": 0,
	"num_position_embeddings": 2304,
	"fullatt_block_indexes": [],
	"deepstack_visual_indexes": [
	5,
	11,
	17
	]
	},
	"mm_cfg": {
	"num_layers": 2,
	"hidden_act": "silu",
	"mm_tokens_per_image": 196,
	"proj_dim": 2560,
	"downsample_factor": 1
	},
	"lm_cfg": {
	"model_type": "qwen3_vl_text",
	"data_type": "bfloat16",
	"arch": "qwen",
	"gen": "3",
	"size": "4b",
	"token_cfg": {
	"vocab_size": 151936
	},
	"rope_cfg": {
	"rope_theta": 5000000,
	"rope_local_base_freq": 5000000,
	"rope_scaling": {
	"factor": 1.0,
	"low_freq_factor": 0,
	"high_freq_factor": 0,
	"original_max_position_embeddings": 0,
	"long_factor": null,
	"short_factor": null,
	"rope_type": "mrope",
	"mrope_section": [
	24,
	20,
	20
	],
	"mrope_interleaved": true
	}
	},
	"attn_cfg": {
	"num_attention_heads": 32,
	"num_key_value_heads": 8,
	"head_dim": 128,
	"swa_enable": false,
	"swa_ratio": 0,
	"sliding_window": 0,
	"attention_bias": false,
	"attention_dropout": 0.0,
	"query_pre_attn_scalar": 0
	},
	"mlp_cfg": {
	"intermediate_size": 9728,
	"act": "silu",
	"num_layers": 3,
	"mlp_bias": false
	},
	"hidden_size": 2560,
	"num_hidden_layers": 36,
	"max_position_embeddings": 2048,
	"rms_norm_eps": 1e-06,
	"rms_norm_unit_offset": false,
	"layer_norms": [
	"pre_attn",
	"post_attn",
	"qk_norm"
	],
	"layer_types": null,
	"attn_logit_softcapping": null,
	"final_logit_softcapping": null,
	"lm_head_num_splits": 3,
	"lm_head_split_dim": 50656,
	"lora_cfg": null,
	"conv_L_cache": 3,
	"conv_bias": false
	},
	"pipeline_cfg": {
	"system_prompt": null,
	"chat_template": null,
	"max_num_tokens": 2048,
	"input_token_group_size": 128,
	"input_token_group_offsets": [
	0,
	128,
	256,
	384,
	512,
	640,
	768,
	896,
	1024,
	1152,
	1280,
	1408,
	1536,
	1664,
	1792,
	1920
	],
	"future_token_mask_size": 128,
	"return_logits": false,
	"use_strided_kv_cache": false,
	"enable_filter_sharing": false,
	"quantize_embeddings": false,
	"split_mlp": false
	},
	"language_model_name": "Qwen3-VL-4B-Instruct_language",
	"vision_model_name": "Qwen3-VL-4B-Instruct_vision"
	}