{
"architectures": [
"Qwen3MoeForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 151643,
"decoder_sparse_step": 1,
"eos_token_id": 151645,
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 2048,
"initializer_range": 0.02,
"intermediate_size": 6144,
"max_position_embeddings": 40960,
"max_window_layers": 48,
"mlp_only_layers": [],
"model_type": "qwen3_moe",
"moe_intermediate_size": 768,
"norm_topk_prob": true,
"num_attention_heads": 32,
"num_experts": 128,
"num_experts_per_tok": 8,
"num_hidden_layers": 48,
"num_key_value_heads": 4,
"output_router_logits": false,
"rms_norm_eps": 1e-06,
"rope_scaling": null,
"rope_theta": 1000000.0,
"router_aux_loss_coef": 0.001,
"sliding_window": null,
"tie_word_embeddings": false,
"torch_dtype": "bfloat16",
"transformers_version": "4.51.0",
"use_cache": true,
"use_sliding_window": false,
"vocab_size": 151936,
"conversion_info": {
"source_hetero_v2": "qwen3-30b-mlx-hetero-v2",
"source_q4": "qwen3-30b-mlx-q4",
"source_fp16_hf": null,
"target_format": "heterogeneous_v3",
"coding_experts": [
21,
27,
31,
43,
59,
66,
71,
113,
126
],
"coding_bits": 16,
"non_coding_bits": 4,
"description": "Hetero v3: FP16 attention/router/lm_head + FP16 coding experts + Q4 non-coding experts. mlx_lm compatible.",
"quantization_info": {
"coding_experts": {
"expert_ids": [
21,
27,
31,
43,
59,
66,
71,
113,
126
],
"bits": 16,
"format": "fp16",
"note": "User selected FP16 for code quality"
},
"non_coding_experts": {
"expert_ids": [
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
22,
23,
24,
25,
26,
28,
29,
30,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
60,
61,
62,
63,
64,
65,
67,
68,
69,
70,
72,
73,
74,
75,
76,
77,
78,
79,
80,
81,
82,
83,
84,
85,
86,
87,
88,
89,
90,
91,
92,
93,
94,
95,
96,
97,
98,
99,
100,
101,
102,
103,
104,
105,
106,
107,
108,
109,
110,
111,
112,
114,
115,
116,
117,
118,
119,
120,
121,
122,
123,
124,
125,
127
],
"bits": 4,
"group_size": 64,
"format": "quantized"
},
"attention": {
"bits": 16,
"note": "User selected FP16 for better quality"
},
"router": {
"bits": 16,
"note": "User selected FP16 for accurate expert selection"
},
"lm_head": {
"bits": 16,
"note": "User selected FP16 for better token distributions"
}
}
},
"model_variant": "hetero_v3",
"_name_or_path": "qwen3-30b-mlx-hetero-v3"
}