{
  "architectures": [
    "Qwen3MoeForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 151643,
  "decoder_sparse_step": 1,
  "eos_token_id": 151645,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 6144,
  "max_position_embeddings": 40960,
  "max_window_layers": 48,
  "mlp_only_layers": [],
  "model_type": "qwen3_moe",
  "moe_intermediate_size": 768,
  "norm_topk_prob": true,
  "num_attention_heads": 32,
  "num_experts": 128,
  "num_experts_per_tok": 8,
  "num_hidden_layers": 48,
  "num_key_value_heads": 4,
  "output_router_logits": false,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 1000000.0,
  "router_aux_loss_coef": 0.001,
  "sliding_window": null,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.51.0",
  "use_cache": true,
  "use_sliding_window": false,
  "vocab_size": 151936,
  "conversion_info": {
    "source_hetero_v2": "qwen3-30b-mlx-hetero-v2",
    "source_q4": "qwen3-30b-mlx-q4",
    "source_fp16_hf": null,
    "target_format": "heterogeneous_v3",
    "coding_experts": [
      21,
      27,
      31,
      43,
      59,
      66,
      71,
      113,
      126
    ],
    "coding_bits": 16,
    "non_coding_bits": 4,
    "description": "Hetero v3: FP16 attention/router/lm_head + FP16 coding experts + Q4 non-coding experts. mlx_lm compatible.",
    "quantization_info": {
      "coding_experts": {
        "expert_ids": [
          21,
          27,
          31,
          43,
          59,
          66,
          71,
          113,
          126
        ],
        "bits": 16,
        "format": "fp16",
        "note": "User selected FP16 for code quality"
      },
      "non_coding_experts": {
        "expert_ids": [
          0,
          1,
          2,
          3,
          4,
          5,
          6,
          7,
          8,
          9,
          10,
          11,
          12,
          13,
          14,
          15,
          16,
          17,
          18,
          19,
          20,
          22,
          23,
          24,
          25,
          26,
          28,
          29,
          30,
          32,
          33,
          34,
          35,
          36,
          37,
          38,
          39,
          40,
          41,
          42,
          44,
          45,
          46,
          47,
          48,
          49,
          50,
          51,
          52,
          53,
          54,
          55,
          56,
          57,
          58,
          60,
          61,
          62,
          63,
          64,
          65,
          67,
          68,
          69,
          70,
          72,
          73,
          74,
          75,
          76,
          77,
          78,
          79,
          80,
          81,
          82,
          83,
          84,
          85,
          86,
          87,
          88,
          89,
          90,
          91,
          92,
          93,
          94,
          95,
          96,
          97,
          98,
          99,
          100,
          101,
          102,
          103,
          104,
          105,
          106,
          107,
          108,
          109,
          110,
          111,
          112,
          114,
          115,
          116,
          117,
          118,
          119,
          120,
          121,
          122,
          123,
          124,
          125,
          127
        ],
        "bits": 4,
        "group_size": 64,
        "format": "quantized"
      },
      "attention": {
        "bits": 16,
        "note": "User selected FP16 for better quality"
      },
      "router": {
        "bits": 16,
        "note": "User selected FP16 for accurate expert selection"
      },
      "lm_head": {
        "bits": 16,
        "note": "User selected FP16 for better token distributions"
      }
    }
  },
  "model_variant": "hetero_v3",
  "_name_or_path": "qwen3-30b-mlx-hetero-v3"
}