{
  "speculators_model_type": "mtp",
  "architectures": [
    "MTPSpeculator"
  ],
  "num_speculative_steps": 3,
  "num_nextn_predict_layers": 1,
  "mtp_loss_step_weights": [
    0.51,
    0.31,
    0.18
  ],
  "hidden_size": 2048,
  "intermediate_size": 5120,
  "num_attention_heads": 16,
  "num_key_value_heads": 2,
  "vocab_size": 151936,
  "max_position_embeddings": 262144,
  "rms_norm_eps": 1e-06,
  "transformer_config": {
    "model_type": "qwen3_next",
    "vocab_size": 151936,
    "hidden_size": 2048,
    "intermediate_size": 5120,
    "num_hidden_layers": 1,
    "num_attention_heads": 16,
    "num_key_value_heads": 2,
    "max_position_embeddings": 262144,
    "rms_norm_eps": 1e-06,
    "rope_theta": 10000000,
    "tie_word_embeddings": false,
    "attention_bias": true
  },
  "speculators_config": {
    "algorithm": "mtp",
    "proposal_methods": [
      {
        "proposal_type": "greedy",
        "speculative_tokens": 3,
        "verifier_accept_k": 1,
        "accept_tolerance": 0.0
      }
    ],
    "default_proposal_method": "greedy",
    "verifier": {
      "name_or_path": "Qwen/Qwen3-Next-80B-A3B-Instruct",
      "architectures": ["Qwen3MoeForCausalLM"]
    }
  }
}