```yaml
default_stage:
  default_modifiers:
    AWQModifier:
      config_groups:
        group_0:
          targets: [Linear]
          weights:
            num_bits: 4
            type: int
            symmetric: true
            group_size: 32
            strategy: group
            block_structure: null
            dynamic: false
            actorder: null
            scale_dtype: null
            zp_dtype: null
            observer: mse
            observer_kwargs: {}
          input_activations: null
          output_activations: null
          format: null
      targets: [Linear]
      ignore: [model.embed_tokens, 're:.*linear_attn.*', 're:.*norm.*', 're:.*shared_expert.*',
        're:.*shared_expert_gate$', 're:.*mlp[.]gate$', 're:.*router.*', 're:.*self_attn.*',
        lm_head]
      bypass_divisibility_checks: false
      mappings:
      - smooth_layer: re:.*post_attention_layernorm
        balance_layers: ['re:.*mlp.gate', 're:.*mlp.shared_expert.gate_proj', 're:.*mlp.shared_expert.up_proj',
          're:.*mlp.shared_expert_gate', 're:.*mlp.experts.*gate_proj', 're:.*mlp.experts.*up_proj']
        activation_hook_target: null
        balance_exponent: 1
      - smooth_layer: re:.*mlp.experts.*up_proj
        balance_layers: ['re:.*mlp.experts.*down_proj']
        activation_hook_target: null
        balance_exponent: 1
      offload_device: !!python/object/apply:torch.device [cpu]
      duo_scaling: true
      n_grid: 20
```
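In short, the recipe applies AWQ with 4-bit symmetric, group-wise (`group_size: 32`) weight quantization using an MSE observer, leaves embeddings, norms, attention, routers, and shared-expert gates in full precision via the `ignore` list, and defines MoE-aware smoothing `mappings` from each post-attention layernorm (and each expert `up_proj`) into the expert projections.

For context, here is a minimal sketch of how a recipe like this is typically applied with llm-compressor's one-shot flow. It assumes a recent llm-compressor release that exports `oneshot` at the top level; the model ID, calibration dataset, sequence length, sample count, and output directory are illustrative placeholders, not values recorded in the recipe.

```python
# Sketch: apply the AWQ recipe above via llm-compressor's oneshot API.
# Placeholders (assumptions, not from the recipe): MODEL_ID, dataset,
# max_seq_length, num_calibration_samples, and the output directory.
from transformers import AutoModelForCausalLM, AutoTokenizer
from llmcompressor import oneshot

MODEL_ID = "my-org/my-moe-model"  # placeholder: any causal LM the recipe targets

model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Run calibration + AWQ quantization, driven entirely by the YAML recipe
# (saved to disk as recipe.yaml).
oneshot(
    model=model,
    recipe="recipe.yaml",
    dataset="open_platypus",       # placeholder calibration dataset
    max_seq_length=2048,
    num_calibration_samples=256,
)

# Persist the compressed checkpoint alongside the tokenizer.
model.save_pretrained("my-moe-model-awq-w4a16", save_compressed=True)
tokenizer.save_pretrained("my-moe-model-awq-w4a16")
```

Note that the `mappings` still list shared-expert and router projections as `balance_layers` even though those modules appear in `ignore`: smoothing scales can be balanced through a layer without that layer's weights being quantized, a common choice for keeping expert routing stable in mixture-of-experts models.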