```yaml
default_stage:
  default_modifiers:
    AWQModifier:
      targets: [Linear]
      ignore: ['re:.*lm_head$', 're:.*self_attn\.', 're:^model\.layers\.0\.']
      scheme: W4A16
      bypass_divisibility_checks: false
      mappings:
      - smooth_layer: re:post_attention_layernorm$
        balance_layers: ['re:mlp\.experts\.\d+\.gate_proj$', 're:mlp\.experts\.\d+\.up_proj$']
        activation_hook_target: null
      - smooth_layer: re:mlp\.experts\.\d+\.up_proj$
        balance_layers: ['re:mlp\.experts\.\d+\.down_proj$']
        activation_hook_target: null
      - smooth_layer: re:post_attention_layernorm$
        balance_layers: ['re:mlp\.shared_experts\.gate_proj$', 're:mlp\.shared_experts\.up_proj$']
        activation_hook_target: null
      - smooth_layer: re:mlp\.shared_experts\.up_proj$
        balance_layers: ['re:mlp\.shared_experts\.down_proj$']
        activation_hook_target: null
      offload_device: cpu
      duo_scaling: true
      n_grid: 20
```
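A recipe like this is applied with llm-compressor's `oneshot` entry point, which reads the YAML, runs AWQ calibration, and quantizes the targeted `Linear` layers to W4A16. Below is a minimal sketch, assuming the recipe above has been saved as `awq_recipe.yaml`; the model ID, calibration dataset, sequence length, sample count, and output directory are illustrative placeholders, not part of the recipe.

```python
# Sketch: applying the AWQ W4A16 recipe above via llm-compressor's oneshot API.
# MODEL_ID, dataset choice, and calibration settings are placeholders.
from transformers import AutoModelForCausalLM, AutoTokenizer

from llmcompressor import oneshot

MODEL_ID = "path/or/hub-id-of-the-model"  # placeholder

model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# `recipe` accepts a path to the YAML recipe shown above.
oneshot(
    model=model,
    dataset="open_platypus",       # any calibration dataset llm-compressor supports
    recipe="awq_recipe.yaml",      # the recipe above, saved to disk
    max_seq_length=2048,
    num_calibration_samples=256,
)

# Save the quantized model in compressed form.
model.save_pretrained("model-awq-w4a16", save_compressed=True)
tokenizer.save_pretrained("model-awq-w4a16")
```

The `mappings` entries pair each smoothing source (a `post_attention_layernorm` or an expert's `up_proj`) with the downstream projections whose input scales it balances, covering both the routed experts and the shared experts of an MoE block; `ignore` excludes the LM head, attention projections, and the first decoder layer from quantization.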