# sarvam-105b-AWQ / recipe.yaml
# Uploaded by JunHowie via the upload-large-folder tool (commit 592444d, verified)
# llm-compressor quantization recipe: AWQ (activation-aware weight quantization)
# applied to every Linear layer of the model, scheme W4A16 (4-bit weights,
# 16-bit activations).
# NOTE(review): source indentation was flattened by extraction; nesting below is
# reconstructed per the llm-compressor recipe schema — confirm against upstream.
default_stage:
  default_modifiers:
    AWQModifier:
      targets: [Linear]
      # Layers excluded from quantization: the LM head, all self-attention
      # projections, and everything in the first decoder layer. Regex scalars
      # are quoted so `\.`, `$`, etc. stay literal string characters.
      ignore: ['re:.*lm_head$', 're:.*self_attn\.', 're:^model\.layers\.0\.']
      scheme: W4A16
      bypass_divisibility_checks: false
      # Smoothing mappings: each smooth_layer's output scale is balanced
      # against the listed downstream layers. Covers both the routed MoE
      # experts (`mlp.experts.<i>.*`) and the shared experts.
      mappings:
        - smooth_layer: 're:post_attention_layernorm$'
          balance_layers: ['re:mlp\.experts\.\d+\.gate_proj$', 're:mlp\.experts\.\d+\.up_proj$']
          activation_hook_target: null
        - smooth_layer: 're:mlp\.experts\.\d+\.up_proj$'
          balance_layers: ['re:mlp\.experts\.\d+\.down_proj$']
          activation_hook_target: null
        - smooth_layer: 're:post_attention_layernorm$'
          balance_layers: ['re:mlp\.shared_experts\.gate_proj$', 're:mlp\.shared_experts\.up_proj$']
          activation_hook_target: null
        - smooth_layer: 're:mlp\.shared_experts\.up_proj$'
          balance_layers: ['re:mlp\.shared_experts\.down_proj$']
          activation_hook_target: null
      # Stage calibration tensors on CPU to bound GPU memory during calibration.
      offload_device: cpu
      duo_scaling: true
      # Grid-search resolution for the AWQ scale search.
      n_grid: 20