```yaml
default_stage:
  default_modifiers:
    AWQModifier:
      targets: [Linear]
      ignore: ['re:.*lm_head$', 're:.*self_attn\.', 're:^model\.layers\.0\.']
      scheme: W4A16
      bypass_divisibility_checks: false
      mappings:
      - smooth_layer: re:post_attention_layernorm$
        balance_layers: ['re:mlp\.experts\.\d+\.gate_proj$', 're:mlp\.experts\.\d+\.up_proj$']
        activation_hook_target: null
      - smooth_layer: re:mlp\.experts\.\d+\.up_proj$
        balance_layers: ['re:mlp\.experts\.\d+\.down_proj$']
        activation_hook_target: null
      - smooth_layer: re:post_attention_layernorm$
        balance_layers: ['re:mlp\.shared_experts\.gate_proj$', 're:mlp\.shared_experts\.up_proj$']
        activation_hook_target: null
      - smooth_layer: re:mlp\.shared_experts\.up_proj$
        balance_layers: ['re:mlp\.shared_experts\.down_proj$']
        activation_hook_target: null
      offload_device: cpu
      duo_scaling: true
      n_grid: 20
```
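A recipe like this is applied with llm-compressor's `oneshot` entry point, which reads the YAML, runs AWQ calibration, and quantizes the targeted `Linear` layers to W4A16. Below is a minimal sketch, assuming the recipe above has been saved as `awq_recipe.yaml`; the model ID, calibration dataset, sequence length, sample count, and output directory are illustrative placeholders, not part of the recipe.

```python
# Sketch: applying the AWQ W4A16 recipe above via llm-compressor's oneshot API.
# MODEL_ID, dataset choice, and calibration settings are placeholders.
from transformers import AutoModelForCausalLM, AutoTokenizer

from llmcompressor import oneshot

MODEL_ID = "path/or/hub-id-of-the-model"  # placeholder

model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# `recipe` accepts a path to the YAML recipe shown above.
oneshot(
    model=model,
    dataset="open_platypus",       # any calibration dataset llm-compressor supports
    recipe="awq_recipe.yaml",      # the recipe above, saved to disk
    max_seq_length=2048,
    num_calibration_samples=256,
)

# Save the quantized model in compressed form.
model.save_pretrained("model-awq-w4a16", save_compressed=True)
tokenizer.save_pretrained("model-awq-w4a16")
```

The `mappings` entries pair each smoothing source (a `post_attention_layernorm` or an expert's `up_proj`) with the downstream projections whose input scales it balances, covering both the routed experts and the shared experts of an MoE block; `ignore` excludes the LM head, attention projections, and the first decoder layer from quantization.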