```yaml
default_stage:
  default_modifiers:
    AWQModifier:
      targets: [Linear]
      ignore: [lm_head, 're:.*linear_attn.in_proj_b$', 're:.*linear_attn.in_proj_a$']
      scheme: W4A16_ASYM
      bypass_divisibility_checks: false
      mappings:
      - smooth_layer: re:.*layers\.(3|7|11|15|19|23|27|31|35|39|43|47|51|55|59|63)\.input_layernorm$
        balance_layers: ['re:.*self_attn.q_proj$', 're:.*self_attn.k_proj$', 're:.*self_attn.v_proj$']
        activation_hook_target: null
      - smooth_layer: re:.*self_attn.v_proj$
        balance_layers: ['re:.*self_attn.o_proj$']
        activation_hook_target: null
      - smooth_layer: re:.*layers\.(0|1|2|4|5|6|8|9|10|12|13|14|16|17|18|20|21|22|24|25|26|28|29|30|32|33|34|36|37|38|40|41|42|44|45|46|48|49|50|52|53|54|56|57|58|60|61|62)\.input_layernorm$
        balance_layers: ['re:.*linear_attn.in_proj_qkv$', 're:.*linear_attn.in_proj_z$',
          're:.*linear_attn.in_proj_b$', 're:.*linear_attn.in_proj_a$']
        activation_hook_target: null
      - smooth_layer: re:.*post_attention_layernorm$
        balance_layers: ['re:.*mlp.gate_proj$', 're:.*mlp.up_proj$']
        activation_hook_target: null
      - smooth_layer: re:.*up_proj$
        balance_layers: ['re:.*down_proj$']
        activation_hook_target: null
      offload_device: !!python/object/apply:torch.device [cpu]
      duo_scaling: both
      n_grid: 20
```
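
For context, a recipe like this is typically consumed by llm-compressor's `oneshot` entrypoint. Below is a minimal sketch of how the recipe above could be applied, assuming it has been saved as `recipe.yaml`; the model ID, calibration dataset, sample count, sequence length, and output directory are illustrative placeholders, not values taken from this card.

```python
# Minimal sketch: one-shot AWQ calibration with llm-compressor, driven by the
# recipe above. All names marked "placeholder" are assumptions for illustration.
from transformers import AutoModelForCausalLM, AutoTokenizer

from llmcompressor import oneshot

MODEL_ID = "path/to/base-model"  # placeholder: the unquantized base checkpoint

model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Run AWQ smoothing/scaling and W4A16 asymmetric quantization as specified in
# the recipe. Dataset and calibration sizes are placeholders; any dataset
# accepted by oneshot works.
oneshot(
    model=model,
    dataset="open_platypus",
    recipe="recipe.yaml",
    max_seq_length=2048,
    num_calibration_samples=256,
)

# Save in compressed-tensors format alongside the tokenizer.
model.save_pretrained("model-awq-w4a16-asym", save_compressed=True)
tokenizer.save_pretrained("model-awq-w4a16-asym")
```

Note that the `ignore` list and the third mapping mirror each other: `in_proj_b` and `in_proj_a` are balanced during smoothing but excluded from quantization itself, and `lm_head` is left in full precision, as is common for W4A16 schemes.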