# Provenance (HuggingFace upload metadata, preserved as comments):
# SubSir's picture
# Upload folder using huggingface_hub
# e0e0d56 verified
---
# llm-compressor recipe: AWQ (activation-aware weight quantization) stage.
# Scheme name W4A16_ASYM indicates 4-bit asymmetric weights with 16-bit
# activations. Nesting reconstructed (source had indentation stripped).
default_stage:
  default_modifiers:
    AWQModifier:
      # Quantize all Linear modules except the output head and the
      # linear-attention a/b input projections.
      targets: ['Linear']
      ignore: ['lm_head', 're:.*linear_attn.in_proj_b$', 're:.*linear_attn.in_proj_a$']
      scheme: W4A16_ASYM
      bypass_divisibility_checks: false
      # Smooth/balance mappings: each entry pairs a "smooth" layer with the
      # downstream "balance" layers whose weights absorb the scaling.
      # NOTE(review): layer-index alternation (3,7,11,... vs the rest) presumably
      # distinguishes self-attention blocks from linear-attention blocks in this
      # hybrid architecture — confirm against the model definition.
      mappings:
        - smooth_layer: 're:.*layers\.(3|7|11|15|19|23|27|31|35|39|43|47|51|55|59|63)\.input_layernorm$'
          balance_layers: ['re:.*self_attn.q_proj$', 're:.*self_attn.k_proj$', 're:.*self_attn.v_proj$']
          activation_hook_target: null
        - smooth_layer: 're:.*self_attn.v_proj$'
          balance_layers: ['re:.*self_attn.o_proj$']
          activation_hook_target: null
        - smooth_layer: 're:.*layers\.(0|1|2|4|5|6|8|9|10|12|13|14|16|17|18|20|21|22|24|25|26|28|29|30|32|33|34|36|37|38|40|41|42|44|45|46|48|49|50|52|53|54|56|57|58|60|61|62)\.input_layernorm$'
          balance_layers:
            - 're:.*linear_attn.in_proj_qkv$'
            - 're:.*linear_attn.in_proj_z$'
            - 're:.*linear_attn.in_proj_b$'
            - 're:.*linear_attn.in_proj_a$'
          activation_hook_target: null
        - smooth_layer: 're:.*post_attention_layernorm$'
          balance_layers: ['re:.*mlp.gate_proj$', 're:.*mlp.up_proj$']
          activation_hook_target: null
        - smooth_layer: 're:.*up_proj$'
          balance_layers: ['re:.*down_proj$']
          activation_hook_target: null
      # NOTE(review): python-specific tag — this value cannot be read by
      # yaml.safe_load; it requires an unsafe/full loader (llm-compressor's own
      # recipe loader handles it). Left unchanged because the consumer expects
      # a torch.device object, but be aware of the code-execution surface.
      offload_device: !!python/object/apply:torch.device ['cpu']
      duo_scaling: both
      n_grid: 20