# Quantization recipe with a single stage (`quant_stage`). Its
# `quant_modifiers` group configures three modifiers: AWQModifier,
# QuantizationModifier, and GPTQModifier.
# NOTE(review): key names look like an llm-compressor recipe; confirm the
# consumer and validate against its schema.
# NOTE(review): the nesting below was reconstructed from a one-line form of
# this document; confirm the placement of `activation_hook_target` (kept
# inside the mapping entry) and of `duo_scaling` / `n_grid` (kept on the
# modifier itself).
quant_stage:
  quant_modifiers:
    AWQModifier:
      mappings:
        # One mapping entry: the layer matched by `smooth_layer` is paired
        # with the q/k/v projection patterns listed in `balance_layers`.
        # Values starting with `re:` are presumably treated as regular
        # expressions by the consumer; verify.
        - smooth_layer: 're:.*input_layernorm$'
          balance_layers:
            - 're:.*q_proj$'
            - 're:.*k_proj$'
            - 're:.*v_proj$'
          activation_hook_target: null
      duo_scaling: true
      n_grid: 20

    # Pattern covers self_attn k/q/o/v projections; scheme is W4A16.
    QuantizationModifier:
      # Single quotes keep the backslash in `\.` literal.
      targets: ['re:.*self_attn\.(k|q|o|v)_proj$']
      ignore: [lm_head]
      scheme: W4A16
      bypass_divisibility_checks: false

    # Pattern covers mlp down/gate/up projections; scheme is W8A8.
    GPTQModifier:
      targets: ['re:.*mlp\.(down|gate|up)_proj$']
      ignore: [lm_head]
      scheme: W8A8
      bypass_divisibility_checks: false
      block_size: 128
      dampening_frac: 0.001
      offload_hessians: false