```yaml
default_stage:
  default_modifiers:
    SmoothQuantModifier:
      smoothing_strength: 0.8
      mappings:
      - - ['re:.*self_attn.q_proj', 're:.*self_attn.k_proj', 're:.*self_attn.v_proj']
        - re:.*input_layernorm
      - - ['re:.*shared_mlp.input_linear', 're:.*block_sparse_moe\.input_linear\.experts\.\d+']
        - re:.*post_attention_layernorm
      ignore: []
    GPTQModifier:
      targets: [Linear]
      ignore: [lm_head, 're:.*block_sparse_moe.router', 're:.*mamba.in_proj', 're:.*mamba.out_proj']
      scheme: W8A8
      block_size: 128
      dampening_frac: 0.01
      actorder: static
      offload_hessians: false
```
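
For reference, a minimal sketch of how a recipe like this can be applied in one shot with llm-compressor. The model id, calibration dataset, output directory, and calibration sizes below are illustrative placeholders (not part of the recipe), and the exact `oneshot` import path may vary between llm-compressor versions:

```python
# Minimal sketch, assuming the llm-compressor oneshot API; all names below
# other than the recipe contents are placeholders, not values from this card.
from llmcompressor import oneshot

MODEL_ID = "path-or-hub-id-of-the-base-model"  # hypothetical placeholder

oneshot(
    model=MODEL_ID,
    dataset="open_platypus",           # example calibration dataset
    recipe="recipe.yaml",              # the SmoothQuant + GPTQ recipe shown above
    output_dir=MODEL_ID + "-W8A8",     # where the compressed checkpoint is written
    max_seq_length=2048,               # illustrative calibration sequence length
    num_calibration_samples=512,       # illustrative number of calibration samples
)
```

SmoothQuant first shifts activation outliers into the weights using the layernorm-to-projection mappings above, after which GPTQ quantizes the remaining `Linear` layers to W8A8 while skipping the router, Mamba projections, and `lm_head`.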