| DEFAULT_stage: | |
|   DEFAULT_modifiers: | |
|     SparseGPTModifier: | |
|       sparsity: 0.5 | |
|       mask_structure: '2:4' | |
|       sequential_update: true | |
|       targets: ['re:model.layers.\d*$'] | |
|     QuantizationModifier: | |
|       ignore: [lm_head] | |
|       targets: [Linear] | |
|       scheme: FP8_DYNAMIC | |
|     ConstantPruningModifier: | |
|       start: 0.0 | |
|       targets: ['re:.*q_proj.weight', 're:.*k_proj.weight', 're:.*v_proj.weight', 're:.*o_proj.weight', | |
|         're:.*gate_proj.weight', 're:.*up_proj.weight', 're:.*down_proj.weight'] | |