---
# Quantization recipe: apply dynamic FP8 quantization to all Linear layers,
# skipping the output head, embeddings, and every normalization layer.
default_stage:
  default_modifiers:
    QuantizationModifier:
      targets: [Linear]
      # Modules excluded from quantization (exact names or 're:' regex patterns).
      ignore:
        - 'lm_head'
        - 'model.embed_tokens'
        - 're:.*input_layernorm$'
        - 're:.*post_attention_layernorm$'
        - 'model.norm'
      scheme: FP8_DYNAMIC