---
# Compression recipe: SmoothQuant activation smoothing followed by GPTQ
# W8A8 weight/activation quantization, applied as a single default stage.
default_stage:
  default_modifiers:
    # SmoothQuant shifts activation outliers into the preceding norm/linear
    # weights before quantization. Each mapping pairs the target projection
    # layers with the layer whose scales absorb the smoothing shift.
    # NOTE(review): layer regexes look Granite/Jamba-like (shared MLP, MoE
    # experts, Mamba blocks) — confirm against the target model architecture.
    SmoothQuantModifier:
      smoothing_strength: 0.8
      mappings:
        # Attention q/k/v projections, smoothed via input_layernorm.
        - - ['re:.*self_attn.q_proj', 're:.*self_attn.k_proj', 're:.*self_attn.v_proj']
          - 're:.*input_layernorm'
        # Shared-MLP and MoE expert input linears, smoothed via
        # post_attention_layernorm.
        - - ['re:.*shared_mlp.input_linear', 're:.*block_sparse_moe\.input_linear\.experts\.\d+']
          - 're:.*post_attention_layernorm'
      ignore: []
    # GPTQ quantizes every Linear module to the W8A8 scheme, excluding the
    # LM head, MoE routers, and Mamba in/out projections listed below.
    GPTQModifier:
      targets: [Linear]
      ignore:
        - lm_head
        - 're:.*block_sparse_moe.router'
        - 're:.*mamba.in_proj'
        - 're:.*mamba.out_proj'
      scheme: W8A8
      block_size: 128
      dampening_frac: 0.01
      # Static activation ordering for the Hessian-based column ordering.
      actorder: static
      # Keep Hessians on-device; set true to trade speed for lower memory.
      offload_hessians: false