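# SparseML one-shot (OBCQ) recipe for a Llama-style model: SmoothQuant-style
# activation equalization, 8-bit quantization, and 50% unstructured SparseGPT
# pruning applied in a single stage. Typically passed as the --recipe to
# SparseML's one-shot text-generation entry point (entry-point name varies by
# SparseML version; this is an assumption, check your install's docs).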
test_stage:
  obcq_modifiers:
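    # Logarithmic variant of SmoothQuant-style activation equalization: each mapping
    # pairs the projections to be balanced with the preceding norm layer whose
    # output feeds them.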
    LogarithmicEqualizationModifier:
      mappings:
      - - ['re:.*q_proj', 're:.*k_proj', 're:.*v_proj']
        - re:.*input_layernorm
      - - ['re:.*gate_proj', 're:.*up_proj']
        - re:.*post_attention_layernorm
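    # 8-bit quantization; modules listed in `ignore` stay at full precision,
    # calibration is re-run after the one-shot pass, and `scheme_overrides`
    # adjusts the default scheme per module type (e.g. channel-wise weights
    # for Linear layers).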
    QuantizationModifier:
      ignore: [LlamaRotaryEmbedding, LlamaRMSNorm, SiLUActivation, MatMulOutput_QK, MatMulOutput_PV,
        model.layers.21.mlp.down_proj, model.layers.7.mlp.down_proj, model.layers.2.mlp.down_proj,
        model.layers.8.self_attn.q_proj, model.layers.8.self_attn.k_proj]
      post_oneshot_calibration: true
      scheme_overrides:
        Linear:
          weights: {num_bits: 8, symmetric: true, strategy: channel}
        MatMulLeftInput_QK:
          input_activations: {num_bits: 8, symmetric: true}
        Embedding:
          input_activations: null
          weights: {num_bits: 8, symmetric: false}
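    # One-shot SparseGPT pruning to 50% unstructured sparsity (mask_structure 0:0)
    # over the transformer decoder layers; quantize: true applies the quantization
    # settings above during the same pass.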
    SparseGPTModifier:
      sparsity: 0.5
      block_size: 128
      sequential_update: false
      quantize: true
      percdamp: 0.01
      mask_structure: 0:0
      targets: ['re:model.layers.\d*$']