mgoin commited on
Commit
89edf70
·
1 Parent(s): 78830c6

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. recipe.yaml +49 -0
recipe.yaml ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
test_stage:
  obcq_modifiers:
    LogarithmicEqualizationModifier:
      mappings: [
        [["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"], "re:.*input_layernorm"],
        [["re:.*gate_proj", "re:.*up_proj"], "re:.*post_attention_layernorm"],
      ]
    QuantizationModifier:
      ignore:
        # These operations don't make sense to quantize
        - LlamaRotaryEmbedding
        - LlamaRMSNorm
        - SiLUActivation
        # Skip quantizing the layers with the most sensitive activations
        - model.layers.3.mlp.down_proj
        - model.layers.38.mlp.down_proj
        - model.layers.39.mlp.down_proj
        - model.layers.0.mlp.down_proj
        - model.layers.37.mlp.down_proj
        - MatMulOutput_QK
        - MatMulOutput_PV
      post_oneshot_calibration: true
      scheme_overrides:
        Linear:
          weights:
            num_bits: 8
            symmetric: true
            strategy: channel
        MatMulLeftInput_QK:
          input_activations:
            num_bits: 8
            symmetric: true
        MatMulLeftInput_PV:
          input_activations:
            num_bits: 8
            symmetric: true
        Embedding:
          input_activations: null
          weights:
            num_bits: 8
            symmetric: false
    SparseGPTModifier:
      sparsity: 0.0
      block_size: 128
      sequential_update: true
      quantize: true
      percdamp: 0.01
      mask_structure: "0:0"
      targets: ["re:model.layers.\\d*$"]