JiYeaha committed on
Commit
3a7fbe7
·
verified ·
1 Parent(s): f6eeb72

Upload quantized EXAONE model

Browse files
Files changed (3) hide show
  1. config.json +9 -0
  2. model.safetensors +2 -2
  3. recipe.yaml +9 -2
config.json CHANGED
@@ -90,6 +90,15 @@
90
  "format": "int-quantized",
91
  "global_compression_ratio": null,
92
  "ignore": [
 
 
 
 
 
 
 
 
 
93
  "lm_head"
94
  ],
95
  "kv_cache_scheme": {
 
90
  "format": "int-quantized",
91
  "global_compression_ratio": null,
92
  "ignore": [
93
+ "model.layers.27.mlp.gate_proj",
94
+ "model.layers.27.mlp.up_proj",
95
+ "model.layers.27.mlp.down_proj",
96
+ "model.layers.28.mlp.gate_proj",
97
+ "model.layers.28.mlp.up_proj",
98
+ "model.layers.28.mlp.down_proj",
99
+ "model.layers.29.mlp.gate_proj",
100
+ "model.layers.29.mlp.up_proj",
101
+ "model.layers.29.mlp.down_proj",
102
  "lm_head"
103
  ],
104
  "kv_cache_scheme": {
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8dc16c2492096a6d724994f4e1ae746d323b9d2a32a9c3788e50b7094d7ba5f1
3
- size 1909656064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da147ceac30e7a9680fd4ac87f8633e40eeba4a024659b4b5c34a94775440a37
3
+ size 1985091120
recipe.yaml CHANGED
@@ -6,9 +6,12 @@ default_stage:
6
  - - - .*norm.*
7
  - [.*(q|k|v)_proj]
8
  ignore: []
9
- QuantizationModifier:
10
  targets: [Linear]
11
- ignore: [embed_tokens, lm_head]
 
 
 
12
  scheme: W8A8
13
  kv_cache_scheme:
14
  num_bits: 8
@@ -24,3 +27,7 @@ default_stage:
24
  observer: minmax
25
  observer_kwargs: {}
26
  observer: {weights: minmax, input: minmax}
 
 
 
 
 
6
  - - - .*norm.*
7
  - [.*(q|k|v)_proj]
8
  ignore: []
9
+ GPTQModifier:
10
  targets: [Linear]
11
+ ignore: [embed_tokens, lm_head, model.layers.27.mlp.gate_proj, model.layers.27.mlp.up_proj,
12
+ model.layers.27.mlp.down_proj, model.layers.28.mlp.gate_proj, model.layers.28.mlp.up_proj,
13
+ model.layers.28.mlp.down_proj, model.layers.29.mlp.gate_proj, model.layers.29.mlp.up_proj,
14
+ model.layers.29.mlp.down_proj]
15
  scheme: W8A8
16
  kv_cache_scheme:
17
  num_bits: 8
 
27
  observer: minmax
28
  observer_kwargs: {}
29
  observer: {weights: minmax, input: minmax}
30
+ block_size: 128
31
+ dampening_frac: 0.01
32
+ actorder: static
33
+ offload_hessians: false