JiYeaha committed on
Commit
6b62e81
·
verified ·
1 Parent(s): 3a7fbe7

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. config.json +1 -10
  2. model.safetensors +2 -2
  3. recipe.yaml +3 -10
config.json CHANGED
@@ -90,15 +90,6 @@
90
  "format": "int-quantized",
91
  "global_compression_ratio": null,
92
  "ignore": [
93
- "model.layers.27.mlp.gate_proj",
94
- "model.layers.27.mlp.up_proj",
95
- "model.layers.27.mlp.down_proj",
96
- "model.layers.28.mlp.gate_proj",
97
- "model.layers.28.mlp.up_proj",
98
- "model.layers.28.mlp.down_proj",
99
- "model.layers.29.mlp.gate_proj",
100
- "model.layers.29.mlp.up_proj",
101
- "model.layers.29.mlp.down_proj",
102
  "lm_head"
103
  ],
104
  "kv_cache_scheme": {
@@ -106,7 +97,7 @@
106
  "block_structure": null,
107
  "dynamic": false,
108
  "group_size": null,
109
- "num_bits": 8,
110
  "observer": "minmax",
111
  "observer_kwargs": {},
112
  "scale_dtype": null,
 
90
  "format": "int-quantized",
91
  "global_compression_ratio": null,
92
  "ignore": [
 
 
 
 
 
 
 
 
 
93
  "lm_head"
94
  ],
95
  "kv_cache_scheme": {
 
97
  "block_structure": null,
98
  "dynamic": false,
99
  "group_size": null,
100
+ "num_bits": 4,
101
  "observer": "minmax",
102
  "observer_kwargs": {},
103
  "scale_dtype": null,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da147ceac30e7a9680fd4ac87f8633e40eeba4a024659b4b5c34a94775440a37
3
- size 1985091120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7db39ec8451954e0907d096e0cf1b50bbef5570d4b053e256673b4c3260be691
3
+ size 1909656064
recipe.yaml CHANGED
@@ -6,15 +6,12 @@ default_stage:
6
  - - - .*norm.*
7
  - [.*(q|k|v)_proj]
8
  ignore: []
9
- GPTQModifier:
10
  targets: [Linear]
11
- ignore: [embed_tokens, lm_head, model.layers.27.mlp.gate_proj, model.layers.27.mlp.up_proj,
12
- model.layers.27.mlp.down_proj, model.layers.28.mlp.gate_proj, model.layers.28.mlp.up_proj,
13
- model.layers.28.mlp.down_proj, model.layers.29.mlp.gate_proj, model.layers.29.mlp.up_proj,
14
- model.layers.29.mlp.down_proj]
15
  scheme: W8A8
16
  kv_cache_scheme:
17
- num_bits: 8
18
  type: int
19
  symmetric: true
20
  group_size: null
@@ -27,7 +24,3 @@ default_stage:
27
  observer: minmax
28
  observer_kwargs: {}
29
  observer: {weights: minmax, input: minmax}
30
- block_size: 128
31
- dampening_frac: 0.01
32
- actorder: static
33
- offload_hessians: false
 
6
  - - - .*norm.*
7
  - [.*(q|k|v)_proj]
8
  ignore: []
9
+ QuantizationModifier:
10
  targets: [Linear]
11
+ ignore: [embed_tokens, lm_head, '']
 
 
 
12
  scheme: W8A8
13
  kv_cache_scheme:
14
+ num_bits: 4
15
  type: int
16
  symmetric: true
17
  group_size: null
 
24
  observer: minmax
25
  observer_kwargs: {}
26
  observer: {weights: minmax, input: minmax}