JiYeaha committed on
Commit
8b120a8
·
verified ·
1 Parent(s): 6b62e81

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. model.safetensors +1 -1
  2. recipe.yaml +13 -6
  3. tokenizer.json +1 -1
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7db39ec8451954e0907d096e0cf1b50bbef5570d4b053e256673b4c3260be691
3
  size 1909656064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcce99c37b6894f580400d2d87aa76ec25c2a7b12d7b60a3cc689fcd4e1523d9
3
  size 1909656064
recipe.yaml CHANGED
@@ -1,14 +1,18 @@
1
  default_stage:
2
  default_modifiers:
3
  SmoothQuantModifier:
4
- smoothing_strength: 0.8
5
  mappings:
6
- - - - .*norm.*
7
- - [.*(q|k|v)_proj]
 
 
 
 
8
  ignore: []
9
- QuantizationModifier:
10
  targets: [Linear]
11
- ignore: [embed_tokens, lm_head, '']
12
  scheme: W8A8
13
  kv_cache_scheme:
14
  num_bits: 4
@@ -23,4 +27,7 @@ default_stage:
23
  zp_dtype: null
24
  observer: minmax
25
  observer_kwargs: {}
26
- observer: {weights: minmax, input: minmax}
 
 
 
 
1
  default_stage:
2
  default_modifiers:
3
  SmoothQuantModifier:
4
+ smoothing_strength: 0.5
5
  mappings:
6
+ - - - re:.*ln_f
7
+ - [.*proj]
8
+ - - - re:.*attention_norm
9
+ - [.*q_proj, .*k_proj, .*v_proj]
10
+ - - - re:.*ffn_norm
11
+ - [.*gate_proj, .*up_proj]
12
  ignore: []
13
+ GPTQModifier:
14
  targets: [Linear]
15
+ ignore: [embed_tokens, lm_head]
16
  scheme: W8A8
17
  kv_cache_scheme:
18
  num_bits: 4
 
27
  zp_dtype: null
28
  observer: minmax
29
  observer_kwargs: {}
30
+ block_size: 128
31
+ dampening_frac: 0.01
32
+ actorder: static
33
+ offload_hessians: false
tokenizer.json CHANGED
@@ -2,7 +2,7 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 512,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 1024,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },