CarlosRCDev committed on
Commit
0c77c17
·
verified ·
1 Parent(s): b354e4b

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -36,9 +36,11 @@
36
  "num_bits": 4,
37
  "observer": "memoryless_minmax",
38
  "observer_kwargs": {},
 
39
  "strategy": "group",
40
- "symmetric": true,
41
- "type": "int"
 
42
  }
43
  }
44
  },
 
36
  "num_bits": 4,
37
  "observer": "memoryless_minmax",
38
  "observer_kwargs": {},
39
+ "scale_dtype": null,
40
  "strategy": "group",
41
+ "symmetric": false,
42
+ "type": "int",
43
+ "zp_dtype": "torch.int8"
44
  }
45
  }
46
  },
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4b6705eaecf338922cba8daea69fce84ff315c225cba3a0a968656229810bc2
3
- size 4992538160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e574c206e93c42a13ce226c8d125896933572ba7b72e3028503f1231525444f7
3
+ size 4966132840
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45b281b6559df36c0e590901a15e1a7a1e82250020c9047a40db8852abbb1d1f
3
- size 4977002992
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00e58997ec624729d00bd922983c1bccd02ab7a869bf07eef38145acbc2f6744
3
+ size 4962450680
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0843afa21724351b9520411ba0e5f06837c25b594399c5b74d347607c183695
3
- size 4038693632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4334851eef580bc70403fa2326dbfa0edb869ae23e16d744c083bdca68f5818d
3
+ size 4161978264
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff
 
recipe.yaml CHANGED
@@ -1,7 +1,22 @@
1
  default_stage:
2
  default_modifiers:
3
- QuantizationModifier:
4
  targets: [Linear]
5
  ignore: [lm_head]
6
- scheme: W4A16
7
  bypass_divisibility_checks: false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  default_stage:
2
  default_modifiers:
3
+ AWQModifier:
4
  targets: [Linear]
5
  ignore: [lm_head]
6
+ scheme: W4A16_ASYM
7
  bypass_divisibility_checks: false
8
+ mappings:
9
+ - smooth_layer: re:.*input_layernorm
10
+ balance_layers: ['re:.*q_proj', 're:.*k_proj', 're:.*v_proj']
11
+ activation_hook_target: null
12
+ - smooth_layer: re:.*v_proj
13
+ balance_layers: ['re:.*o_proj']
14
+ activation_hook_target: null
15
+ - smooth_layer: re:.*post_attention_layernorm
16
+ balance_layers: ['re:.*gate_proj', 're:.*up_proj']
17
+ activation_hook_target: null
18
+ - smooth_layer: re:.*up_proj
19
+ balance_layers: ['re:.*down_proj']
20
+ activation_hook_target: null
21
+ duo_scaling: true
22
+ n_grid: 20
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d869676e23a78b395eb0308cff52fd9c3ea7d52597627360cdb97407bb0b02b8
3
- size 15783075
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d6fc6c24a257545cb8bc93f6da21ac69148cc5c36c5d3fd00eaaabf8facec17
3
+ size 15783173