Crusadersk committed on
Commit
4d7ab2b
·
verified ·
1 Parent(s): 9b20d9b

Self-quantized phi-2-gptq 4-bit (group_size=128, seed=42)

Browse files
config.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "PhiForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 50256,
7
+ "dtype": "float16",
8
+ "embd_pdrop": 0.0,
9
+ "eos_token_id": 50256,
10
+ "hidden_act": "gelu_new",
11
+ "hidden_size": 2560,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 10240,
14
+ "layer_norm_eps": 1e-05,
15
+ "max_position_embeddings": 2048,
16
+ "model_type": "phi",
17
+ "num_attention_heads": 32,
18
+ "num_hidden_layers": 32,
19
+ "num_key_value_heads": 32,
20
+ "pad_token_id": 50256,
21
+ "partial_rotary_factor": 0.4,
22
+ "qk_layernorm": false,
23
+ "quantization_config": {
24
+ "bits": 4,
25
+ "checkpoint_format": "gptq",
26
+ "desc_act": false,
27
+ "format": "gptq",
28
+ "group_size": 128,
29
+ "lm_head": false,
30
+ "meta": {
31
+ "act_group_aware": true,
32
+ "auto_forward_data_parallel": true,
33
+ "damp_auto_increment": 0.01,
34
+ "damp_percent": 0.05,
35
+ "failsafe": {
36
+ "smooth": null,
37
+ "strategy": "rtn",
38
+ "threshold": "0.5%"
39
+ },
40
+ "gc_mode": "interval",
41
+ "gptaq": null,
42
+ "hessian": {
43
+ "chunk_bytes": null,
44
+ "chunk_size": null,
45
+ "staging_dtype": "float32"
46
+ },
47
+ "mock_quantization": false,
48
+ "mse": 0.0,
49
+ "offload_to_disk": true,
50
+ "offload_to_disk_path": "./gptqmodel_offload/bxitqbrw-vnczleuk/",
51
+ "pack_impl": "cpu",
52
+ "quantizer": [
53
+ "gptqmodel:5.8.0"
54
+ ],
55
+ "static_groups": false,
56
+ "true_sequential": true,
57
+ "uri": "https://github.com/modelcloud/gptqmodel",
58
+ "vram_strategy": "exclusive",
59
+ "wait_for_submodule_finalizers": false
60
+ },
61
+ "pack_dtype": "int32",
62
+ "quant_method": "gptq",
63
+ "sym": true
64
+ },
65
+ "resid_pdrop": 0.1,
66
+ "rope_parameters": {
67
+ "partial_rotary_factor": 0.4,
68
+ "rope_theta": 10000.0,
69
+ "rope_type": "default"
70
+ },
71
+ "tie_word_embeddings": false,
72
+ "transformers_version": "5.4.0",
73
+ "use_cache": true,
74
+ "vocab_size": 51200
75
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "do_sample": true,
5
+ "eos_token_id": 50256,
6
+ "transformers_version": "5.4.0"
7
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad5a14048f4bc42ebe924f4c13355d96ca51fdf7c680e0ff2792a4d746af1c42
3
+ size 1836707608
quant_log.csv ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ layer,module,loss,damp,time
2
+ 0,self_attn.q_proj,0.0000858512,0.05000,1.097
3
+ 0,self_attn.k_proj,0.0001029116,0.05000,1.129
4
+ 0,self_attn.v_proj,0.0000302286,0.05000,0.869
5
+ 0,self_attn.dense,0.0000123729,0.05000,0.857
6
+ 0,mlp.fc1,0.0001053048,0.05000,0.953
7
+ 0,mlp.fc2,0.0000069301,0.05000,4.047
8
+ 1,self_attn.q_proj,0.0000778551,0.05000,0.913
9
+ 1,self_attn.k_proj,0.0000812629,0.05000,0.925
10
+ 1,self_attn.v_proj,0.0000198182,0.05000,0.865
11
+ 1,self_attn.dense,0.0000046095,0.05000,0.893
12
+ 1,mlp.fc1,0.0002500907,0.05000,0.915
13
+ 1,mlp.fc2,0.0000168595,0.05000,3.982
14
+ 2,self_attn.q_proj,0.0001165650,0.05000,0.969
15
+ 2,self_attn.k_proj,0.0001173934,0.05000,0.950
16
+ 2,self_attn.v_proj,0.0000463347,0.05000,0.882
17
+ 2,self_attn.dense,0.0000052613,0.05000,0.881
18
+ 2,mlp.fc1,0.0004108166,0.05000,1.135
19
+ 2,mlp.fc2,0.0000199922,0.05000,4.276
20
+ 3,self_attn.q_proj,0.0001364429,0.05000,0.991
21
+ 3,self_attn.k_proj,0.0001284486,0.05000,0.984
22
+ 3,self_attn.v_proj,0.0000970313,0.05000,0.888
23
+ 3,self_attn.dense,0.0000050054,0.05000,0.920
24
+ 3,mlp.fc1,0.0006537699,0.05000,0.965
25
+ 3,mlp.fc2,0.0000279560,0.05000,4.165
26
+ 4,self_attn.q_proj,0.0001806877,0.05000,1.029
27
+ 4,self_attn.k_proj,0.0001703575,0.05000,0.915
28
+ 4,self_attn.v_proj,0.0001177177,0.05000,0.927
29
+ 4,self_attn.dense,0.0000071838,0.05000,0.914
30
+ 4,mlp.fc1,0.0006948134,0.05000,0.997
31
+ 4,mlp.fc2,0.0000382566,0.05000,4.367
32
+ 5,self_attn.q_proj,0.0001677401,0.05000,1.003
33
+ 5,self_attn.k_proj,0.0001583099,0.05000,0.936
34
+ 5,self_attn.v_proj,0.0001066596,0.05000,0.892
35
+ 5,self_attn.dense,0.0000078510,0.05000,0.975
36
+ 5,mlp.fc1,0.0006093672,0.05000,1.055
37
+ 5,mlp.fc2,0.0000421267,0.05000,4.136
38
+ 6,self_attn.q_proj,0.0002005054,0.05000,1.016
39
+ 6,self_attn.k_proj,0.0001911783,0.05000,0.982
40
+ 6,self_attn.v_proj,0.0001320928,0.05000,0.967
41
+ 6,self_attn.dense,0.0000092071,0.05000,0.945
42
+ 6,mlp.fc1,0.0006585670,0.05000,0.935
43
+ 6,mlp.fc2,0.0000462386,0.05000,4.468
44
+ 7,self_attn.q_proj,0.0002163916,0.05000,1.070
45
+ 7,self_attn.k_proj,0.0002072118,0.05000,0.937
46
+ 7,self_attn.v_proj,0.0001352158,0.05000,0.978
47
+ 7,self_attn.dense,0.0000117110,0.05000,0.970
48
+ 7,mlp.fc1,0.0006694943,0.05000,0.943
49
+ 7,mlp.fc2,0.0000498323,0.05000,4.157
50
+ 8,self_attn.q_proj,0.0002109012,0.05000,0.949
51
+ 8,self_attn.k_proj,0.0002039318,0.05000,1.241
52
+ 8,self_attn.v_proj,0.0001560394,0.05000,0.996
53
+ 8,self_attn.dense,0.0000082801,0.05000,0.990
54
+ 8,mlp.fc1,0.0006576708,0.05000,1.030
55
+ 8,mlp.fc2,0.0000524794,0.05000,4.458
56
+ 9,self_attn.q_proj,0.0002133507,0.05000,1.027
57
+ 9,self_attn.k_proj,0.0002043295,0.05000,0.955
58
+ 9,self_attn.v_proj,0.0001553389,0.05000,0.979
59
+ 9,self_attn.dense,0.0000089070,0.05000,0.967
60
+ 9,mlp.fc1,0.0006463157,0.05000,1.011
61
+ 9,mlp.fc2,0.0000528918,0.05000,4.181
62
+ 10,self_attn.q_proj,0.0002181723,0.05000,0.950
63
+ 10,self_attn.k_proj,0.0002149413,0.05000,0.952
64
+ 10,self_attn.v_proj,0.0001367607,0.05000,0.928
65
+ 10,self_attn.dense,0.0000134973,0.05000,0.896
66
+ 10,mlp.fc1,0.0006295212,0.05000,0.923
67
+ 10,mlp.fc2,0.0000522401,0.05000,4.083
68
+ 11,self_attn.q_proj,0.0002261469,0.05000,0.958
69
+ 11,self_attn.k_proj,0.0002225379,0.05000,0.977
70
+ 11,self_attn.v_proj,0.0001492103,0.05000,0.945
71
+ 11,self_attn.dense,0.0000167664,0.05000,0.914
72
+ 11,mlp.fc1,0.0006092769,0.05000,0.930
73
+ 11,mlp.fc2,0.0000528684,0.05000,5.138
74
+ 12,self_attn.q_proj,0.0002321886,0.05000,0.974
75
+ 12,self_attn.k_proj,0.0002303848,0.05000,0.891
76
+ 12,self_attn.v_proj,0.0001530323,0.05000,0.908
77
+ 12,self_attn.dense,0.0000169518,0.05000,0.920
78
+ 12,mlp.fc1,0.0006060201,0.05000,0.973
79
+ 12,mlp.fc2,0.0000549961,0.05000,4.304
80
+ 13,self_attn.q_proj,0.0002252652,0.05000,0.972
81
+ 13,self_attn.k_proj,0.0002252355,0.05000,1.009
82
+ 13,self_attn.v_proj,0.0001491018,0.05000,0.952
83
+ 13,self_attn.dense,0.0000157061,0.05000,0.996
84
+ 13,mlp.fc1,0.0006169498,0.05000,1.020
85
+ 13,mlp.fc2,0.0000539558,0.05000,4.684
86
+ 14,self_attn.q_proj,0.0002650832,0.05000,1.065
87
+ 14,self_attn.k_proj,0.0002238111,0.05000,0.999
88
+ 14,self_attn.v_proj,0.0001467716,0.05000,0.949
89
+ 14,self_attn.dense,0.0000165920,0.05000,0.983
90
+ 14,mlp.fc1,0.0006014784,0.05000,0.989
91
+ 14,mlp.fc2,0.0000545418,0.05000,4.357
92
+ 15,self_attn.q_proj,0.0002222408,0.05000,0.952
93
+ 15,self_attn.k_proj,0.0002214731,0.05000,0.935
94
+ 15,self_attn.v_proj,0.0001403913,0.05000,0.971
95
+ 15,self_attn.dense,0.0000169734,0.05000,0.912
96
+ 15,mlp.fc1,0.0005749508,0.05000,0.944
97
+ 15,mlp.fc2,0.0000612357,0.05000,4.166
98
+ 16,self_attn.q_proj,0.0002301284,0.05000,1.015
99
+ 16,self_attn.k_proj,0.0002283591,0.05000,0.881
100
+ 16,self_attn.v_proj,0.0001480706,0.05000,0.896
101
+ 16,self_attn.dense,0.0000156473,0.05000,0.876
102
+ 16,mlp.fc1,0.0005911736,0.05000,0.936
103
+ 16,mlp.fc2,0.0000615094,0.05000,4.146
104
+ 17,self_attn.q_proj,0.0002169144,0.05000,0.949
105
+ 17,self_attn.k_proj,0.0002164072,0.05000,0.980
106
+ 17,self_attn.v_proj,0.0001373301,0.05000,0.942
107
+ 17,self_attn.dense,0.0000168942,0.05000,0.929
108
+ 17,mlp.fc1,0.0005453525,0.05000,0.907
109
+ 17,mlp.fc2,0.0000619073,0.05000,4.035
110
+ 18,self_attn.q_proj,0.0002519506,0.05000,0.997
111
+ 18,self_attn.k_proj,0.0002533842,0.05000,0.974
112
+ 18,self_attn.v_proj,0.0001425683,0.05000,0.953
113
+ 18,self_attn.dense,0.0000179922,0.05000,0.886
114
+ 18,mlp.fc1,0.0005474294,0.05000,0.933
115
+ 18,mlp.fc2,0.0000634956,0.05000,4.298
116
+ 19,self_attn.q_proj,0.0002300704,0.05000,1.044
117
+ 19,self_attn.k_proj,0.0002330887,0.05000,0.926
118
+ 19,self_attn.v_proj,0.0001444852,0.05000,0.995
119
+ 19,self_attn.dense,0.0000164025,0.05000,1.168
120
+ 19,mlp.fc1,0.0005811269,0.05000,1.058
121
+ 19,mlp.fc2,0.0000723687,0.05000,4.503
122
+ 20,self_attn.q_proj,0.0002827060,0.05000,1.040
123
+ 20,self_attn.k_proj,0.0002414499,0.05000,0.943
124
+ 20,self_attn.v_proj,0.0001363139,0.05000,0.986
125
+ 20,self_attn.dense,0.0000187169,0.05000,1.034
126
+ 20,mlp.fc1,0.0005760693,0.05000,1.011
127
+ 20,mlp.fc2,0.0000753935,0.05000,4.585
128
+ 21,self_attn.q_proj,0.0002291035,0.05000,1.085
129
+ 21,self_attn.k_proj,0.0002217587,0.05000,0.947
130
+ 21,self_attn.v_proj,0.0001625256,0.05000,0.968
131
+ 21,self_attn.dense,0.0000147600,0.05000,0.959
132
+ 21,mlp.fc1,0.0006096665,0.05000,1.005
133
+ 21,mlp.fc2,0.0000837082,0.05000,4.373
134
+ 22,self_attn.q_proj,0.0002641166,0.05000,1.019
135
+ 22,self_attn.k_proj,0.0002266009,0.05000,1.127
136
+ 22,self_attn.v_proj,0.0001701314,0.05000,0.935
137
+ 22,self_attn.dense,0.0000122861,0.05000,0.906
138
+ 22,mlp.fc1,0.0006319354,0.05000,0.929
139
+ 22,mlp.fc2,0.0000845768,0.05000,4.132
140
+ 23,self_attn.q_proj,0.0002803879,0.05000,0.939
141
+ 23,self_attn.k_proj,0.0002535842,0.05000,0.996
142
+ 23,self_attn.v_proj,0.0001640246,0.05000,0.914
143
+ 23,self_attn.dense,0.0000198861,0.05000,0.889
144
+ 23,mlp.fc1,0.0006627535,0.05000,0.919
145
+ 23,mlp.fc2,0.0000945106,0.05000,4.222
146
+ 24,self_attn.q_proj,0.0002384906,0.05000,0.962
147
+ 24,self_attn.k_proj,0.0002369107,0.05000,0.904
148
+ 24,self_attn.v_proj,0.0001989545,0.05000,0.933
149
+ 24,self_attn.dense,0.0000162029,0.05000,0.887
150
+ 24,mlp.fc1,0.0007166684,0.05000,0.989
151
+ 24,mlp.fc2,0.0001029097,0.05000,4.025
152
+ 25,self_attn.q_proj,0.0003113051,0.05000,1.047
153
+ 25,self_attn.k_proj,0.0002572589,0.05000,1.003
154
+ 25,self_attn.v_proj,0.0002068744,0.05000,0.917
155
+ 25,self_attn.dense,0.0000187007,0.05000,0.979
156
+ 25,mlp.fc1,0.0007801538,0.05000,0.941
157
+ 25,mlp.fc2,0.0001016458,0.05000,4.126
158
+ 26,self_attn.q_proj,0.0003590267,0.05000,0.980
159
+ 26,self_attn.k_proj,0.0002574304,0.05000,0.954
160
+ 26,self_attn.v_proj,0.0002369789,0.05000,0.952
161
+ 26,self_attn.dense,0.0000299120,0.05000,0.937
162
+ 26,mlp.fc1,0.0007834066,0.05000,0.999
163
+ 26,mlp.fc2,0.0001124531,0.05000,4.201
164
+ 27,self_attn.q_proj,0.0003025899,0.05000,0.952
165
+ 27,self_attn.k_proj,0.0003528239,0.05000,0.908
166
+ 27,self_attn.v_proj,0.0002532654,0.05000,0.892
167
+ 27,self_attn.dense,0.0000275395,0.05000,0.961
168
+ 27,mlp.fc1,0.0008518580,0.05000,0.971
169
+ 27,mlp.fc2,0.0001470365,0.05000,4.424
170
+ 28,self_attn.q_proj,0.0002637209,0.05000,1.063
171
+ 28,self_attn.k_proj,0.0002721585,0.05000,0.953
172
+ 28,self_attn.v_proj,0.0002665452,0.05000,0.952
173
+ 28,self_attn.dense,0.0000327637,0.05000,0.881
174
+ 28,mlp.fc1,0.0008877072,0.05000,0.929
175
+ 28,mlp.fc2,0.0001765151,0.05000,4.167
176
+ 29,self_attn.q_proj,0.0037034005,0.05000,0.975
177
+ 29,self_attn.k_proj,0.0030506286,0.05000,1.012
178
+ 29,self_attn.v_proj,0.0001793814,0.05000,0.972
179
+ 29,self_attn.dense,0.0000784153,0.05000,0.954
180
+ 29,mlp.fc1,0.0012136550,0.05000,1.026
181
+ 29,mlp.fc2,0.0001923476,0.05000,4.545
182
+ 30,self_attn.q_proj,0.0124896431,0.05000,1.096
183
+ 30,self_attn.k_proj,0.0038405387,0.05000,1.110
184
+ 30,self_attn.v_proj,0.0001383956,0.05000,1.107
185
+ 30,self_attn.dense,0.0000560131,0.05000,1.004
186
+ 30,mlp.fc1,0.0013959989,0.05000,1.031
187
+ 30,mlp.fc2,0.0001954762,0.05000,4.354
188
+ 31,self_attn.q_proj,0.0038123704,0.05000,0.988
189
+ 31,self_attn.k_proj,0.0006134240,0.05000,0.890
190
+ 31,self_attn.v_proj,0.0000717409,0.05000,0.939
191
+ 31,self_attn.dense,0.0000127410,0.05000,0.895
192
+ 31,mlp.fc1,0.0007779660,0.05000,0.936
193
+ 31,mlp.fc2,0.0001298984,0.05000,4.108
quantize_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "bits": 4,
3
+ "group_size": 128,
4
+ "desc_act": false,
5
+ "sym": true,
6
+ "quant_method": "gptq"
7
+ }
quantize_manifest.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "phi-2",
3
+ "hf_id": "microsoft/phi-2",
4
+ "method": "gptq",
5
+ "bits": 4,
6
+ "group_size": 128,
7
+ "calibration_dataset": "allenai/c4",
8
+ "calibration_samples": 128,
9
+ "seed": 42,
10
+ "elapsed_s": 690.9595537185669,
11
+ "tool": "gptqmodel"
12
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": "<|endoftext|>",
5
+ "clean_up_tokenization_spaces": true,
6
+ "eos_token": "<|endoftext|>",
7
+ "is_local": false,
8
+ "model_max_length": 2048,
9
+ "pad_token": "<|endoftext|>",
10
+ "tokenizer_class": "CodeGenTokenizerFast",
11
+ "unk_token": "<|endoftext|>",
12
+ "_commit_hash": null
13
+ }