iproskurina committed on
Commit
2bf03ce
·
verified ·
1 Parent(s): 3674615

Add files using upload-large-folder tool

Browse files
config.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_remove_final_layer_norm": false,
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "relu",
5
+ "architectures": [
6
+ "OPTForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "bos_token_id": 2,
10
+ "do_layer_norm_before": true,
11
+ "dropout": 0.1,
12
+ "enable_bias": true,
13
+ "eos_token_id": 2,
14
+ "ffn_dim": 10240,
15
+ "hidden_size": 2560,
16
+ "init_std": 0.02,
17
+ "layer_norm_elementwise_affine": true,
18
+ "layerdrop": 0.0,
19
+ "max_position_embeddings": 2048,
20
+ "model_type": "opt",
21
+ "num_attention_heads": 32,
22
+ "num_hidden_layers": 32,
23
+ "pad_token_id": 1,
24
+ "prefix": "</s>",
25
+ "quantization_config": {
26
+ "bits": 4,
27
+ "checkpoint_format": "gptq",
28
+ "desc_act": false,
29
+ "group_size": 128,
30
+ "lm_head": false,
31
+ "meta": {
32
+ "damp_auto_increment": 0.0025,
33
+ "damp_percent": 0.01,
34
+ "mse": 0.0,
35
+ "quantizer": [
36
+ "gptqmodel:2.2.0"
37
+ ],
38
+ "static_groups": false,
39
+ "true_sequential": true,
40
+ "uri": "https://github.com/modelcloud/gptqmodel"
41
+ },
42
+ "pack_dtype": "int32",
43
+ "quant_method": "gptq",
44
+ "sym": true
45
+ },
46
+ "torch_dtype": "float16",
47
+ "transformers_version": "4.51.3",
48
+ "use_cache": true,
49
+ "vocab_size": 50272,
50
+ "word_embed_proj_dim": 2560
51
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 2,
4
+ "eos_token_id": 2,
5
+ "pad_token_id": 1,
6
+ "transformers_version": "4.51.3"
7
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a4cf734c3d0c3930c6983fe5d5ba128cbaa365aa50207c887b008fbdb66f950
3
+ size 1580548656
quant_log.csv ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ layer,module,loss,damp,time
2
+ 0,self_attn.k_proj,0.00394990,0.01000,2.077
3
+ 0,self_attn.v_proj,0.00102141,0.01000,1.203
4
+ 0,self_attn.q_proj,0.00392392,0.01000,1.159
5
+ 0,self_attn.out_proj,0.00003452,0.01000,1.160
6
+ 0,fc1,0.01215448,0.01000,1.218
7
+ 0,fc2,0.00012052,0.01000,5.100
8
+ 1,self_attn.k_proj,0.01467542,0.01000,1.203
9
+ 1,self_attn.v_proj,0.00038407,0.01000,1.189
10
+ 1,self_attn.q_proj,0.00410239,0.01000,1.197
11
+ 1,self_attn.out_proj,0.00000140,0.01000,1.198
12
+ 1,fc1,0.00917357,0.01000,1.225
13
+ 1,fc2,0.00009661,0.01000,5.052
14
+ 2,self_attn.k_proj,0.01598312,0.01000,1.182
15
+ 2,self_attn.v_proj,0.00050603,0.01000,1.175
16
+ 2,self_attn.q_proj,0.00603839,0.01000,1.187
17
+ 2,self_attn.out_proj,0.00000089,0.01000,1.180
18
+ 2,fc1,0.01015421,0.01000,1.217
19
+ 2,fc2,0.00008922,0.01000,5.029
20
+ 3,self_attn.k_proj,0.00669147,0.01000,1.183
21
+ 3,self_attn.v_proj,0.00061962,0.01000,1.194
22
+ 3,self_attn.q_proj,0.00420396,0.01000,1.198
23
+ 3,self_attn.out_proj,0.00000129,0.01000,1.197
24
+ 3,fc1,0.00989648,0.01000,1.234
25
+ 3,fc2,0.00008503,0.01000,5.125
26
+ 4,self_attn.k_proj,0.00954085,0.01000,1.195
27
+ 4,self_attn.v_proj,0.00085336,0.01000,1.204
28
+ 4,self_attn.q_proj,0.00553959,0.01000,1.213
29
+ 4,self_attn.out_proj,0.00000257,0.01000,1.190
30
+ 4,fc1,0.00941784,0.01000,1.221
31
+ 4,fc2,0.00009369,0.01000,5.107
32
+ 5,self_attn.k_proj,0.00864742,0.01000,1.199
33
+ 5,self_attn.v_proj,0.00100623,0.01000,1.207
34
+ 5,self_attn.q_proj,0.00588670,0.01000,1.202
35
+ 5,self_attn.out_proj,0.00000694,0.01000,1.215
36
+ 5,fc1,0.01044408,0.01000,1.223
37
+ 5,fc2,0.00009701,0.01000,5.078
38
+ 6,self_attn.k_proj,0.00639420,0.01000,1.211
39
+ 6,self_attn.v_proj,0.00123202,0.01000,1.195
40
+ 6,self_attn.q_proj,0.00649996,0.01000,1.227
41
+ 6,self_attn.out_proj,0.00000823,0.01000,1.200
42
+ 6,fc1,0.01182344,0.01000,1.225
43
+ 6,fc2,0.00010991,0.01000,5.068
44
+ 7,self_attn.k_proj,0.00690140,0.01000,1.206
45
+ 7,self_attn.v_proj,0.00144184,0.01000,1.200
46
+ 7,self_attn.q_proj,0.00772449,0.01000,1.188
47
+ 7,self_attn.out_proj,0.00001096,0.01000,1.193
48
+ 7,fc1,0.01315037,0.01000,1.222
49
+ 7,fc2,0.00011415,0.01000,5.176
50
+ 8,self_attn.k_proj,0.01227832,0.01000,1.198
51
+ 8,self_attn.v_proj,0.00185993,0.01000,1.209
52
+ 8,self_attn.q_proj,0.00888034,0.01000,1.240
53
+ 8,self_attn.out_proj,0.00002013,0.01000,1.185
54
+ 8,fc1,0.01511053,0.01000,1.225
55
+ 8,fc2,0.00013472,0.01000,5.054
56
+ 9,self_attn.k_proj,0.01038534,0.01000,1.200
57
+ 9,self_attn.v_proj,0.00222251,0.01000,1.213
58
+ 9,self_attn.q_proj,0.00935974,0.01000,1.205
59
+ 9,self_attn.out_proj,0.00002113,0.01000,1.199
60
+ 9,fc1,0.01749250,0.01000,1.217
61
+ 9,fc2,0.00016016,0.01000,5.113
62
+ 10,self_attn.k_proj,0.01146444,0.01000,1.200
63
+ 10,self_attn.v_proj,0.00256414,0.01000,1.189
64
+ 10,self_attn.q_proj,0.01083466,0.01000,1.207
65
+ 10,self_attn.out_proj,0.00003377,0.01000,1.187
66
+ 10,fc1,0.02200724,0.01000,1.246
67
+ 10,fc2,0.00023786,0.01000,5.080
68
+ 11,self_attn.k_proj,0.01244001,0.01000,1.199
69
+ 11,self_attn.v_proj,0.00304534,0.01000,1.229
70
+ 11,self_attn.q_proj,0.01194046,0.01000,1.203
71
+ 11,self_attn.out_proj,0.00003740,0.01000,1.182
72
+ 11,fc1,0.02627137,0.01000,1.219
73
+ 11,fc2,0.00024376,0.01000,5.084
74
+ 12,self_attn.k_proj,0.01390320,0.01000,1.195
75
+ 12,self_attn.v_proj,0.00363379,0.01000,1.192
76
+ 12,self_attn.q_proj,0.01290092,0.01000,1.201
77
+ 12,self_attn.out_proj,0.00004900,0.01000,1.181
78
+ 12,fc1,0.03056903,0.01000,1.218
79
+ 12,fc2,0.00030255,0.01000,5.100
80
+ 13,self_attn.k_proj,0.01644090,0.01000,1.204
81
+ 13,self_attn.v_proj,0.00411961,0.01000,1.190
82
+ 13,self_attn.q_proj,0.01511327,0.01000,1.203
83
+ 13,self_attn.out_proj,0.00009101,0.01000,1.200
84
+ 13,fc1,0.03606411,0.01000,1.212
85
+ 13,fc2,0.00038431,0.01000,5.066
86
+ 14,self_attn.k_proj,0.01776866,0.01000,1.207
87
+ 14,self_attn.v_proj,0.00495395,0.01000,1.211
88
+ 14,self_attn.q_proj,0.01613377,0.01000,1.210
89
+ 14,self_attn.out_proj,0.00009793,0.01000,1.188
90
+ 14,fc1,0.04128723,0.01000,1.205
91
+ 14,fc2,0.00050071,0.01000,5.064
92
+ 15,self_attn.k_proj,0.02192250,0.01000,1.196
93
+ 15,self_attn.v_proj,0.00563364,0.01000,1.204
94
+ 15,self_attn.q_proj,0.01936727,0.01000,1.242
95
+ 15,self_attn.out_proj,0.00018638,0.01000,1.184
96
+ 15,fc1,0.04553729,0.01000,1.217
97
+ 15,fc2,0.00064369,0.01000,5.201
98
+ 16,self_attn.k_proj,0.02319060,0.01000,1.194
99
+ 16,self_attn.v_proj,0.00663496,0.01000,1.188
100
+ 16,self_attn.q_proj,0.02020913,0.01000,1.201
101
+ 16,self_attn.out_proj,0.00021892,0.01000,1.233
102
+ 16,fc1,0.05039033,0.01000,1.239
103
+ 16,fc2,0.00109865,0.01000,5.104
104
+ 17,self_attn.k_proj,0.02345077,0.01000,1.192
105
+ 17,self_attn.v_proj,0.00807686,0.01000,1.190
106
+ 17,self_attn.q_proj,0.02011927,0.01000,1.200
107
+ 17,self_attn.out_proj,0.00025877,0.01000,1.199
108
+ 17,fc1,0.05574853,0.01000,1.232
109
+ 17,fc2,0.00141325,0.01000,5.056
110
+ 18,self_attn.k_proj,0.02450161,0.01000,1.201
111
+ 18,self_attn.v_proj,0.00966819,0.01000,1.188
112
+ 18,self_attn.q_proj,0.02183347,0.01000,1.193
113
+ 18,self_attn.out_proj,0.00027320,0.01000,1.202
114
+ 18,fc1,0.06218845,0.01000,1.222
115
+ 18,fc2,0.00187777,0.01000,5.135
116
+ 19,self_attn.k_proj,0.02378891,0.01000,1.200
117
+ 19,self_attn.v_proj,0.01167217,0.01000,1.199
118
+ 19,self_attn.q_proj,0.01957743,0.01000,1.225
119
+ 19,self_attn.out_proj,0.00038118,0.01000,1.218
120
+ 19,fc1,0.07038191,0.01000,1.241
121
+ 19,fc2,0.00238876,0.01000,5.082
122
+ 20,self_attn.k_proj,0.02468025,0.01000,1.197
123
+ 20,self_attn.v_proj,0.01396945,0.01000,1.191
124
+ 20,self_attn.q_proj,0.02122129,0.01000,1.197
125
+ 20,self_attn.out_proj,0.00036665,0.01000,1.191
126
+ 20,fc1,0.07806558,0.01000,1.215
127
+ 20,fc2,0.00294969,0.01000,5.041
128
+ 21,self_attn.k_proj,0.02351781,0.01000,1.208
129
+ 21,self_attn.v_proj,0.01587440,0.01000,1.186
130
+ 21,self_attn.q_proj,0.02154187,0.01000,1.197
131
+ 21,self_attn.out_proj,0.00039672,0.01000,1.187
132
+ 21,fc1,0.08287902,0.01000,1.235
133
+ 21,fc2,0.00356286,0.01000,5.060
134
+ 22,self_attn.k_proj,0.02510822,0.01000,1.204
135
+ 22,self_attn.v_proj,0.01696502,0.01000,1.221
136
+ 22,self_attn.q_proj,0.02051033,0.01000,1.224
137
+ 22,self_attn.out_proj,0.00058326,0.01000,1.174
138
+ 22,fc1,0.09057487,0.01000,1.234
139
+ 22,fc2,0.00434268,0.01000,5.093
140
+ 23,self_attn.k_proj,0.02305048,0.01000,1.205
141
+ 23,self_attn.v_proj,0.01852468,0.01000,1.179
142
+ 23,self_attn.q_proj,0.02109080,0.01000,1.227
143
+ 23,self_attn.out_proj,0.00045792,0.01000,1.214
144
+ 23,fc1,0.09620518,0.01000,1.216
145
+ 23,fc2,0.00486809,0.01000,5.072
146
+ 24,self_attn.k_proj,0.02279006,0.01000,1.187
147
+ 24,self_attn.v_proj,0.02133971,0.01000,1.194
148
+ 24,self_attn.q_proj,0.02080431,0.01000,1.192
149
+ 24,self_attn.out_proj,0.00079423,0.01000,1.199
150
+ 24,fc1,0.10291621,0.01000,1.218
151
+ 24,fc2,0.00596800,0.01000,5.054
152
+ 25,self_attn.k_proj,0.02235756,0.01000,1.220
153
+ 25,self_attn.v_proj,0.02586158,0.01000,1.202
154
+ 25,self_attn.q_proj,0.02162791,0.01000,1.198
155
+ 25,self_attn.out_proj,0.00092192,0.01000,1.191
156
+ 25,fc1,0.10848137,0.01000,1.224
157
+ 25,fc2,0.00664149,0.01000,5.109
158
+ 26,self_attn.k_proj,0.02296433,0.01000,1.195
159
+ 26,self_attn.v_proj,0.02654539,0.01000,1.187
160
+ 26,self_attn.q_proj,0.02132273,0.01000,1.203
161
+ 26,self_attn.out_proj,0.00105593,0.01000,1.192
162
+ 26,fc1,0.11584188,0.01000,1.205
163
+ 26,fc2,0.00829924,0.01000,5.138
164
+ 27,self_attn.k_proj,0.02253344,0.01000,1.211
165
+ 27,self_attn.v_proj,0.03002428,0.01000,1.236
166
+ 27,self_attn.q_proj,0.02269228,0.01000,1.201
167
+ 27,self_attn.out_proj,0.00074054,0.01000,1.208
168
+ 27,fc1,0.11827446,0.01000,1.216
169
+ 27,fc2,0.00945978,0.01000,5.088
170
+ 28,self_attn.k_proj,0.02344959,0.01000,1.201
171
+ 28,self_attn.v_proj,0.03120596,0.01000,1.199
172
+ 28,self_attn.q_proj,0.02303211,0.01000,1.202
173
+ 28,self_attn.out_proj,0.00140934,0.01000,1.246
174
+ 28,fc1,0.12195188,0.01000,1.220
175
+ 28,fc2,0.01116386,0.01000,5.036
176
+ 29,self_attn.k_proj,0.02414516,0.01000,1.184
177
+ 29,self_attn.v_proj,0.03367308,0.01000,1.196
178
+ 29,self_attn.q_proj,0.02733649,0.01000,1.210
179
+ 29,self_attn.out_proj,0.00086731,0.01000,1.195
180
+ 29,fc1,0.11979358,0.01000,1.209
181
+ 29,fc2,0.01215415,0.01000,5.011
182
+ 30,self_attn.k_proj,0.02487144,0.01000,1.172
183
+ 30,self_attn.v_proj,0.03512698,0.01000,1.186
184
+ 30,self_attn.q_proj,0.02941238,0.01000,1.186
185
+ 30,self_attn.out_proj,0.00075273,0.01000,1.231
186
+ 30,fc1,0.11724092,0.01000,1.202
187
+ 30,fc2,0.01190732,0.01000,5.066
188
+ 31,self_attn.k_proj,0.02410964,0.01000,1.177
189
+ 31,self_attn.v_proj,0.03215169,0.01000,1.197
190
+ 31,self_attn.q_proj,0.03200582,0.01000,1.194
191
+ 31,self_attn.out_proj,0.00138725,0.01000,1.186
192
+ 31,fc1,0.10804088,0.01000,1.251
193
+ 31,fc2,0.00846135,0.01000,5.037
quantize_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bits": 4,
3
+ "group_size": 128,
4
+ "desc_act": false,
5
+ "sym": true,
6
+ "lm_head": false,
7
+ "quant_method": "gptq",
8
+ "checkpoint_format": "gptq",
9
+ "pack_dtype": "int32",
10
+ "meta": {
11
+ "quantizer": [
12
+ "gptqmodel:2.2.0"
13
+ ],
14
+ "uri": "https://github.com/modelcloud/gptqmodel",
15
+ "damp_percent": 0.01,
16
+ "damp_auto_increment": 0.0025,
17
+ "static_groups": false,
18
+ "true_sequential": true,
19
+ "mse": 0.0
20
+ }
21
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "</s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<pad>",
17
+ "unk_token": {
18
+ "content": "</s>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "1": {
6
+ "content": "<pad>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "2": {
14
+ "content": "</s>",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ }
21
+ },
22
+ "bos_token": "</s>",
23
+ "clean_up_tokenization_spaces": false,
24
+ "eos_token": "</s>",
25
+ "errors": "replace",
26
+ "extra_special_tokens": {},
27
+ "model_max_length": 1000000000000000019884624838656,
28
+ "pad_token": "<pad>",
29
+ "tokenizer_class": "GPT2TokenizerFast",
30
+ "unk_token": "</s>",
31
+ "_commit_hash": null
32
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff