viveksil committed on
Commit
00b2f1e
·
verified ·
1 Parent(s): b39bc22

Add files using upload-large-folder tool

Browse files
config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 1,
8
+ "eos_token_id": 2,
9
+ "head_dim": 64,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 576,
12
+ "initializer_range": 0.041666666666666664,
13
+ "intermediate_size": 1536,
14
+ "is_llama_config": true,
15
+ "max_position_embeddings": 8192,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "num_attention_heads": 9,
19
+ "num_hidden_layers": 30,
20
+ "num_key_value_heads": 3,
21
+ "pad_token_id": 2,
22
+ "pretraining_tp": 1,
23
+ "quantization_config": {
24
+ "bits": 8,
25
+ "checkpoint_format": "gptq",
26
+ "desc_act": true,
27
+ "group_size": 128,
28
+ "lm_head": false,
29
+ "meta": {
30
+ "damp_auto_increment": 0.0025,
31
+ "damp_percent": 0.01,
32
+ "mse": 0.0,
33
+ "quantizer": [
34
+ "gptqmodel:2.1.1-dev"
35
+ ],
36
+ "static_groups": false,
37
+ "true_sequential": true,
38
+ "uri": "https://github.com/modelcloud/gptqmodel"
39
+ },
40
+ "pack_dtype": "int32",
41
+ "quant_method": "gptq",
42
+ "sym": true
43
+ },
44
+ "rms_norm_eps": 1e-05,
45
+ "rope_interleaved": false,
46
+ "rope_scaling": null,
47
+ "rope_theta": 100000,
48
+ "tie_word_embeddings": true,
49
+ "torch_dtype": "bfloat16",
50
+ "transformers.js_config": {
51
+ "kv_cache_dtype": {
52
+ "fp16": "float16",
53
+ "q4f16": "float16"
54
+ }
55
+ },
56
+ "transformers_version": "4.50.1",
57
+ "use_cache": true,
58
+ "vocab_size": 49152
59
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "pad_token_id": 2,
6
+ "transformers_version": "4.50.1"
7
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dccb798fbedd0213d8661ad75fd8693cbe6acb2a04711a1c99a4a2934dd0b07
3
+ size 166253936
quant_log.csv ADDED
@@ -0,0 +1,211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ layer,module,loss,damp,time
2
+ 0,self_attn.k_proj,0.00022096,0.01000,1.099
3
+ 0,self_attn.v_proj,0.00000229,0.01000,2.435
4
+ 0,self_attn.q_proj,0.00036210,0.01000,2.448
5
+ 0,self_attn.o_proj,0.00000175,0.01000,2.415
6
+ 0,mlp.up_proj,0.00150737,0.01000,1.416
7
+ 0,mlp.gate_proj,0.00142441,0.01000,0.274
8
+ 0,mlp.down_proj,0.00595958,0.01000,0.739
9
+ 1,self_attn.k_proj,0.00078274,0.01000,0.286
10
+ 1,self_attn.v_proj,0.00015115,0.01000,0.272
11
+ 1,self_attn.q_proj,0.00174118,0.01000,0.162
12
+ 1,self_attn.o_proj,0.00103411,0.01000,0.769
13
+ 1,mlp.up_proj,0.00179594,0.01000,1.660
14
+ 1,mlp.gate_proj,0.00228291,0.01000,0.973
15
+ 1,mlp.down_proj,0.00224895,0.01000,0.746
16
+ 2,self_attn.k_proj,0.00125878,0.01000,0.437
17
+ 2,self_attn.v_proj,0.00020735,0.01000,0.269
18
+ 2,self_attn.q_proj,0.00295782,0.01000,0.638
19
+ 2,self_attn.o_proj,0.00098028,0.01000,0.810
20
+ 2,mlp.up_proj,0.00240148,0.01000,0.318
21
+ 2,mlp.gate_proj,0.00286920,0.01000,0.260
22
+ 2,mlp.down_proj,0.00500130,0.01000,6.505
23
+ 3,self_attn.k_proj,0.00188067,0.01000,1.197
24
+ 3,self_attn.v_proj,0.00074529,0.01000,0.289
25
+ 3,self_attn.q_proj,0.00454102,0.01000,0.292
26
+ 3,self_attn.o_proj,0.00114469,0.01000,0.283
27
+ 3,mlp.up_proj,0.00272536,0.01000,0.309
28
+ 3,mlp.gate_proj,0.00309045,0.01000,0.279
29
+ 3,mlp.down_proj,0.00337280,0.01000,0.736
30
+ 4,self_attn.k_proj,0.00178011,0.01000,0.471
31
+ 4,self_attn.v_proj,0.00069859,0.01000,0.149
32
+ 4,self_attn.q_proj,0.00427734,0.01000,0.145
33
+ 4,self_attn.o_proj,0.00194755,0.01000,1.499
34
+ 4,mlp.up_proj,0.00357304,0.01000,0.308
35
+ 4,mlp.gate_proj,0.00408032,0.01000,0.289
36
+ 4,mlp.down_proj,0.00474062,0.01000,0.744
37
+ 5,self_attn.k_proj,0.00194141,0.01000,0.269
38
+ 5,self_attn.v_proj,0.00072606,0.01000,0.919
39
+ 5,self_attn.q_proj,0.00453751,0.01000,1.476
40
+ 5,self_attn.o_proj,0.00234540,0.01000,0.281
41
+ 5,mlp.up_proj,0.00409303,0.01000,0.266
42
+ 5,mlp.gate_proj,0.00449629,0.01000,0.596
43
+ 5,mlp.down_proj,0.00556781,0.01000,6.516
44
+ 6,self_attn.k_proj,0.00163071,0.01000,0.275
45
+ 6,self_attn.v_proj,0.00068974,0.01000,0.287
46
+ 6,self_attn.q_proj,0.00406556,0.01000,0.276
47
+ 6,self_attn.o_proj,0.00182722,0.01000,0.284
48
+ 6,mlp.up_proj,0.00438432,0.01000,0.273
49
+ 6,mlp.gate_proj,0.00448065,0.01000,0.271
50
+ 6,mlp.down_proj,0.00687783,0.01000,0.819
51
+ 7,self_attn.k_proj,0.00229149,0.01000,0.366
52
+ 7,self_attn.v_proj,0.00139370,0.01000,0.148
53
+ 7,self_attn.q_proj,0.00637256,0.01000,0.142
54
+ 7,self_attn.o_proj,0.00216803,0.01000,1.421
55
+ 7,mlp.up_proj,0.00479016,0.01000,0.280
56
+ 7,mlp.gate_proj,0.00473948,0.01000,0.287
57
+ 7,mlp.down_proj,0.00823352,0.01000,0.831
58
+ 8,self_attn.k_proj,0.00214538,0.01000,1.666
59
+ 8,self_attn.v_proj,0.00103131,0.01000,1.952
60
+ 8,self_attn.q_proj,0.00540090,0.01000,1.636
61
+ 8,self_attn.o_proj,0.00283757,0.01000,0.303
62
+ 8,mlp.up_proj,0.00546549,0.01000,0.274
63
+ 8,mlp.gate_proj,0.00495393,0.01000,0.275
64
+ 8,mlp.down_proj,0.01154038,0.01000,6.464
65
+ 9,self_attn.k_proj,0.00147208,0.01000,1.818
66
+ 9,self_attn.v_proj,0.00058141,0.01000,1.731
67
+ 9,self_attn.q_proj,0.00394661,0.01000,1.675
68
+ 9,self_attn.o_proj,0.00251221,0.01000,0.324
69
+ 9,mlp.up_proj,0.00595842,0.01000,0.347
70
+ 9,mlp.gate_proj,0.00489875,0.01000,0.343
71
+ 9,mlp.down_proj,0.01428184,0.01000,0.904
72
+ 10,self_attn.k_proj,0.00132155,0.01000,2.685
73
+ 10,self_attn.v_proj,0.00068617,0.01000,2.503
74
+ 10,self_attn.q_proj,0.00363028,0.01000,1.752
75
+ 10,self_attn.o_proj,0.00545926,0.01000,0.408
76
+ 10,mlp.up_proj,0.00611763,0.01000,1.632
77
+ 10,mlp.gate_proj,0.00474317,0.01000,1.328
78
+ 10,mlp.down_proj,0.01347071,0.01000,2.937
79
+ 11,self_attn.k_proj,0.00115821,0.01000,3.339
80
+ 11,self_attn.v_proj,0.00065106,0.01000,2.422
81
+ 11,self_attn.q_proj,0.00344706,0.01000,2.382
82
+ 11,self_attn.o_proj,0.00740811,0.01000,2.400
83
+ 11,mlp.up_proj,0.00620620,0.01000,2.718
84
+ 11,mlp.gate_proj,0.00449832,0.01000,2.710
85
+ 11,mlp.down_proj,0.27257532,0.01000,5.754
86
+ 12,self_attn.k_proj,0.00163961,0.01000,0.344
87
+ 12,self_attn.v_proj,0.00094356,0.01000,0.337
88
+ 12,self_attn.q_proj,0.00456318,0.01000,0.338
89
+ 12,self_attn.o_proj,0.00419531,0.01000,1.311
90
+ 12,mlp.up_proj,0.00565002,0.01000,0.279
91
+ 12,mlp.gate_proj,0.00445975,0.01000,1.091
92
+ 12,mlp.down_proj,0.01192240,0.01000,4.327
93
+ 13,self_attn.k_proj,0.00174242,0.01000,0.339
94
+ 13,self_attn.v_proj,0.00083272,0.01000,0.344
95
+ 13,self_attn.q_proj,0.00502530,0.01000,0.395
96
+ 13,self_attn.o_proj,0.00500150,0.01000,0.343
97
+ 13,mlp.up_proj,0.00570113,0.01000,1.926
98
+ 13,mlp.gate_proj,0.00524486,0.01000,1.750
99
+ 13,mlp.down_proj,0.01178209,0.01000,2.821
100
+ 14,self_attn.k_proj,0.00200629,0.01000,0.361
101
+ 14,self_attn.v_proj,0.00157519,0.01000,0.359
102
+ 14,self_attn.q_proj,0.00566488,0.01000,0.366
103
+ 14,self_attn.o_proj,0.00620107,0.01000,2.248
104
+ 14,mlp.up_proj,0.00577955,0.01000,4.655
105
+ 14,mlp.gate_proj,0.00547774,0.01000,5.042
106
+ 14,mlp.down_proj,0.01469438,0.01000,8.832
107
+ 15,self_attn.k_proj,0.00194205,0.01000,0.349
108
+ 15,self_attn.v_proj,0.00151308,0.01000,0.345
109
+ 15,self_attn.q_proj,0.00563300,0.01000,0.342
110
+ 15,self_attn.o_proj,0.00601548,0.01000,0.344
111
+ 15,mlp.up_proj,0.00601284,0.01000,0.342
112
+ 15,mlp.gate_proj,0.00594717,0.01000,0.336
113
+ 15,mlp.down_proj,0.01753610,0.01000,5.689
114
+ 16,self_attn.k_proj,0.00155090,0.01000,1.763
115
+ 16,self_attn.v_proj,0.00110804,0.01000,0.365
116
+ 16,self_attn.q_proj,0.00422240,0.01000,0.357
117
+ 16,self_attn.o_proj,0.00903260,0.01000,0.345
118
+ 16,mlp.up_proj,0.00553630,0.01000,1.181
119
+ 16,mlp.gate_proj,0.00532908,0.01000,1.499
120
+ 16,mlp.down_proj,0.01532181,0.01000,5.315
121
+ 17,self_attn.k_proj,0.00241632,0.01000,0.830
122
+ 17,self_attn.v_proj,0.00194845,0.01000,2.641
123
+ 17,self_attn.q_proj,0.00834111,0.01000,4.413
124
+ 17,self_attn.o_proj,0.01061929,0.01000,2.746
125
+ 17,mlp.up_proj,0.00562216,0.01000,2.742
126
+ 17,mlp.gate_proj,0.00506327,0.01000,1.101
127
+ 17,mlp.down_proj,0.01818097,0.01000,4.789
128
+ 18,self_attn.k_proj,0.00273163,0.01000,2.613
129
+ 18,self_attn.v_proj,0.00311299,0.01000,1.891
130
+ 18,self_attn.q_proj,0.00828936,0.01000,0.344
131
+ 18,self_attn.o_proj,0.00938506,0.01000,0.345
132
+ 18,mlp.up_proj,0.00625932,0.01000,0.344
133
+ 18,mlp.gate_proj,0.00583430,0.01000,0.339
134
+ 18,mlp.down_proj,0.03318551,0.01000,2.234
135
+ 19,self_attn.k_proj,0.00133622,0.01000,0.326
136
+ 19,self_attn.v_proj,0.00225405,0.01000,0.800
137
+ 19,self_attn.q_proj,0.00550781,0.01000,1.800
138
+ 19,self_attn.o_proj,0.01567704,0.01000,0.449
139
+ 19,mlp.up_proj,0.00725614,0.01000,2.903
140
+ 19,mlp.gate_proj,0.00687203,0.01000,2.471
141
+ 19,mlp.down_proj,0.03395692,0.01000,2.946
142
+ 20,self_attn.k_proj,0.00182818,0.01000,0.388
143
+ 20,self_attn.v_proj,0.00369986,0.01000,1.045
144
+ 20,self_attn.q_proj,0.00668970,0.01000,3.772
145
+ 20,self_attn.o_proj,0.01447436,0.01000,3.580
146
+ 20,mlp.up_proj,0.00868652,0.01000,2.710
147
+ 20,mlp.gate_proj,0.00763109,0.01000,1.037
148
+ 20,mlp.down_proj,0.06614500,0.01000,3.153
149
+ 21,self_attn.k_proj,0.00169165,0.01000,0.353
150
+ 21,self_attn.v_proj,0.00278996,0.01000,1.556
151
+ 21,self_attn.q_proj,0.00603606,0.01000,2.608
152
+ 21,self_attn.o_proj,0.01160419,0.01000,2.633
153
+ 21,mlp.up_proj,0.00941546,0.01000,2.761
154
+ 21,mlp.gate_proj,0.00857336,0.01000,1.637
155
+ 21,mlp.down_proj,0.05236044,0.01000,3.023
156
+ 22,self_attn.k_proj,0.00173458,0.01000,0.340
157
+ 22,self_attn.v_proj,0.00304587,0.01000,0.335
158
+ 22,self_attn.q_proj,0.00609978,0.01000,1.500
159
+ 22,self_attn.o_proj,0.01699413,0.01000,0.329
160
+ 22,mlp.up_proj,0.01043702,0.01000,3.157
161
+ 22,mlp.gate_proj,0.00901166,0.01000,2.403
162
+ 22,mlp.down_proj,0.08635419,0.01000,2.277
163
+ 23,self_attn.k_proj,0.00235798,0.01000,2.724
164
+ 23,self_attn.v_proj,0.00853371,0.01000,2.700
165
+ 23,self_attn.q_proj,0.00731025,0.01000,2.679
166
+ 23,self_attn.o_proj,0.04595792,0.01000,0.347
167
+ 23,mlp.up_proj,0.01267573,0.01000,1.922
168
+ 23,mlp.gate_proj,0.00977481,0.01000,1.572
169
+ 23,mlp.down_proj,0.09878369,0.01000,1.992
170
+ 24,self_attn.k_proj,0.00193833,0.01000,0.352
171
+ 24,self_attn.v_proj,0.00926276,0.01000,0.343
172
+ 24,self_attn.q_proj,0.00732911,0.01000,0.354
173
+ 24,self_attn.o_proj,0.02490180,0.01000,0.294
174
+ 24,mlp.up_proj,0.01275103,0.01000,3.643
175
+ 24,mlp.gate_proj,0.00968059,0.01000,4.339
176
+ 24,mlp.down_proj,0.13135986,0.01000,6.748
177
+ 25,self_attn.k_proj,0.00188807,0.01000,1.592
178
+ 25,self_attn.v_proj,0.00410724,0.01000,1.087
179
+ 25,self_attn.q_proj,0.00648702,0.01000,0.338
180
+ 25,self_attn.o_proj,0.01829978,0.01000,1.828
181
+ 25,mlp.up_proj,0.01323432,0.01000,0.353
182
+ 25,mlp.gate_proj,0.01043347,0.01000,0.343
183
+ 25,mlp.down_proj,0.10982227,0.01000,6.597
184
+ 26,self_attn.k_proj,0.00186195,0.01000,3.885
185
+ 26,self_attn.v_proj,0.00593649,0.01000,2.686
186
+ 26,self_attn.q_proj,0.00663982,0.01000,0.818
187
+ 26,self_attn.o_proj,0.02385299,0.01000,0.346
188
+ 26,mlp.up_proj,0.01519259,0.01000,0.357
189
+ 26,mlp.gate_proj,0.01179895,0.01000,0.383
190
+ 26,mlp.down_proj,0.16240232,0.01000,2.805
191
+ 27,self_attn.k_proj,0.00188216,0.01000,0.753
192
+ 27,self_attn.v_proj,0.00480582,0.01000,2.568
193
+ 27,self_attn.q_proj,0.00687713,0.01000,3.243
194
+ 27,self_attn.o_proj,0.03130198,0.01000,1.944
195
+ 27,mlp.up_proj,0.01527163,0.01000,0.361
196
+ 27,mlp.gate_proj,0.01144433,0.01000,0.350
197
+ 27,mlp.down_proj,0.20937583,0.01000,6.981
198
+ 28,self_attn.k_proj,0.00163398,0.01000,2.274
199
+ 28,self_attn.v_proj,0.00619308,0.01000,0.347
200
+ 28,self_attn.q_proj,0.00561940,0.01000,1.200
201
+ 28,self_attn.o_proj,0.02704479,0.01000,0.391
202
+ 28,mlp.up_proj,0.01647252,0.01000,0.362
203
+ 28,mlp.gate_proj,0.01167521,0.01000,0.348
204
+ 28,mlp.down_proj,3.19434714,0.01000,7.166
205
+ 29,self_attn.k_proj,0.00153822,0.01000,1.574
206
+ 29,self_attn.v_proj,0.00717453,0.01000,0.978
207
+ 29,self_attn.q_proj,0.00649254,0.01000,1.795
208
+ 29,self_attn.o_proj,0.06275121,0.01000,0.949
209
+ 29,mlp.up_proj,0.02440672,0.01000,2.122
210
+ 29,mlp.gate_proj,0.01807862,0.01000,0.632
211
+ 29,mlp.down_proj,0.86159390,0.01000,0.909
quantize_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bits": 8,
3
+ "group_size": 128,
4
+ "desc_act": true,
5
+ "sym": true,
6
+ "lm_head": false,
7
+ "quant_method": "gptq",
8
+ "checkpoint_format": "gptq",
9
+ "pack_dtype": "int32",
10
+ "meta": {
11
+ "quantizer": [
12
+ "gptqmodel:2.1.1-dev"
13
+ ],
14
+ "uri": "https://github.com/modelcloud/gptqmodel",
15
+ "damp_percent": 0.01,
16
+ "damp_auto_increment": 0.0025,
17
+ "static_groups": false,
18
+ "true_sequential": true,
19
+ "mse": 0.0
20
+ }
21
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>"
5
+ ],
6
+ "bos_token": {
7
+ "content": "<|im_start|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "eos_token": {
14
+ "content": "<|im_end|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "pad_token": "<|im_end|>",
21
+ "unk_token": {
22
+ "content": "<|endoftext|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false
27
+ }
28
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<repo_name>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "4": {
37
+ "content": "<reponame>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "5": {
45
+ "content": "<file_sep>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "6": {
53
+ "content": "<filename>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "7": {
61
+ "content": "<gh_stars>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "8": {
69
+ "content": "<issue_start>",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "9": {
77
+ "content": "<issue_comment>",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "10": {
85
+ "content": "<issue_closed>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "11": {
93
+ "content": "<jupyter_start>",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "12": {
101
+ "content": "<jupyter_text>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "13": {
109
+ "content": "<jupyter_code>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "14": {
117
+ "content": "<jupyter_output>",
118
+ "lstrip": false,
119
+ "normalized": false,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": true
123
+ },
124
+ "15": {
125
+ "content": "<jupyter_script>",
126
+ "lstrip": false,
127
+ "normalized": false,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": true
131
+ },
132
+ "16": {
133
+ "content": "<empty_output>",
134
+ "lstrip": false,
135
+ "normalized": false,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": true
139
+ }
140
+ },
141
+ "additional_special_tokens": [
142
+ "<|im_start|>",
143
+ "<|im_end|>"
144
+ ],
145
+ "bos_token": "<|im_start|>",
146
+ "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful AI assistant named SmolLM, trained by Hugging Face<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
147
+ "clean_up_tokenization_spaces": false,
148
+ "eos_token": "<|im_end|>",
149
+ "extra_special_tokens": {},
150
+ "model_max_length": 8192,
151
+ "pad_token": "<|im_end|>",
152
+ "tokenizer_class": "GPT2TokenizerFast",
153
+ "unk_token": "<|endoftext|>",
154
+ "vocab_size": 49152,
155
+ "_commit_hash": null
156
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff