AxisCommunity committed on
Commit
142808c
·
verified ·
1 Parent(s): ab6ba2f

Upload 4 files

Browse files
Files changed (4) hide show
  1. config.json +34 -0
  2. generation_config.json +9 -0
  3. tokenizer.json +1528 -0
  4. tokenizer_config.json +8 -0
config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_function": "gelu_new",
3
+ "add_cross_attention": false,
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "dtype": "float32",
10
+ "embd_pdrop": 0.1,
11
+ "eos_token_id": 50256,
12
+ "initializer_range": 0.02,
13
+ "layer_norm_epsilon": 1e-05,
14
+ "model_type": "gpt2",
15
+ "n_embd": 1280,
16
+ "n_head": 20,
17
+ "n_inner": null,
18
+ "n_layer": 24,
19
+ "n_positions": 128,
20
+ "pad_token_id": null,
21
+ "reorder_and_upcast_attn": false,
22
+ "resid_pdrop": 0.1,
23
+ "scale_attn_by_inverse_layer_idx": false,
24
+ "scale_attn_weights": true,
25
+ "summary_activation": null,
26
+ "summary_first_dropout": 0.1,
27
+ "summary_proj_to_labels": true,
28
+ "summary_type": "cls_index",
29
+ "summary_use_proj": true,
30
+ "tie_word_embeddings": true,
31
+ "transformers_version": "5.10.2",
32
+ "use_cache": false,
33
+ "vocab_size": 8000
34
+ }
generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "output_attentions": false,
6
+ "output_hidden_states": false,
7
+ "transformers_version": "5.10.2",
8
+ "use_cache": true
9
+ }
tokenizer.json ADDED
@@ -0,0 +1,1528 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 128,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
+ "padding": {
10
+ "strategy": {
11
+ "Fixed": 128
12
+ },
13
+ "direction": "Right",
14
+ "pad_to_multiple_of": null,
15
+ "pad_id": 3,
16
+ "pad_type_id": 0,
17
+ "pad_token": "<pad>"
18
+ },
19
+ "added_tokens": [
20
+ {
21
+ "id": 0,
22
+ "content": "<unk>",
23
+ "single_word": false,
24
+ "lstrip": false,
25
+ "rstrip": false,
26
+ "normalized": false,
27
+ "special": true
28
+ },
29
+ {
30
+ "id": 1,
31
+ "content": "<bos>",
32
+ "single_word": false,
33
+ "lstrip": false,
34
+ "rstrip": false,
35
+ "normalized": false,
36
+ "special": true
37
+ },
38
+ {
39
+ "id": 2,
40
+ "content": "<eos>",
41
+ "single_word": false,
42
+ "lstrip": false,
43
+ "rstrip": false,
44
+ "normalized": false,
45
+ "special": true
46
+ },
47
+ {
48
+ "id": 3,
49
+ "content": "<pad>",
50
+ "single_word": false,
51
+ "lstrip": false,
52
+ "rstrip": false,
53
+ "normalized": false,
54
+ "special": true
55
+ }
56
+ ],
57
+ "normalizer": null,
58
+ "pre_tokenizer": {
59
+ "type": "ByteLevel",
60
+ "add_prefix_space": true,
61
+ "trim_offsets": true,
62
+ "use_regex": true
63
+ },
64
+ "post_processor": {
65
+ "type": "TemplateProcessing",
66
+ "single": [
67
+ {
68
+ "Sequence": {
69
+ "id": "A",
70
+ "type_id": 0
71
+ }
72
+ }
73
+ ],
74
+ "pair": [
75
+ {
76
+ "Sequence": {
77
+ "id": "A",
78
+ "type_id": 0
79
+ }
80
+ },
81
+ {
82
+ "Sequence": {
83
+ "id": "B",
84
+ "type_id": 1
85
+ }
86
+ }
87
+ ],
88
+ "special_tokens": {}
89
+ },
90
+ "decoder": null,
91
+ "model": {
92
+ "type": "BPE",
93
+ "dropout": null,
94
+ "unk_token": "<unk>",
95
+ "continuing_subword_prefix": null,
96
+ "end_of_word_suffix": null,
97
+ "fuse_unk": false,
98
+ "byte_fallback": false,
99
+ "ignore_merges": false,
100
+ "vocab": {
101
+ "<unk>": 0,
102
+ "<bos>": 1,
103
+ "<eos>": 2,
104
+ "<pad>": 3,
105
+ "'": 4,
106
+ "(": 5,
107
+ ")": 6,
108
+ ",": 7,
109
+ "-": 8,
110
+ ".": 9,
111
+ ":": 10,
112
+ "?": 11,
113
+ "H": 12,
114
+ "I": 13,
115
+ "J": 14,
116
+ "M": 15,
117
+ "O": 16,
118
+ "P": 17,
119
+ "R": 18,
120
+ "S": 19,
121
+ "T": 20,
122
+ "W": 21,
123
+ "a": 22,
124
+ "b": 23,
125
+ "c": 24,
126
+ "d": 25,
127
+ "e": 26,
128
+ "f": 27,
129
+ "h": 28,
130
+ "i": 29,
131
+ "k": 30,
132
+ "l": 31,
133
+ "m": 32,
134
+ "n": 33,
135
+ "o": 34,
136
+ "p": 35,
137
+ "r": 36,
138
+ "s": 37,
139
+ "t": 38,
140
+ "u": 39,
141
+ "v": 40,
142
+ "w": 41,
143
+ "x": 42,
144
+ "y": 43,
145
+ "£": 44,
146
+ "§": 45,
147
+ "¯": 46,
148
+ "°": 47,
149
+ "±": 48,
150
+ "²": 49,
151
+ "³": 50,
152
+ "´": 51,
153
+ "µ": 52,
154
+ "¶": 53,
155
+ "·": 54,
156
+ "¸": 55,
157
+ "¹": 56,
158
+ "º": 57,
159
+ "»": 58,
160
+ "¼": 59,
161
+ "½": 60,
162
+ "¾": 61,
163
+ "¿": 62,
164
+ "Ð": 63,
165
+ "Ñ": 64,
166
+ "Ò": 65,
167
+ "Ġ": 66,
168
+ "Ģ": 67,
169
+ "ģ": 68,
170
+ "Ĥ": 69,
171
+ "ĥ": 70,
172
+ "ħ": 71,
173
+ "Ī": 72,
174
+ "ĭ": 73,
175
+ "Į": 74,
176
+ "Ĳ": 75,
177
+ "ĳ": 76,
178
+ "Ĵ": 77,
179
+ "ĵ": 78,
180
+ "ĸ": 79,
181
+ "ļ": 80,
182
+ "Ľ": 81,
183
+ "ľ": 82,
184
+ "а": 83,
185
+ "ĠÐ": 84,
186
+ "аÐ": 85,
187
+ "Ñĭ": 86,
188
+ "о": 87,
189
+ "Ġa": 88,
190
+ "ÑĢ": 89,
191
+ "ÑĤ": 90,
192
+ "Ñĸ": 91,
193
+ "on": 92,
194
+ "л": 93,
195
+ "н": 94,
196
+ "аÑ": 95,
197
+ "е": 96,
198
+ "Ġc": 97,
199
+ "ct": 98,
200
+ "in": 99,
201
+ "Ġs": 100,
202
+ "Ġe": 101,
203
+ "le": 102,
204
+ "at": 103,
205
+ "..": 104,
206
+ "д": 105,
207
+ "ÑĪ": 106,
208
+ "ĠÒ": 107,
209
+ "Ñĸн": 108,
210
+ "ÑĪÑĸн": 109,
211
+ "ол": 110,
212
+ "Ġd": 111,
213
+ "Ġt": 112,
214
+ "or": 113,
215
+ "pt": 114,
216
+ "re": 115,
217
+ "se": 116,
218
+ "is": 117,
219
+ "ÑĤÐ": 118,
220
+ "an": 119,
221
+ "ÑĢаÐ": 120,
222
+ "fu": 121,
223
+ "Ġfu": 122,
224
+ "Ġfun": 123,
225
+ "и": 124,
226
+ "ÑĢи": 125,
227
+ "ect": 126,
228
+ "nect": 127,
229
+ "onnect": 128,
230
+ "Ġl": 129,
231
+ "ан": 130,
232
+ "ÑĭÑ": 131,
233
+ "¶Ð°Ñ": 132,
234
+ "ĠÑĸ": 133,
235
+ "ģаÑ": 134,
236
+ "ĠжаÑ": 135,
237
+ "ал": 136,
238
+ "де": 137,
239
+ "ĠÒĽ": 138,
240
+ "ÑĪÑĸнде": 139,
241
+ "ĠÑĸÑĪÑĸнде": 140,
242
+ "ģаÑĥ": 141,
243
+ "ĠжаÑģаÑĥ": 142,
244
+ "з": 143,
245
+ "Ġв": 144,
246
+ "th": 145,
247
+ "ou": 146,
248
+ "ĠH": 147,
249
+ "Ġin": 148,
250
+ "Ġex": 149,
251
+ "mp": 150,
252
+ "mple": 151,
253
+ "ion": 152,
254
+ "Ġcre": 153,
255
+ "ction": 154,
256
+ "ate": 155,
257
+ "Ġfunction": 156,
258
+ "Ġcreate": 157,
259
+ "Ġ..": 158,
260
+ "Ġ...": 159,
261
+ "ab": 160,
262
+ "ase": 161,
263
+ "Ġconnect": 162,
264
+ "atab": 163,
265
+ "Ġdatab": 164,
266
+ "Ġto": 165,
267
+ "Ġdatabase": 166,
268
+ "()": 167,
269
+ "od": 168,
270
+ "Ġm": 169,
271
+ "ce": 170,
272
+ "ry": 171,
273
+ "Ġtry": 172,
274
+ "cept": 173,
275
+ "Ġp": 174,
276
+ "Ġsor": 175,
277
+ "ist": 176,
278
+ "Ġlist": 177,
279
+ "Ġsort": 178,
280
+ "el": 179,
281
+ "Ġb": 180,
282
+ "lo": 181,
283
+ "dle": 182,
284
+ "han": 183,
285
+ "rr": 184,
286
+ "Ġhan": 185,
287
+ "Ġerr": 186,
288
+ "ors": 187,
289
+ "Ġhandle": 188,
290
+ "Ġerrors": 189,
291
+ "³Ð¾": 190,
292
+ "¼Ñĭ": 191,
293
+ "го": 192,
294
+ "ĲÐ»": 193,
295
+ "ĠÐĲÐ»": 194,
296
+ "ÑĤмÑĭ": 195,
297
+ "ÑĢиÑĤмÑĭ": 196,
298
+ "гоÑĢиÑĤмÑĭ": 197,
299
+ "ĠÐĲÐ»Ð³Ð¾ÑĢÐ¸ÑĤÐ¼Ñĭ": 198,
300
+ "Ja": 199,
301
+ "Sc": 200,
302
+ "aSc": 201,
303
+ "ipt": 202,
304
+ "ript": 203,
305
+ "vaSc": 204,
306
+ "ĠJa": 205,
307
+ "vaScript": 206,
308
+ "ĠJavaScript": 207,
309
+ "Py": 208,
310
+ "ĠPy": 209,
311
+ "thon": 210,
312
+ "ĠPython": 211,
313
+ "We": 212,
314
+ "±Ð¾": 213,
315
+ "·ÑĢаÐ": 214,
316
+ "ºÐ°": 215,
317
+ "ĠWe": 216,
318
+ "ÑĤка": 217,
319
+ "ÑĢазÑĢаÐ": 218,
320
+ "±Ð¾ÑĤка": 219,
321
+ "ĠWeb": 220,
322
+ "ÑĢазÑĢабоÑĤка": 221,
323
+ "´Ð°Ð½": 222,
324
+ "·Ñĭ": 223,
325
+ "ĳÐ°Ð": 224,
326
+ "Ġдан": 225,
327
+ "ĠÐĳÐ°Ð": 226,
328
+ "нÑĭÑ": 227,
329
+ "ĠданнÑĭÑ": 228,
330
+ "ĠÐĳÐ°Ð·Ñĭ": 229,
331
+ "ĠданнÑĭÑħ": 230,
332
+ "£Ñĭ": 231,
333
+ "¯ÑĪÑĸн": 232,
334
+ "±Ð¾Ð»": 233,
335
+ "´Ñĭ": 234,
336
+ "Òĵ": 235,
337
+ "Ò£Ñĭ": 236,
338
+ "ģал": 237,
339
+ "ľÑĭÑ": 238,
340
+ "Ġбол": 239,
341
+ "ĠÐľÑĭÑ": 240,
342
+ "ай": 241,
343
+ "адÑĭ": 242,
344
+ "ÑĭÒ£Ñĭ": 243,
345
+ "дан": 244,
346
+ "ĠÒ¯ÑĪÑĸн": 245,
347
+ "олдан": 246,
348
+ "алай": 247,
349
+ "ĠÒĽÐ¾Ð»Ð´Ð°Ð½": 248,
350
+ "ĠÒĽÐ°Ð»Ð°Ð¹": 249,
351
+ "ĠжаÑģаÑĥÒĵ": 250,
352
+ "ĠболадÑĭ": 251,
353
+ "ĠÐľÑĭÑģал": 252,
354
+ "ÑĭÒ£Ñĭз": 253,
355
+ "ĠÒĽÐ¾Ð»Ð´Ð°Ð½ÑĭÒ£Ñĭз": 254,
356
+ "ĠжаÑģаÑĥÒĵа": 255,
357
+ "§ÑĤ": 256,
358
+ "±Ñĭ": 257,
359
+ "¸Ñ": 258,
360
+ "¹ÑĤ": 259,
361
+ "¼Ðµ": 260,
362
+ "¿Ð¾Ð»": 261,
363
+ "¿ÑĢи": 262,
364
+ "бÑĭ": 263,
365
+ "йÑĤ": 264,
366
+ "ме": 265,
367
+ "пол": 266,
368
+ "Ñĥ": 267,
369
+ "ÑĮ": 268,
370
+ "ģпол": 269,
371
+ "Ĵо": 270,
372
+ "ļаÐ": 271,
373
+ "ĠЧÑĤ": 272,
374
+ "ĠиÑ": 273,
375
+ "ĠпÑĢи": 274,
376
+ "ĠÐĴо": 275,
377
+ "ĠÐļаÐ": 276,
378
+ "обÑĭ": 277,
379
+ "зÑĥ": 278,
380
+ "йÑĤе": 279,
381
+ "меÑĢ": 280,
382
+ "ÑĮзÑĥ": 281,
383
+ "ģполÑĮзÑĥ": 282,
384
+ "ĠЧÑĤобÑĭ": 283,
385
+ "ĠиÑģполÑĮзÑĥ": 284,
386
+ "ĠпÑĢимеÑĢ": 285,
387
+ "ĠÐĴоÑĤ": 286,
388
+ "ĠÐļак": 287,
389
+ "ĠиÑģполÑĮзÑĥйÑĤе": 288,
390
+ "To": 289,
391
+ "ample": 290,
392
+ "ere": 291,
393
+ "hou": 292,
394
+ "ld": 293,
395
+ "ow": 294,
396
+ "use": 295,
397
+ "you": 296,
398
+ "ĠI": 297,
399
+ "Ġis": 298,
400
+ "ĠTo": 299,
401
+ "Ġuse": 300,
402
+ "Ġyou": 301,
403
+ "Ġan": 302,
404
+ "Ġcan": 303,
405
+ "Ġshou": 304,
406
+ "ĠHere": 305,
407
+ "ĠHow": 306,
408
+ "Ġexample": 307,
409
+ "Ġshould": 308,
410
+ "as": 309,
411
+ "ef": 310,
412
+ "Ġdef": 311,
413
+ "Ġfunc": 312,
414
+ "():": 313,
415
+ "Ġpas": 314,
416
+ "Ġpass": 315,
417
+ "Ġexcept": 316,
418
+ "OR": 317,
419
+ "ĠOR": 318,
420
+ "odel": 319,
421
+ "Ġmodel": 320,
422
+ "ĠORM": 321,
423
+ "Ġmodels": 322,
424
+ "imple": 323,
425
+ "oo": 324,
426
+ "Ġsimple": 325,
427
+ "Ġloo": 326,
428
+ "Ġloop": 327,
429
+ "connect": 328,
430
+ "Ġdb": 329,
431
+ "eth": 330,
432
+ "il": 331,
433
+ "uil": 332,
434
+ "ods": 333,
435
+ "Ġmeth": 334,
436
+ "Ġbuil": 335,
437
+ "Ġmethods": 336,
438
+ "Ġbuilt": 337,
439
+ "ck": 338,
440
+ "ex": 339,
441
+ "Ġblo": 340,
442
+ "cks": 341,
443
+ "except": 342,
444
+ "Ġblocks": 343,
445
+ "')": 344,
446
+ "('": 345,
447
+ "Hel": 346,
448
+ "rin": 347,
449
+ "Ġprin": 348,
450
+ "Hello": 349,
451
+ "Ġprint": 350
452
+ },
453
+ "merges": [
454
+ [
455
+ "Ð",
456
+ "°"
457
+ ],
458
+ [
459
+ "Ġ",
460
+ "Ð"
461
+ ],
462
+ [
463
+ "а",
464
+ "Ð"
465
+ ],
466
+ [
467
+ "Ñ",
468
+ "ĭ"
469
+ ],
470
+ [
471
+ "Ð",
472
+ "¾"
473
+ ],
474
+ [
475
+ "Ġ",
476
+ "a"
477
+ ],
478
+ [
479
+ "Ñ",
480
+ "Ģ"
481
+ ],
482
+ [
483
+ "Ñ",
484
+ "Ĥ"
485
+ ],
486
+ [
487
+ "Ñ",
488
+ "ĸ"
489
+ ],
490
+ [
491
+ "o",
492
+ "n"
493
+ ],
494
+ [
495
+ "Ð",
496
+ "»"
497
+ ],
498
+ [
499
+ "Ð",
500
+ "½"
501
+ ],
502
+ [
503
+ "а",
504
+ "Ñ"
505
+ ],
506
+ [
507
+ "Ð",
508
+ "µ"
509
+ ],
510
+ [
511
+ "Ġ",
512
+ "c"
513
+ ],
514
+ [
515
+ "c",
516
+ "t"
517
+ ],
518
+ [
519
+ "i",
520
+ "n"
521
+ ],
522
+ [
523
+ "Ġ",
524
+ "s"
525
+ ],
526
+ [
527
+ "Ġ",
528
+ "e"
529
+ ],
530
+ [
531
+ "l",
532
+ "e"
533
+ ],
534
+ [
535
+ "a",
536
+ "t"
537
+ ],
538
+ [
539
+ ".",
540
+ "."
541
+ ],
542
+ [
543
+ "Ð",
544
+ "´"
545
+ ],
546
+ [
547
+ "Ñ",
548
+ "Ī"
549
+ ],
550
+ [
551
+ "Ġ",
552
+ "Ò"
553
+ ],
554
+ [
555
+ "Ñĸ",
556
+ "н"
557
+ ],
558
+ [
559
+ "ÑĪ",
560
+ "Ñĸн"
561
+ ],
562
+ [
563
+ "о",
564
+ "л"
565
+ ],
566
+ [
567
+ "Ġ",
568
+ "d"
569
+ ],
570
+ [
571
+ "Ġ",
572
+ "t"
573
+ ],
574
+ [
575
+ "o",
576
+ "r"
577
+ ],
578
+ [
579
+ "p",
580
+ "t"
581
+ ],
582
+ [
583
+ "r",
584
+ "e"
585
+ ],
586
+ [
587
+ "s",
588
+ "e"
589
+ ],
590
+ [
591
+ "i",
592
+ "s"
593
+ ],
594
+ [
595
+ "ÑĤ",
596
+ "Ð"
597
+ ],
598
+ [
599
+ "a",
600
+ "n"
601
+ ],
602
+ [
603
+ "ÑĢ",
604
+ "аÐ"
605
+ ],
606
+ [
607
+ "f",
608
+ "u"
609
+ ],
610
+ [
611
+ "Ġ",
612
+ "fu"
613
+ ],
614
+ [
615
+ "Ġfu",
616
+ "n"
617
+ ],
618
+ [
619
+ "Ð",
620
+ "¸"
621
+ ],
622
+ [
623
+ "ÑĢ",
624
+ "и"
625
+ ],
626
+ [
627
+ "e",
628
+ "ct"
629
+ ],
630
+ [
631
+ "n",
632
+ "ect"
633
+ ],
634
+ [
635
+ "on",
636
+ "nect"
637
+ ],
638
+ [
639
+ "Ġ",
640
+ "l"
641
+ ],
642
+ [
643
+ "аÐ",
644
+ "½"
645
+ ],
646
+ [
647
+ "Ñĭ",
648
+ "Ñ"
649
+ ],
650
+ [
651
+ "¶",
652
+ "аÑ"
653
+ ],
654
+ [
655
+ "Ġ",
656
+ "Ñĸ"
657
+ ],
658
+ [
659
+ "ģ",
660
+ "аÑ"
661
+ ],
662
+ [
663
+ "ĠÐ",
664
+ "¶Ð°Ñ"
665
+ ],
666
+ [
667
+ "аÐ",
668
+ "»"
669
+ ],
670
+ [
671
+ "д",
672
+ "е"
673
+ ],
674
+ [
675
+ "ĠÒ",
676
+ "Ľ"
677
+ ],
678
+ [
679
+ "ÑĪÑĸн",
680
+ "де"
681
+ ],
682
+ [
683
+ "ĠÑĸ",
684
+ "ÑĪÑĸнде"
685
+ ],
686
+ [
687
+ "ģаÑ",
688
+ "ĥ"
689
+ ],
690
+ [
691
+ "ĠжаÑ",
692
+ "ģаÑĥ"
693
+ ],
694
+ [
695
+ "Ð",
696
+ "·"
697
+ ],
698
+ [
699
+ "ĠÐ",
700
+ "²"
701
+ ],
702
+ [
703
+ "t",
704
+ "h"
705
+ ],
706
+ [
707
+ "o",
708
+ "u"
709
+ ],
710
+ [
711
+ "Ġ",
712
+ "H"
713
+ ],
714
+ [
715
+ "Ġ",
716
+ "in"
717
+ ],
718
+ [
719
+ "Ġe",
720
+ "x"
721
+ ],
722
+ [
723
+ "m",
724
+ "p"
725
+ ],
726
+ [
727
+ "mp",
728
+ "le"
729
+ ],
730
+ [
731
+ "i",
732
+ "on"
733
+ ],
734
+ [
735
+ "Ġc",
736
+ "re"
737
+ ],
738
+ [
739
+ "ct",
740
+ "ion"
741
+ ],
742
+ [
743
+ "at",
744
+ "e"
745
+ ],
746
+ [
747
+ "Ġfun",
748
+ "ction"
749
+ ],
750
+ [
751
+ "Ġcre",
752
+ "ate"
753
+ ],
754
+ [
755
+ "Ġ",
756
+ ".."
757
+ ],
758
+ [
759
+ "Ġ..",
760
+ "."
761
+ ],
762
+ [
763
+ "a",
764
+ "b"
765
+ ],
766
+ [
767
+ "a",
768
+ "se"
769
+ ],
770
+ [
771
+ "Ġc",
772
+ "onnect"
773
+ ],
774
+ [
775
+ "at",
776
+ "ab"
777
+ ],
778
+ [
779
+ "Ġd",
780
+ "atab"
781
+ ],
782
+ [
783
+ "Ġt",
784
+ "o"
785
+ ],
786
+ [
787
+ "Ġdatab",
788
+ "ase"
789
+ ],
790
+ [
791
+ "(",
792
+ ")"
793
+ ],
794
+ [
795
+ "o",
796
+ "d"
797
+ ],
798
+ [
799
+ "Ġ",
800
+ "m"
801
+ ],
802
+ [
803
+ "c",
804
+ "e"
805
+ ],
806
+ [
807
+ "r",
808
+ "y"
809
+ ],
810
+ [
811
+ "Ġt",
812
+ "ry"
813
+ ],
814
+ [
815
+ "ce",
816
+ "pt"
817
+ ],
818
+ [
819
+ "Ġ",
820
+ "p"
821
+ ],
822
+ [
823
+ "Ġs",
824
+ "or"
825
+ ],
826
+ [
827
+ "is",
828
+ "t"
829
+ ],
830
+ [
831
+ "Ġl",
832
+ "ist"
833
+ ],
834
+ [
835
+ "Ġsor",
836
+ "t"
837
+ ],
838
+ [
839
+ "e",
840
+ "l"
841
+ ],
842
+ [
843
+ "Ġ",
844
+ "b"
845
+ ],
846
+ [
847
+ "l",
848
+ "o"
849
+ ],
850
+ [
851
+ "d",
852
+ "le"
853
+ ],
854
+ [
855
+ "h",
856
+ "an"
857
+ ],
858
+ [
859
+ "r",
860
+ "r"
861
+ ],
862
+ [
863
+ "Ġ",
864
+ "han"
865
+ ],
866
+ [
867
+ "Ġe",
868
+ "rr"
869
+ ],
870
+ [
871
+ "or",
872
+ "s"
873
+ ],
874
+ [
875
+ "Ġhan",
876
+ "dle"
877
+ ],
878
+ [
879
+ "Ġerr",
880
+ "ors"
881
+ ],
882
+ [
883
+ "³",
884
+ "о"
885
+ ],
886
+ [
887
+ "¼",
888
+ "Ñĭ"
889
+ ],
890
+ [
891
+ "Ð",
892
+ "³Ð¾"
893
+ ],
894
+ [
895
+ "Ĳ",
896
+ "л"
897
+ ],
898
+ [
899
+ "ĠÐ",
900
+ "ĲÐ»"
901
+ ],
902
+ [
903
+ "ÑĤÐ",
904
+ "¼Ñĭ"
905
+ ],
906
+ [
907
+ "ÑĢи",
908
+ "ÑĤмÑĭ"
909
+ ],
910
+ [
911
+ "го",
912
+ "ÑĢиÑĤмÑĭ"
913
+ ],
914
+ [
915
+ "ĠÐĲÐ»",
916
+ "гоÑĢиÑĤмÑĭ"
917
+ ],
918
+ [
919
+ "J",
920
+ "a"
921
+ ],
922
+ [
923
+ "S",
924
+ "c"
925
+ ],
926
+ [
927
+ "a",
928
+ "Sc"
929
+ ],
930
+ [
931
+ "i",
932
+ "pt"
933
+ ],
934
+ [
935
+ "r",
936
+ "ipt"
937
+ ],
938
+ [
939
+ "v",
940
+ "aSc"
941
+ ],
942
+ [
943
+ "Ġ",
944
+ "Ja"
945
+ ],
946
+ [
947
+ "vaSc",
948
+ "ript"
949
+ ],
950
+ [
951
+ "ĠJa",
952
+ "vaScript"
953
+ ],
954
+ [
955
+ "P",
956
+ "y"
957
+ ],
958
+ [
959
+ "Ġ",
960
+ "Py"
961
+ ],
962
+ [
963
+ "th",
964
+ "on"
965
+ ],
966
+ [
967
+ "ĠPy",
968
+ "thon"
969
+ ],
970
+ [
971
+ "W",
972
+ "e"
973
+ ],
974
+ [
975
+ "±",
976
+ "о"
977
+ ],
978
+ [
979
+ "·",
980
+ "ÑĢаÐ"
981
+ ],
982
+ [
983
+ "º",
984
+ "а"
985
+ ],
986
+ [
987
+ "Ġ",
988
+ "We"
989
+ ],
990
+ [
991
+ "ÑĤÐ",
992
+ "ºÐ°"
993
+ ],
994
+ [
995
+ "ÑĢаÐ",
996
+ "·ÑĢаÐ"
997
+ ],
998
+ [
999
+ "±Ð¾",
1000
+ "ÑĤка"
1001
+ ],
1002
+ [
1003
+ "ĠWe",
1004
+ "b"
1005
+ ],
1006
+ [
1007
+ "ÑĢазÑĢаÐ",
1008
+ "±Ð¾ÑĤка"
1009
+ ],
1010
+ [
1011
+ "´",
1012
+ "ан"
1013
+ ],
1014
+ [
1015
+ "·",
1016
+ "Ñĭ"
1017
+ ],
1018
+ [
1019
+ "ĳ",
1020
+ "аÐ"
1021
+ ],
1022
+ [
1023
+ "ĠÐ",
1024
+ "´Ð°Ð½"
1025
+ ],
1026
+ [
1027
+ "ĠÐ",
1028
+ "ĳÐ°Ð"
1029
+ ],
1030
+ [
1031
+ "н",
1032
+ "ÑĭÑ"
1033
+ ],
1034
+ [
1035
+ "Ġдан",
1036
+ "нÑĭÑ"
1037
+ ],
1038
+ [
1039
+ "ĠÐĳÐ°Ð",
1040
+ "·Ñĭ"
1041
+ ],
1042
+ [
1043
+ "ĠданнÑĭÑ",
1044
+ "ħ"
1045
+ ],
1046
+ [
1047
+ "£",
1048
+ "Ñĭ"
1049
+ ],
1050
+ [
1051
+ "¯",
1052
+ "ÑĪÑĸн"
1053
+ ],
1054
+ [
1055
+ "±",
1056
+ "ол"
1057
+ ],
1058
+ [
1059
+ "´",
1060
+ "Ñĭ"
1061
+ ],
1062
+ [
1063
+ "Ò",
1064
+ "ĵ"
1065
+ ],
1066
+ [
1067
+ "Ò",
1068
+ "£Ñĭ"
1069
+ ],
1070
+ [
1071
+ "ģ",
1072
+ "ал"
1073
+ ],
1074
+ [
1075
+ "ľ",
1076
+ "ÑĭÑ"
1077
+ ],
1078
+ [
1079
+ "ĠÐ",
1080
+ "±Ð¾Ð»"
1081
+ ],
1082
+ [
1083
+ "ĠÐ",
1084
+ "ľÑĭÑ"
1085
+ ],
1086
+ [
1087
+ "аÐ",
1088
+ "¹"
1089
+ ],
1090
+ [
1091
+ "аÐ",
1092
+ "´Ñĭ"
1093
+ ],
1094
+ [
1095
+ "Ñĭ",
1096
+ "Ò£Ñĭ"
1097
+ ],
1098
+ [
1099
+ "д",
1100
+ "ан"
1101
+ ],
1102
+ [
1103
+ "ĠÒ",
1104
+ "¯ÑĪÑĸн"
1105
+ ],
1106
+ [
1107
+ "ол",
1108
+ "дан"
1109
+ ],
1110
+ [
1111
+ "ал",
1112
+ "ай"
1113
+ ],
1114
+ [
1115
+ "ĠÒĽ",
1116
+ "олдан"
1117
+ ],
1118
+ [
1119
+ "ĠÒĽ",
1120
+ "алай"
1121
+ ],
1122
+ [
1123
+ "ĠжаÑģаÑĥ",
1124
+ "Òĵ"
1125
+ ],
1126
+ [
1127
+ "Ġбол",
1128
+ "адÑĭ"
1129
+ ],
1130
+ [
1131
+ "ĠÐľÑĭÑ",
1132
+ "ģал"
1133
+ ],
1134
+ [
1135
+ "ÑĭÒ£Ñĭ",
1136
+ "з"
1137
+ ],
1138
+ [
1139
+ "ĠÒĽÐ¾Ð»Ð´Ð°Ð½",
1140
+ "ÑĭÒ£Ñĭз"
1141
+ ],
1142
+ [
1143
+ "ĠжаÑģаÑĥÒĵ",
1144
+ "а"
1145
+ ],
1146
+ [
1147
+ "§",
1148
+ "ÑĤ"
1149
+ ],
1150
+ [
1151
+ "±",
1152
+ "Ñĭ"
1153
+ ],
1154
+ [
1155
+ "¸",
1156
+ "Ñ"
1157
+ ],
1158
+ [
1159
+ "¹",
1160
+ "ÑĤ"
1161
+ ],
1162
+ [
1163
+ "¼",
1164
+ "е"
1165
+ ],
1166
+ [
1167
+ "¿",
1168
+ "ол"
1169
+ ],
1170
+ [
1171
+ "¿",
1172
+ "ÑĢи"
1173
+ ],
1174
+ [
1175
+ "Ð",
1176
+ "±Ñĭ"
1177
+ ],
1178
+ [
1179
+ "Ð",
1180
+ "¹ÑĤ"
1181
+ ],
1182
+ [
1183
+ "Ð",
1184
+ "¼Ðµ"
1185
+ ],
1186
+ [
1187
+ "Ð",
1188
+ "¿Ð¾Ð»"
1189
+ ],
1190
+ [
1191
+ "Ñ",
1192
+ "ĥ"
1193
+ ],
1194
+ [
1195
+ "Ñ",
1196
+ "Į"
1197
+ ],
1198
+ [
1199
+ "ģ",
1200
+ "пол"
1201
+ ],
1202
+ [
1203
+ "Ĵ",
1204
+ "о"
1205
+ ],
1206
+ [
1207
+ "ļ",
1208
+ "аÐ"
1209
+ ],
1210
+ [
1211
+ "ĠÐ",
1212
+ "§ÑĤ"
1213
+ ],
1214
+ [
1215
+ "ĠÐ",
1216
+ "¸Ñ"
1217
+ ],
1218
+ [
1219
+ "ĠÐ",
1220
+ "¿ÑĢи"
1221
+ ],
1222
+ [
1223
+ "ĠÐ",
1224
+ "Ĵо"
1225
+ ],
1226
+ [
1227
+ "ĠÐ",
1228
+ "ļаÐ"
1229
+ ],
1230
+ [
1231
+ "о",
1232
+ "бÑĭ"
1233
+ ],
1234
+ [
1235
+ "з",
1236
+ "Ñĥ"
1237
+ ],
1238
+ [
1239
+ "йÑĤ",
1240
+ "е"
1241
+ ],
1242
+ [
1243
+ "ме",
1244
+ "ÑĢ"
1245
+ ],
1246
+ [
1247
+ "ÑĮ",
1248
+ "зÑĥ"
1249
+ ],
1250
+ [
1251
+ "ģпол",
1252
+ "ÑĮзÑĥ"
1253
+ ],
1254
+ [
1255
+ "ĠЧÑĤ",
1256
+ "обÑĭ"
1257
+ ],
1258
+ [
1259
+ "ĠиÑ",
1260
+ "ģполÑĮзÑĥ"
1261
+ ],
1262
+ [
1263
+ "ĠпÑĢи",
1264
+ "меÑĢ"
1265
+ ],
1266
+ [
1267
+ "ĠÐĴо",
1268
+ "ÑĤ"
1269
+ ],
1270
+ [
1271
+ "ĠÐļаÐ",
1272
+ "º"
1273
+ ],
1274
+ [
1275
+ "ĠиÑģполÑĮзÑĥ",
1276
+ "йÑĤе"
1277
+ ],
1278
+ [
1279
+ "T",
1280
+ "o"
1281
+ ],
1282
+ [
1283
+ "a",
1284
+ "mple"
1285
+ ],
1286
+ [
1287
+ "e",
1288
+ "re"
1289
+ ],
1290
+ [
1291
+ "h",
1292
+ "ou"
1293
+ ],
1294
+ [
1295
+ "l",
1296
+ "d"
1297
+ ],
1298
+ [
1299
+ "o",
1300
+ "w"
1301
+ ],
1302
+ [
1303
+ "u",
1304
+ "se"
1305
+ ],
1306
+ [
1307
+ "y",
1308
+ "ou"
1309
+ ],
1310
+ [
1311
+ "Ġ",
1312
+ "I"
1313
+ ],
1314
+ [
1315
+ "Ġ",
1316
+ "is"
1317
+ ],
1318
+ [
1319
+ "Ġ",
1320
+ "To"
1321
+ ],
1322
+ [
1323
+ "Ġ",
1324
+ "use"
1325
+ ],
1326
+ [
1327
+ "Ġ",
1328
+ "you"
1329
+ ],
1330
+ [
1331
+ "Ġa",
1332
+ "n"
1333
+ ],
1334
+ [
1335
+ "Ġc",
1336
+ "an"
1337
+ ],
1338
+ [
1339
+ "Ġs",
1340
+ "hou"
1341
+ ],
1342
+ [
1343
+ "ĠH",
1344
+ "ere"
1345
+ ],
1346
+ [
1347
+ "ĠH",
1348
+ "ow"
1349
+ ],
1350
+ [
1351
+ "Ġex",
1352
+ "ample"
1353
+ ],
1354
+ [
1355
+ "Ġshou",
1356
+ "ld"
1357
+ ],
1358
+ [
1359
+ "a",
1360
+ "s"
1361
+ ],
1362
+ [
1363
+ "e",
1364
+ "f"
1365
+ ],
1366
+ [
1367
+ "Ġd",
1368
+ "ef"
1369
+ ],
1370
+ [
1371
+ "Ġfun",
1372
+ "c"
1373
+ ],
1374
+ [
1375
+ "()",
1376
+ ":"
1377
+ ],
1378
+ [
1379
+ "Ġp",
1380
+ "as"
1381
+ ],
1382
+ [
1383
+ "Ġpas",
1384
+ "s"
1385
+ ],
1386
+ [
1387
+ "Ġex",
1388
+ "cept"
1389
+ ],
1390
+ [
1391
+ "O",
1392
+ "R"
1393
+ ],
1394
+ [
1395
+ "Ġ",
1396
+ "OR"
1397
+ ],
1398
+ [
1399
+ "od",
1400
+ "el"
1401
+ ],
1402
+ [
1403
+ "Ġm",
1404
+ "odel"
1405
+ ],
1406
+ [
1407
+ "ĠOR",
1408
+ "M"
1409
+ ],
1410
+ [
1411
+ "Ġmodel",
1412
+ "s"
1413
+ ],
1414
+ [
1415
+ "i",
1416
+ "mple"
1417
+ ],
1418
+ [
1419
+ "o",
1420
+ "o"
1421
+ ],
1422
+ [
1423
+ "Ġs",
1424
+ "imple"
1425
+ ],
1426
+ [
1427
+ "Ġl",
1428
+ "oo"
1429
+ ],
1430
+ [
1431
+ "Ġloo",
1432
+ "p"
1433
+ ],
1434
+ [
1435
+ "c",
1436
+ "onnect"
1437
+ ],
1438
+ [
1439
+ "Ġd",
1440
+ "b"
1441
+ ],
1442
+ [
1443
+ "e",
1444
+ "th"
1445
+ ],
1446
+ [
1447
+ "i",
1448
+ "l"
1449
+ ],
1450
+ [
1451
+ "u",
1452
+ "il"
1453
+ ],
1454
+ [
1455
+ "od",
1456
+ "s"
1457
+ ],
1458
+ [
1459
+ "Ġm",
1460
+ "eth"
1461
+ ],
1462
+ [
1463
+ "Ġb",
1464
+ "uil"
1465
+ ],
1466
+ [
1467
+ "Ġmeth",
1468
+ "ods"
1469
+ ],
1470
+ [
1471
+ "Ġbuil",
1472
+ "t"
1473
+ ],
1474
+ [
1475
+ "c",
1476
+ "k"
1477
+ ],
1478
+ [
1479
+ "e",
1480
+ "x"
1481
+ ],
1482
+ [
1483
+ "Ġb",
1484
+ "lo"
1485
+ ],
1486
+ [
1487
+ "ck",
1488
+ "s"
1489
+ ],
1490
+ [
1491
+ "ex",
1492
+ "cept"
1493
+ ],
1494
+ [
1495
+ "Ġblo",
1496
+ "cks"
1497
+ ],
1498
+ [
1499
+ "'",
1500
+ ")"
1501
+ ],
1502
+ [
1503
+ "(",
1504
+ "'"
1505
+ ],
1506
+ [
1507
+ "H",
1508
+ "el"
1509
+ ],
1510
+ [
1511
+ "r",
1512
+ "in"
1513
+ ],
1514
+ [
1515
+ "Ġp",
1516
+ "rin"
1517
+ ],
1518
+ [
1519
+ "Hel",
1520
+ "lo"
1521
+ ],
1522
+ [
1523
+ "Ġprin",
1524
+ "t"
1525
+ ]
1526
+ ]
1527
+ }
1528
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<bos>",
4
+ "eos_token": "<eos>",
5
+ "model_max_length": 1000000000000000019884624838656,
6
+ "pad_token": "<pad>",
7
+ "tokenizer_class": "TokenizersBackend"
8
+ }