syko818121 committed on
Commit
04ea0cc
·
verified ·
1 Parent(s): a56b689

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. config.json +23 -0
  2. pytorch_model.bin +3 -0
  3. tokenizer.json +895 -0
  4. tokenizer_config.json +13 -0
  5. vocab.json +196 -0
config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_layer": 6,
3
+ "n_head": 4,
4
+ "n_embd": 256,
5
+ "max_position_embeddings": 512,
6
+ "vocab_size": 194,
7
+ "model_type": "gpt2",
8
+ "architectures": [
9
+ "GPT2LMHeadModel"
10
+ ],
11
+ "attn_pdrop": 0.1,
12
+ "embd_pdrop": 0.1,
13
+ "resid_pdrop": 0.1,
14
+ "initializer_range": 0.02,
15
+ "layer_norm_epsilon": 1e-05,
16
+ "n_positions": 512,
17
+ "scale_attn_weights": true,
18
+ "use_cache": true,
19
+ "pad_token_id": 0,
20
+ "bos_token_id": 2,
21
+ "eos_token_id": 3,
22
+ "unk_token_id": 1
23
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62dfad8cc0ff00af62a3ce18f5369a5428ee54337093da15d693b5a1d6e20f50
3
+ size 26000829
tokenizer.json ADDED
@@ -0,0 +1,895 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "[PAD]",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "[UNK]",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 2,
26
+ "content": "[BOS]",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 3,
35
+ "content": "[EOS]",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ }
42
+ ],
43
+ "normalizer": {
44
+ "type": "Sequence",
45
+ "normalizers": [
46
+ {
47
+ "type": "Lowercase"
48
+ },
49
+ {
50
+ "type": "NFKC"
51
+ }
52
+ ]
53
+ },
54
+ "pre_tokenizer": {
55
+ "type": "Whitespace"
56
+ },
57
+ "post_processor": null,
58
+ "decoder": null,
59
+ "model": {
60
+ "type": "BPE",
61
+ "dropout": null,
62
+ "unk_token": "[UNK]",
63
+ "continuing_subword_prefix": null,
64
+ "end_of_word_suffix": null,
65
+ "fuse_unk": false,
66
+ "byte_fallback": false,
67
+ "ignore_merges": false,
68
+ "vocab": {
69
+ "[PAD]": 0,
70
+ "[UNK]": 1,
71
+ "[BOS]": 2,
72
+ "[EOS]": 3,
73
+ "(": 4,
74
+ ")": 5,
75
+ ",": 6,
76
+ ".": 7,
77
+ "a": 8,
78
+ "b": 9,
79
+ "c": 10,
80
+ "d": 11,
81
+ "e": 12,
82
+ "f": 13,
83
+ "g": 14,
84
+ "h": 15,
85
+ "i": 16,
86
+ "j": 17,
87
+ "k": 18,
88
+ "l": 19,
89
+ "m": 20,
90
+ "n": 21,
91
+ "o": 22,
92
+ "p": 23,
93
+ "r": 24,
94
+ "s": 25,
95
+ "t": 26,
96
+ "u": 27,
97
+ "v": 28,
98
+ "y": 29,
99
+ "z": 30,
100
+ "ç": 31,
101
+ "ö": 32,
102
+ "ü": 33,
103
+ "ğ": 34,
104
+ "ı": 35,
105
+ "ş": 36,
106
+ "in": 37,
107
+ "la": 38,
108
+ "er": 39,
109
+ "el": 40,
110
+ "il": 41,
111
+ "ir": 42,
112
+ "ma": 43,
113
+ "me": 44,
114
+ "ır": 45,
115
+ "ka": 46,
116
+ "bir": 47,
117
+ "dil": 48,
118
+ "ar": 49,
119
+ "en": 50,
120
+ "or": 51,
121
+ "tü": 52,
122
+ "ve": 53,
123
+ "ya": 54,
124
+ "lar": 55,
125
+ "da": 56,
126
+ "im": 57,
127
+ "it": 58,
128
+ "mo": 59,
129
+ "mod": 60,
130
+ "em": 61,
131
+ "et": 62,
132
+ "eğ": 63,
133
+ "ha": 64,
134
+ "si": 65,
135
+ "yor": 66,
136
+ "ün": 67,
137
+ "lama": 68,
138
+ "eri": 69,
139
+ "model": 70,
140
+ "ay": 71,
141
+ "dır": 72,
142
+ "eka": 73,
143
+ "kç": 74,
144
+ "ok": 75,
145
+ "ol": 76,
146
+ "pay": 77,
147
+ "zeka": 78,
148
+ "ın": 79,
149
+ "lan": 80,
150
+ "tür": 81,
151
+ "yapay": 82,
152
+ "imi": 83,
153
+ "eğit": 84,
154
+ "al": 85,
155
+ "alan": 86,
156
+ "bu": 87,
157
+ "de": 88,
158
+ "ek": 89,
159
+ "gün": 90,
160
+ "iz": 91,
161
+ "iç": 92,
162
+ "kt": 93,
163
+ "li": 94,
164
+ "oj": 95,
165
+ "va": 96,
166
+ "ön": 97,
167
+ "ük": 98,
168
+ "ğr": 99,
169
+ "ğı": 100,
170
+ "şt": 101,
171
+ "erin": 102,
172
+ "eli": 103,
173
+ "kçe": 104,
174
+ "türkçe": 105,
175
+ "için": 106,
176
+ "an": 107,
177
+ "ba": 108,
178
+ "bil": 109,
179
+ "di": 110,
180
+ "do": 111,
181
+ "ec": 112,
182
+ "fa": 113,
183
+ "gu": 114,
184
+ "ku": 115,
185
+ "kü": 116,
186
+ "kin": 117,
187
+ "le": 118,
188
+ "lı": 119,
189
+ "pr": 120,
190
+ "sı": 121,
191
+ "uy": 122,
192
+ "veri": 123,
193
+ "çok": 124,
194
+ "öğr": 125,
195
+ "ür": 126,
196
+ "malar": 127,
197
+ "emli": 128,
198
+ "modelin": 129,
199
+ "önemli": 130,
200
+ "gulama": 131,
201
+ "uygulama": 132,
202
+ "öğren": 133,
203
+ "ah": 134,
204
+ "bü": 135,
205
+ "ca": 136,
206
+ "du": 137,
207
+ "dı": 138,
208
+ "dir": 139,
209
+ "dün": 140,
210
+ "eç": 141,
211
+ "esi": 142,
212
+ "edi": 143,
213
+ "gin": 144,
214
+ "ip": 145,
215
+ "isi": 146,
216
+ "keli": 147,
217
+ "lem": 148,
218
+ "leri": 149,
219
+ "nu": 150,
220
+ "nol": 151,
221
+ "onu": 152,
222
+ "par": 153,
223
+ "pha": 154,
224
+ "ra": 155,
225
+ "tu": 156,
226
+ "tok": 157,
227
+ "tek": 158,
228
+ "yi": 159,
229
+ "yük": 160,
230
+ "zla": 161,
231
+ "zel": 162,
232
+ "ğal": 163,
233
+ "inin": 164,
234
+ "elerin": 165,
235
+ "ile": 166,
236
+ "ilme": 167,
237
+ "makin": 168,
238
+ "met": 169,
239
+ "eni": 170,
240
+ "eniz": 171,
241
+ "tüpha": 172,
242
+ "ları": 173,
243
+ "etimi": 174,
244
+ "oldu": 175,
245
+ "eğitim": 176,
246
+ "ktır": 177,
247
+ "ğım": 178,
248
+ "ştir": 179,
249
+ "doğal": 180,
250
+ "kütüpha": 181,
251
+ "proj": 182,
252
+ "öğrenimi": 183,
253
+ "büyük": 184,
254
+ "dünya": 185,
255
+ "ediyor": 186,
256
+ "kelime": 187,
257
+ "noloj": 188,
258
+ "tokeniz": 189,
259
+ "teknoloj": 190,
260
+ "makine": 191,
261
+ "kütüphan": 192,
262
+ "tokenizer": 193
263
+ },
264
+ "merges": [
265
+ [
266
+ "i",
267
+ "n"
268
+ ],
269
+ [
270
+ "l",
271
+ "a"
272
+ ],
273
+ [
274
+ "e",
275
+ "r"
276
+ ],
277
+ [
278
+ "e",
279
+ "l"
280
+ ],
281
+ [
282
+ "i",
283
+ "l"
284
+ ],
285
+ [
286
+ "i",
287
+ "r"
288
+ ],
289
+ [
290
+ "m",
291
+ "a"
292
+ ],
293
+ [
294
+ "m",
295
+ "e"
296
+ ],
297
+ [
298
+ "ı",
299
+ "r"
300
+ ],
301
+ [
302
+ "k",
303
+ "a"
304
+ ],
305
+ [
306
+ "b",
307
+ "ir"
308
+ ],
309
+ [
310
+ "d",
311
+ "il"
312
+ ],
313
+ [
314
+ "a",
315
+ "r"
316
+ ],
317
+ [
318
+ "e",
319
+ "n"
320
+ ],
321
+ [
322
+ "o",
323
+ "r"
324
+ ],
325
+ [
326
+ "t",
327
+ "ü"
328
+ ],
329
+ [
330
+ "v",
331
+ "e"
332
+ ],
333
+ [
334
+ "y",
335
+ "a"
336
+ ],
337
+ [
338
+ "la",
339
+ "r"
340
+ ],
341
+ [
342
+ "d",
343
+ "a"
344
+ ],
345
+ [
346
+ "i",
347
+ "m"
348
+ ],
349
+ [
350
+ "i",
351
+ "t"
352
+ ],
353
+ [
354
+ "m",
355
+ "o"
356
+ ],
357
+ [
358
+ "mo",
359
+ "d"
360
+ ],
361
+ [
362
+ "e",
363
+ "m"
364
+ ],
365
+ [
366
+ "e",
367
+ "t"
368
+ ],
369
+ [
370
+ "e",
371
+ "ğ"
372
+ ],
373
+ [
374
+ "h",
375
+ "a"
376
+ ],
377
+ [
378
+ "s",
379
+ "i"
380
+ ],
381
+ [
382
+ "y",
383
+ "or"
384
+ ],
385
+ [
386
+ "ü",
387
+ "n"
388
+ ],
389
+ [
390
+ "la",
391
+ "ma"
392
+ ],
393
+ [
394
+ "er",
395
+ "i"
396
+ ],
397
+ [
398
+ "mod",
399
+ "el"
400
+ ],
401
+ [
402
+ "a",
403
+ "y"
404
+ ],
405
+ [
406
+ "d",
407
+ "ır"
408
+ ],
409
+ [
410
+ "e",
411
+ "ka"
412
+ ],
413
+ [
414
+ "k",
415
+ "ç"
416
+ ],
417
+ [
418
+ "o",
419
+ "k"
420
+ ],
421
+ [
422
+ "o",
423
+ "l"
424
+ ],
425
+ [
426
+ "p",
427
+ "ay"
428
+ ],
429
+ [
430
+ "z",
431
+ "eka"
432
+ ],
433
+ [
434
+ "ı",
435
+ "n"
436
+ ],
437
+ [
438
+ "la",
439
+ "n"
440
+ ],
441
+ [
442
+ "tü",
443
+ "r"
444
+ ],
445
+ [
446
+ "ya",
447
+ "pay"
448
+ ],
449
+ [
450
+ "im",
451
+ "i"
452
+ ],
453
+ [
454
+ "eğ",
455
+ "it"
456
+ ],
457
+ [
458
+ "a",
459
+ "l"
460
+ ],
461
+ [
462
+ "a",
463
+ "lan"
464
+ ],
465
+ [
466
+ "b",
467
+ "u"
468
+ ],
469
+ [
470
+ "d",
471
+ "e"
472
+ ],
473
+ [
474
+ "e",
475
+ "k"
476
+ ],
477
+ [
478
+ "g",
479
+ "ün"
480
+ ],
481
+ [
482
+ "i",
483
+ "z"
484
+ ],
485
+ [
486
+ "i",
487
+ "ç"
488
+ ],
489
+ [
490
+ "k",
491
+ "t"
492
+ ],
493
+ [
494
+ "l",
495
+ "i"
496
+ ],
497
+ [
498
+ "o",
499
+ "j"
500
+ ],
501
+ [
502
+ "v",
503
+ "a"
504
+ ],
505
+ [
506
+ "ö",
507
+ "n"
508
+ ],
509
+ [
510
+ "ü",
511
+ "k"
512
+ ],
513
+ [
514
+ "ğ",
515
+ "r"
516
+ ],
517
+ [
518
+ "ğ",
519
+ "ı"
520
+ ],
521
+ [
522
+ "ş",
523
+ "t"
524
+ ],
525
+ [
526
+ "er",
527
+ "in"
528
+ ],
529
+ [
530
+ "el",
531
+ "i"
532
+ ],
533
+ [
534
+ "kç",
535
+ "e"
536
+ ],
537
+ [
538
+ "tür",
539
+ "kçe"
540
+ ],
541
+ [
542
+ "iç",
543
+ "in"
544
+ ],
545
+ [
546
+ "a",
547
+ "n"
548
+ ],
549
+ [
550
+ "b",
551
+ "a"
552
+ ],
553
+ [
554
+ "b",
555
+ "il"
556
+ ],
557
+ [
558
+ "d",
559
+ "i"
560
+ ],
561
+ [
562
+ "d",
563
+ "o"
564
+ ],
565
+ [
566
+ "e",
567
+ "c"
568
+ ],
569
+ [
570
+ "f",
571
+ "a"
572
+ ],
573
+ [
574
+ "g",
575
+ "u"
576
+ ],
577
+ [
578
+ "k",
579
+ "u"
580
+ ],
581
+ [
582
+ "k",
583
+ "ü"
584
+ ],
585
+ [
586
+ "k",
587
+ "in"
588
+ ],
589
+ [
590
+ "l",
591
+ "e"
592
+ ],
593
+ [
594
+ "l",
595
+ "ı"
596
+ ],
597
+ [
598
+ "p",
599
+ "r"
600
+ ],
601
+ [
602
+ "s",
603
+ "ı"
604
+ ],
605
+ [
606
+ "u",
607
+ "y"
608
+ ],
609
+ [
610
+ "v",
611
+ "eri"
612
+ ],
613
+ [
614
+ "ç",
615
+ "ok"
616
+ ],
617
+ [
618
+ "ö",
619
+ "ğr"
620
+ ],
621
+ [
622
+ "ü",
623
+ "r"
624
+ ],
625
+ [
626
+ "ma",
627
+ "lar"
628
+ ],
629
+ [
630
+ "em",
631
+ "li"
632
+ ],
633
+ [
634
+ "model",
635
+ "in"
636
+ ],
637
+ [
638
+ "ön",
639
+ "emli"
640
+ ],
641
+ [
642
+ "gu",
643
+ "lama"
644
+ ],
645
+ [
646
+ "uy",
647
+ "gulama"
648
+ ],
649
+ [
650
+ "öğr",
651
+ "en"
652
+ ],
653
+ [
654
+ "a",
655
+ "h"
656
+ ],
657
+ [
658
+ "b",
659
+ "ü"
660
+ ],
661
+ [
662
+ "c",
663
+ "a"
664
+ ],
665
+ [
666
+ "d",
667
+ "u"
668
+ ],
669
+ [
670
+ "d",
671
+ "ı"
672
+ ],
673
+ [
674
+ "d",
675
+ "ir"
676
+ ],
677
+ [
678
+ "d",
679
+ "ün"
680
+ ],
681
+ [
682
+ "e",
683
+ "ç"
684
+ ],
685
+ [
686
+ "e",
687
+ "si"
688
+ ],
689
+ [
690
+ "e",
691
+ "di"
692
+ ],
693
+ [
694
+ "g",
695
+ "in"
696
+ ],
697
+ [
698
+ "i",
699
+ "p"
700
+ ],
701
+ [
702
+ "i",
703
+ "si"
704
+ ],
705
+ [
706
+ "k",
707
+ "eli"
708
+ ],
709
+ [
710
+ "l",
711
+ "em"
712
+ ],
713
+ [
714
+ "l",
715
+ "eri"
716
+ ],
717
+ [
718
+ "n",
719
+ "u"
720
+ ],
721
+ [
722
+ "n",
723
+ "ol"
724
+ ],
725
+ [
726
+ "o",
727
+ "nu"
728
+ ],
729
+ [
730
+ "p",
731
+ "ar"
732
+ ],
733
+ [
734
+ "p",
735
+ "ha"
736
+ ],
737
+ [
738
+ "r",
739
+ "a"
740
+ ],
741
+ [
742
+ "t",
743
+ "u"
744
+ ],
745
+ [
746
+ "t",
747
+ "ok"
748
+ ],
749
+ [
750
+ "t",
751
+ "ek"
752
+ ],
753
+ [
754
+ "y",
755
+ "i"
756
+ ],
757
+ [
758
+ "y",
759
+ "ük"
760
+ ],
761
+ [
762
+ "z",
763
+ "la"
764
+ ],
765
+ [
766
+ "z",
767
+ "el"
768
+ ],
769
+ [
770
+ "ğ",
771
+ "al"
772
+ ],
773
+ [
774
+ "in",
775
+ "in"
776
+ ],
777
+ [
778
+ "el",
779
+ "erin"
780
+ ],
781
+ [
782
+ "il",
783
+ "e"
784
+ ],
785
+ [
786
+ "il",
787
+ "me"
788
+ ],
789
+ [
790
+ "ma",
791
+ "kin"
792
+ ],
793
+ [
794
+ "me",
795
+ "t"
796
+ ],
797
+ [
798
+ "en",
799
+ "i"
800
+ ],
801
+ [
802
+ "en",
803
+ "iz"
804
+ ],
805
+ [
806
+ "tü",
807
+ "pha"
808
+ ],
809
+ [
810
+ "lar",
811
+ "ı"
812
+ ],
813
+ [
814
+ "et",
815
+ "imi"
816
+ ],
817
+ [
818
+ "ol",
819
+ "du"
820
+ ],
821
+ [
822
+ "eğit",
823
+ "im"
824
+ ],
825
+ [
826
+ "kt",
827
+ "ır"
828
+ ],
829
+ [
830
+ "ğı",
831
+ "m"
832
+ ],
833
+ [
834
+ "şt",
835
+ "ir"
836
+ ],
837
+ [
838
+ "do",
839
+ "ğal"
840
+ ],
841
+ [
842
+ "kü",
843
+ "tüpha"
844
+ ],
845
+ [
846
+ "pr",
847
+ "oj"
848
+ ],
849
+ [
850
+ "öğren",
851
+ "imi"
852
+ ],
853
+ [
854
+ "bü",
855
+ "yük"
856
+ ],
857
+ [
858
+ "dün",
859
+ "ya"
860
+ ],
861
+ [
862
+ "edi",
863
+ "yor"
864
+ ],
865
+ [
866
+ "keli",
867
+ "me"
868
+ ],
869
+ [
870
+ "nol",
871
+ "oj"
872
+ ],
873
+ [
874
+ "tok",
875
+ "eniz"
876
+ ],
877
+ [
878
+ "tek",
879
+ "noloj"
880
+ ],
881
+ [
882
+ "makin",
883
+ "e"
884
+ ],
885
+ [
886
+ "kütüpha",
887
+ "n"
888
+ ],
889
+ [
890
+ "tokeniz",
891
+ "er"
892
+ ]
893
+ ]
894
+ }
895
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_max_length": 512,
3
+ "add_prefix_space": false,
4
+ "bos_token": "[BOS]",
5
+ "eos_token": "[EOS]",
6
+ "unk_token": "[UNK]",
7
+ "pad_token": "[PAD]",
8
+ "errors": "replace",
9
+ "tokenizer_class": "PreTrainedTokenizerFast",
10
+ "name_or_path": "/content",
11
+ "do_lower_case": true,
12
+ "special_tokens_map_file": null
13
+ }
vocab.json ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "teknoloj": 190,
3
+ "makine": 191,
4
+ "ba": 108,
5
+ "yi": 159,
6
+ "it": 58,
7
+ "gu": 114,
8
+ "pha": 154,
9
+ "kt": 93,
10
+ "\u00fcr": 126,
11
+ "d\u0131r": 72,
12
+ "\u00e7ok": 124,
13
+ "dir": 139,
14
+ ".": 7,
15
+ "\u00e7": 31,
16
+ "ca": 136,
17
+ "or": 51,
18
+ ")": 5,
19
+ "t\u00fcrk\u00e7e": 105,
20
+ "par": 153,
21
+ "mod": 60,
22
+ "n": 21,
23
+ "[BOS]": 2,
24
+ "e\u011fitim": 176,
25
+ "li": 94,
26
+ "kin": 117,
27
+ "b": 9,
28
+ "d": 11,
29
+ "[UNK]": 1,
30
+ "d\u00fcn": 140,
31
+ "ha": 64,
32
+ "et": 62,
33
+ "kt\u0131r": 177,
34
+ "\u015ftir": 179,
35
+ "eniz": 171,
36
+ "yapay": 82,
37
+ "do\u011fal": 180,
38
+ "ve": 53,
39
+ "r": 24,
40
+ "g": 14,
41
+ "o": 22,
42
+ "e": 12,
43
+ "ra": 155,
44
+ "met": 169,
45
+ "er": 39,
46
+ "m": 20,
47
+ "z": 30,
48
+ "\u00f6\u011fr": 125,
49
+ "\u00f6\u011frenimi": 183,
50
+ "in": 37,
51
+ "oldu": 175,
52
+ "lar\u0131": 173,
53
+ "v": 28,
54
+ "el": 40,
55
+ "tu": 156,
56
+ "ma": 43,
57
+ "pr": 120,
58
+ "il": 41,
59
+ ",": 6,
60
+ "b\u00fcy\u00fck": 184,
61
+ "la": 38,
62
+ "im": 57,
63
+ "isi": 146,
64
+ "e\u011f": 63,
65
+ "ek": 89,
66
+ "em": 61,
67
+ "a": 8,
68
+ "ka": 46,
69
+ "\u00fck": 98,
70
+ "inin": 164,
71
+ "eni": 170,
72
+ "i": 16,
73
+ "e\u00e7": 141,
74
+ "\u0131": 35,
75
+ "du": 137,
76
+ "\u00fcn": 67,
77
+ "t\u00fc": 52,
78
+ "tokeniz": 189,
79
+ "noloj": 188,
80
+ "uy": 122,
81
+ "eri": 69,
82
+ "t\u00fcpha": 172,
83
+ "y\u00fck": 160,
84
+ "bil": 109,
85
+ "dil": 48,
86
+ "ediyor": 186,
87
+ "tek": 158,
88
+ "alan": 86,
89
+ "yor": 66,
90
+ "t\u00fcr": 81,
91
+ "\u011fal": 163,
92
+ "k\u00e7e": 104,
93
+ "(": 4,
94
+ "ar": 49,
95
+ "imi": 83,
96
+ "gulama": 131,
97
+ "\u011f\u0131m": 178,
98
+ "model": 70,
99
+ "bu": 87,
100
+ "zla": 161,
101
+ "f": 13,
102
+ "erin": 102,
103
+ "i\u00e7in": 106,
104
+ "\u011f": 34,
105
+ "uygulama": 132,
106
+ "emli": 128,
107
+ "makin": 168,
108
+ "veri": 123,
109
+ "elerin": 165,
110
+ "\u015ft": 101,
111
+ "k\u00e7": 74,
112
+ "[EOS]": 3,
113
+ "\u00f6": 32,
114
+ "di": 110,
115
+ "ah": 134,
116
+ "p": 23,
117
+ "esi": 142,
118
+ "me": 44,
119
+ "u": 27,
120
+ "zel": 162,
121
+ "nol": 151,
122
+ "modelin": 129,
123
+ "\u011f\u0131": 100,
124
+ "zeka": 78,
125
+ "tokenizer": 193,
126
+ "s\u0131": 121,
127
+ "ec": 112,
128
+ "edi": 143,
129
+ "le": 118,
130
+ "proj": 182,
131
+ "kelime": 187,
132
+ "y": 29,
133
+ "k\u00fct\u00fcpha": 181,
134
+ "c": 10,
135
+ "t": 26,
136
+ "tok": 157,
137
+ "ilme": 167,
138
+ "\u00f6n": 97,
139
+ "[PAD]": 0,
140
+ "ol": 76,
141
+ "lan": 80,
142
+ "\u015f": 36,
143
+ "ip": 145,
144
+ "gin": 144,
145
+ "an": 107,
146
+ "en": 50,
147
+ "\u011fr": 99,
148
+ "h": 15,
149
+ "si": 65,
150
+ "oj": 95,
151
+ "d\u00fcnya": 185,
152
+ "mo": 59,
153
+ "de": 88,
154
+ "ile": 166,
155
+ "keli": 147,
156
+ "fa": 113,
157
+ "etimi": 174,
158
+ "j": 17,
159
+ "g\u00fcn": 90,
160
+ "ok": 75,
161
+ "ay": 71,
162
+ "\u00fc": 33,
163
+ "ku": 115,
164
+ "va": 96,
165
+ "i\u00e7": 92,
166
+ "do": 111,
167
+ "onu": 152,
168
+ "\u0131n": 79,
169
+ "k": 18,
170
+ "bir": 47,
171
+ "ir": 42,
172
+ "l\u0131": 119,
173
+ "k\u00fc": 116,
174
+ "lama": 68,
175
+ "eli": 103,
176
+ "\u0131r": 45,
177
+ "eka": 73,
178
+ "pay": 77,
179
+ "l": 19,
180
+ "iz": 91,
181
+ "leri": 149,
182
+ "ya": 54,
183
+ "e\u011fit": 84,
184
+ "\u00f6nemli": 130,
185
+ "k\u00fct\u00fcphan": 192,
186
+ "\u00f6\u011fren": 133,
187
+ "d\u0131": 138,
188
+ "s": 25,
189
+ "da": 56,
190
+ "al": 85,
191
+ "b\u00fc": 135,
192
+ "lem": 148,
193
+ "malar": 127,
194
+ "lar": 55,
195
+ "nu": 150
196
+ }