burak committed on
Commit
5c71899
·
verified ·
1 Parent(s): a439472

SykoLLM v1: Sıfırdan eğitilen 5M parametreli özel model.

Browse files
config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SykoForCausalLM"
4
+ ],
5
+ "author": "Burak",
6
+ "auto_map": {
7
+ "AutoModelForCausalLM": "modeling_syko.SykoForCausalLM",
8
+ "AutoConfig": "modeling_syko.SykoConfig"
9
+ },
10
+ "block_size": 64,
11
+ "bos_token_id": 2,
12
+ "dropout": 0.2,
13
+ "dtype": "float32",
14
+ "eos_token_id": 3,
15
+ "hidden_size": 256,
16
+ "model_type": "syko",
17
+ "n_embd": 256,
18
+ "n_head": 8,
19
+ "n_layer": 6,
20
+ "name": "Syko-5M-Base",
21
+ "num_attention_heads": 8,
22
+ "num_hidden_layers": 6,
23
+ "pad_token_id": 1,
24
+ "tokenizer_class": "SykoTokenizer",
25
+ "transformers_version": "4.57.3",
26
+ "vocab_size": 324
27
+ }
modeling_syko.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import torch
3
+ import torch.nn as nn
4
+ from torch.nn import functional as F
5
+ from transformers import PretrainedConfig, PreTrainedModel
6
+ # This import was newly added:
7
+ from transformers.modeling_outputs import CausalLMOutputWithPast
8
+
9
class SykoConfig(PretrainedConfig):
    """Hyper-parameter container for the Syko causal language model.

    Each size is stored under its native short name (``n_embd``,
    ``n_layer``, ``n_head``) and mirrored under a second, longer name
    (``hidden_size``, ``num_hidden_layers``, ``num_attention_heads``).

    Args:
        vocab_size: Number of rows in the token embedding table.
        n_embd: Width of the embeddings / residual stream.
        n_layer: Number of transformer blocks.
        n_head: Number of attention heads per block.
        block_size: Maximum sequence length (size of the position table
            and of the causal mask).
        dropout: Dropout probability used throughout the model.
        **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.
    """

    model_type = "syko"

    def __init__(
        self,
        vocab_size=4096,
        n_embd=256,
        n_layer=6,
        n_head=8,
        block_size=64,
        dropout=0.2,
        **kwargs,
    ):
        # Settings that have no alias.
        self.vocab_size = vocab_size
        self.block_size = block_size
        self.dropout = dropout

        # Sizes stored twice: native name first, then the mirrored alias.
        self.n_embd = self.hidden_size = n_embd
        self.n_layer = self.num_hidden_layers = n_layer
        self.n_head = self.num_attention_heads = n_head

        # The base initializer runs last, after all attributes above exist.
        # NOTE(review): presumably any alias present in kwargs (e.g. loaded
        # from config.json) is then re-applied by the base class - confirm.
        super().__init__(**kwargs)
34
+
35
class Head(nn.Module):
    """Single causal self-attention head.

    Args:
        n_embd: Size of the last dimension of the input.
        head_size: Output size of the key/query/value projections.
        block_size: Side length of the pre-built lower-triangular mask,
            i.e. the longest sequence the head can mask.
        dropout: Dropout probability applied to the attention weights.
    """

    def __init__(self, n_embd, head_size, block_size, dropout):
        super().__init__()
        # Bias-free projections; attribute names are part of the state dict.
        self.key = nn.Linear(n_embd, head_size, bias=False)
        self.query = nn.Linear(n_embd, head_size, bias=False)
        self.value = nn.Linear(n_embd, head_size, bias=False)
        # Lower-triangular ones matrix: entry (i, j) is 1 when j <= i.
        causal = torch.tril(torch.ones(block_size, block_size))
        self.register_buffer('tril', causal)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Apply masked attention to ``x`` of shape (B, T, C).

        Returns a tensor of shape (B, T, head_size).
        """
        _, seq_len, width = x.shape
        queries = self.query(x)
        keys = self.key(x)
        values = self.value(x)

        # NOTE: the scale uses the input width (C), not head_size. This is
        # kept as-is on purpose: changing it would change the outputs of
        # weights trained with this scaling.
        scale = width ** -0.5
        scores = torch.matmul(queries, keys.transpose(-2, -1)) * scale

        # Positions above the diagonal (the future) get -inf so that the
        # softmax assigns them zero weight.
        future = self.tril[:seq_len, :seq_len] == 0
        scores = scores.masked_fill(future, float('-inf'))

        weights = self.dropout(F.softmax(scores, dim=-1))
        return torch.matmul(weights, values)
55
+
56
class MultiHeadAttention(nn.Module):
    """Runs several ``Head`` modules side by side and mixes their outputs.

    Args:
        n_head: Number of heads to create.
        head_size: Output size of each head.
        n_embd: Input width of each head and in/out width of the projection.
        block_size: Maximum sequence length, forwarded to each head.
        dropout: Dropout probability (inside each head and after the
            output projection).
    """

    def __init__(self, n_head, head_size, n_embd, block_size, dropout):
        super().__init__()
        head_modules = []
        for _ in range(n_head):
            head_modules.append(Head(n_embd, head_size, block_size, dropout))
        self.heads = nn.ModuleList(head_modules)
        # The projection takes n_embd features, so the concatenated head
        # outputs (n_head * head_size) must add up to n_embd.
        self.proj = nn.Linear(n_embd, n_embd)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Concatenate every head's output on the last axis, project, drop out."""
        per_head = [head(x) for head in self.heads]
        merged = torch.cat(per_head, dim=-1)
        projected = self.proj(merged)
        return self.dropout(projected)
67
+
68
class FeedForward(nn.Module):
    """Position-wise MLP: expand to 4x width, GELU, project back, dropout.

    Args:
        n_embd: Input and output width.
        dropout: Dropout probability applied after the second linear layer.
    """

    def __init__(self, n_embd, dropout):
        super().__init__()
        inner_width = 4 * n_embd
        # Layer order fixes the Sequential indices (0..3) and therefore the
        # parameter names in the state dict.
        layers = [
            nn.Linear(n_embd, inner_width),
            nn.GELU(),
            nn.Linear(inner_width, n_embd),
            nn.Dropout(dropout),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the MLP applied to ``x``."""
        out = self.net(x)
        return out
80
+
81
class Block(nn.Module):
    """Transformer block: pre-LayerNorm attention and MLP, each with a residual.

    Args:
        n_embd: Width of the residual stream.
        n_head: Number of attention heads; each head gets
            ``n_embd // n_head`` features.
        block_size: Maximum sequence length, forwarded to the attention.
        dropout: Dropout probability for attention and MLP.
    """

    def __init__(self, n_embd, n_head, block_size, dropout):
        super().__init__()
        per_head = n_embd // n_head
        self.sa = MultiHeadAttention(n_head, per_head, n_embd, block_size, dropout)
        self.ffwd = FeedForward(n_embd, dropout)
        self.ln1 = nn.LayerNorm(n_embd)
        self.ln2 = nn.LayerNorm(n_embd)

    def forward(self, x):
        """Add the attention output, then the MLP output, to ``x``."""
        # Each sub-layer sees a normalized copy; the residual path itself
        # is never normalized inside the block.
        attended = self.sa(self.ln1(x))
        x = x + attended
        transformed = self.ffwd(self.ln2(x))
        x = x + transformed
        return x
94
+
95
class SykoForCausalLM(PreTrainedModel):
    """Decoder-only transformer language model with learned position embeddings.

    The model embeds tokens and positions, runs ``n_layer`` ``Block``
    modules, applies a final LayerNorm and projects to vocabulary logits.
    No key/value cache is implemented, so every forward pass recomputes
    the whole (at most ``block_size`` long) window.
    """

    config_class = SykoConfig

    def __init__(self, config):
        super().__init__(config)
        # Local copies of the hyper-parameters read from the config.
        self.vocab_size = config.vocab_size
        self.n_embd = config.n_embd
        self.block_size = config.block_size
        self.n_head = config.n_head
        self.n_layer = config.n_layer
        self.dropout = config.dropout

        self.token_embedding_table = nn.Embedding(self.vocab_size, self.n_embd)
        # One learned vector per position, so inputs longer than block_size
        # cannot be embedded.
        self.position_embedding_table = nn.Embedding(self.block_size, self.n_embd)
        self.blocks = nn.Sequential(*[
            Block(self.n_embd, self.n_head, self.block_size, self.dropout)
            for _ in range(self.n_layer)
        ])
        self.ln_f = nn.LayerNorm(self.n_embd)
        self.lm_head = nn.Linear(self.n_embd, self.vocab_size)

        # NOTE(review): Transformers models conventionally call
        # self.post_init() here instead - confirm before switching.
        self.apply(self._init_weights)

    def _init_weights(self, module):
        """Initialize Linear/Embedding weights from N(0, 0.02) and zero biases."""
        if isinstance(module, nn.Linear):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)

    def forward(self, input_ids, labels=None, **kwargs):
        """Compute logits and, when ``labels`` is given, the cross-entropy loss.

        Args:
            input_ids: Integer tensor of shape (B, T) with T <= block_size.
            labels: Optional integer tensor with B * T target ids. Targets
                are compared position-by-position with the logits; no shift
                is applied here.
            **kwargs: Accepted for caller compatibility and ignored.

        Returns:
            CausalLMOutputWithPast with ``loss`` (or None) and ``logits`` of
            shape (B, T, vocab_size); cache, hidden states and attentions
            are always None.

        Raises:
            ValueError: If T exceeds ``block_size``.
        """
        idx = input_ids
        B, T = idx.shape
        if T > self.block_size:
            # Fail early with a readable message instead of an index error
            # from the position embedding lookup.
            raise ValueError(
                f"Sequence length {T} exceeds block_size {self.block_size}"
            )
        device = idx.device

        pos_emb = self.position_embedding_table(torch.arange(T, device=device))
        tok_emb = self.token_embedding_table(idx)
        # pos_emb is (T, n_embd) and broadcasts over the batch dimension.
        x = tok_emb + pos_emb

        x = self.blocks(x)
        x = self.ln_f(x)
        logits = self.lm_head(x)

        loss = None
        if labels is not None:
            # reshape (rather than view) also accepts non-contiguous labels,
            # e.g. a slice of a larger tensor.
            flat_logits = logits.reshape(-1, logits.size(-1))
            flat_labels = labels.reshape(-1)
            loss = F.cross_entropy(flat_logits, flat_labels)

        # Return a CausalLMOutputWithPast instead of a plain tuple.
        return CausalLMOutputWithPast(
            loss=loss,
            logits=logits,
            past_key_values=None,  # no cache is used for now
            hidden_states=None,
            attentions=None,
        )

    def prepare_inputs_for_generation(self, input_ids, **kwargs):
        """Build the forward() inputs for one generation step.

        Only the most recent ``block_size`` tokens are passed on, so the
        growing sequence never exceeds the position embedding table.
        Sequences that already fit are passed through unchanged.
        """
        return {"input_ids": input_ids[:, -self.block_size:]}
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7d49943be4e301ff57492ca12c84cfceaa86bf10c8031bd191379e0c80c02cb
3
+ size 20553835
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
tokenizer.json ADDED
@@ -0,0 +1,1479 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "[UNK]",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "[PAD]",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 2,
26
+ "content": "[BOS]",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 3,
35
+ "content": "[EOS]",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ }
42
+ ],
43
+ "normalizer": null,
44
+ "pre_tokenizer": {
45
+ "type": "ByteLevel",
46
+ "add_prefix_space": false,
47
+ "trim_offsets": true,
48
+ "use_regex": true
49
+ },
50
+ "post_processor": null,
51
+ "decoder": {
52
+ "type": "ByteLevel",
53
+ "add_prefix_space": true,
54
+ "trim_offsets": true,
55
+ "use_regex": true
56
+ },
57
+ "model": {
58
+ "type": "BPE",
59
+ "dropout": null,
60
+ "unk_token": null,
61
+ "continuing_subword_prefix": null,
62
+ "end_of_word_suffix": null,
63
+ "fuse_unk": false,
64
+ "byte_fallback": false,
65
+ "ignore_merges": false,
66
+ "vocab": {
67
+ "[UNK]": 0,
68
+ "[PAD]": 1,
69
+ "[BOS]": 2,
70
+ "[EOS]": 3,
71
+ ".": 4,
72
+ "B": 5,
73
+ "D": 6,
74
+ "E": 7,
75
+ "G": 8,
76
+ "K": 9,
77
+ "L": 10,
78
+ "M": 11,
79
+ "N": 12,
80
+ "O": 13,
81
+ "P": 14,
82
+ "S": 15,
83
+ "T": 16,
84
+ "U": 17,
85
+ "Y": 18,
86
+ "[": 19,
87
+ "]": 20,
88
+ "a": 21,
89
+ "b": 22,
90
+ "c": 23,
91
+ "d": 24,
92
+ "e": 25,
93
+ "f": 26,
94
+ "g": 27,
95
+ "h": 28,
96
+ "i": 29,
97
+ "k": 30,
98
+ "l": 31,
99
+ "m": 32,
100
+ "n": 33,
101
+ "o": 34,
102
+ "p": 35,
103
+ "r": 36,
104
+ "s": 37,
105
+ "t": 38,
106
+ "u": 39,
107
+ "v": 40,
108
+ "y": 41,
109
+ "z": 42,
110
+ "§": 43,
111
+ "±": 44,
112
+ "¶": 45,
113
+ "¼": 46,
114
+ "Ã": 47,
115
+ "Ä": 48,
116
+ "Å": 49,
117
+ "Ċ": 50,
118
+ "Ġ": 51,
119
+ "Ł": 52,
120
+ "ı": 53,
121
+ "EO": 54,
122
+ "EOS": 55,
123
+ "ar": 56,
124
+ "Ġ[": 57,
125
+ "en": 58,
126
+ "ĠÃ": 59,
127
+ "ÄŁ": 60,
128
+ "in": 61,
129
+ "me": 62,
130
+ "od": 63,
131
+ "er": 64,
132
+ "el": 65,
133
+ "an": 66,
134
+ "ir": 67,
135
+ "Ġm": 68,
136
+ "ma": 69,
137
+ "si": 70,
138
+ "Ġç": 71,
139
+ "ÅŁ": 72,
140
+ "Ġy": 73,
141
+ "Ġi": 74,
142
+ "ır": 75,
143
+ "Ġk": 76,
144
+ "Ġs": 77,
145
+ "odel": 78,
146
+ "Ġv": 79,
147
+ "ren": 80,
148
+ "¶ÄŁ": 81,
149
+ "ĠÃ¶ÄŁ": 82,
150
+ "ĠÃ¶ÄŁren": 83,
151
+ "on": 84,
152
+ "ev": 85,
153
+ "Ġya": 86,
154
+ "ara": 87,
155
+ "ok": 88,
156
+ "Ġmodel": 89,
157
+ "lı": 90,
158
+ "it": 91,
159
+ "dir": 92,
160
+ "il": 93,
161
+ "Ġka": 94,
162
+ "eri": 95,
163
+ "im": 96,
164
+ "ĠÃ¶ÄŁrenme": 97,
165
+ "ek": 98,
166
+ "le": 99,
167
+ "as": 100,
168
+ "yon": 101,
169
+ "met": 102,
170
+ "erin": 103,
171
+ "ha": 104,
172
+ "pma": 105,
173
+ "Ġyapma": 106,
174
+ "Ġyapmak": 107,
175
+ "ay": 108,
176
+ "Ġb": 109,
177
+ "Ġp": 110,
178
+ "idir": 111,
179
+ "oken": 112,
180
+ "da": 113,
181
+ "ÄŁit": 114,
182
+ "Ġz": 115,
183
+ "dar": 116,
184
+ "Ġo": 117,
185
+ "Ġkadar": 118,
186
+ "la": 119,
187
+ "ç": 120,
188
+ "dır": 121,
189
+ "ıdır": 122,
190
+ "iy": 123,
191
+ "or": 124,
192
+ "ın": 125,
193
+ "Ġveri": 126,
194
+ "Ġg": 127,
195
+ "ĠÃ¶ÄŁrenmesi": 128,
196
+ "Token": 129,
197
+ "iz": 130,
198
+ "lara": 131,
199
+ "midir": 132,
200
+ "ni": 133,
201
+ "Ġmet": 134,
202
+ "ılara": 135,
203
+ "irme": 136,
204
+ "Ġçev": 137,
205
+ "ÅŁle": 138,
206
+ "ĠiÅŁle": 139,
207
+ "Ġsay": 140,
208
+ "asyon": 141,
209
+ "Tokeniz": 142,
210
+ "Ġmetni": 143,
211
+ "Ġçevirme": 144,
212
+ "ĠiÅŁlemidir": 145,
213
+ "Ġsayılara": 146,
214
+ "Tokenizasyon": 147,
215
+ "Derin": 148,
216
+ "GP": 149,
217
+ "alı": 150,
218
+ "de": 151,
219
+ "hı": 152,
220
+ "leri": 153,
221
+ "zlı": 154,
222
+ "zerin": 155,
223
+ "¼zerin": 156,
224
+ "Ġda": 157,
225
+ "ĠGP": 158,
226
+ "Ġhı": 159,
227
+ "Ġüzerin": 160,
228
+ "Ġçalı": 161,
229
+ "ÅŁÄ±r": 162,
230
+ "Ġmodelleri": 163,
231
+ "Ġdaha": 164,
232
+ "ĠGPU": 165,
233
+ "Ġhızlı": 166,
234
+ "Ġüzerinde": 167,
235
+ "ĠçalÄ±ÅŁÄ±r": 168,
236
+ "Py": 169,
237
+ "hon": 170,
238
+ "kl": 171,
239
+ "thon": 172,
240
+ "odla": 173,
241
+ "Ġçok": 174,
242
+ "Ġile": 175,
243
+ "Ġkodla": 176,
244
+ "evkl": 177,
245
+ "Ġzevkl": 178,
246
+ "Python": 179,
247
+ "Ġkodlama": 180,
248
+ "Ġzevkli": 181,
249
+ "Kod": 182,
250
+ "ken": 183,
251
+ "men": 184,
252
+ "ta": 185,
253
+ "zar": 186,
254
+ "Ġha": 187,
255
+ "arç": 188,
256
+ "ĠÃ¶ÄŁrenmen": 189,
257
+ "Ġyazar": 190,
258
+ "asıdır": 191,
259
+ "Ġbir": 192,
260
+ "Ġparç": 193,
261
+ "Ġhata": 194,
262
+ "ĠÃ¶ÄŁrenmenin": 195,
263
+ "Ġyazarken": 196,
264
+ "Ġparçasıdır": 197,
265
+ "Bu": 198,
266
+ "di": 199,
267
+ "dan": 200,
268
+ "esi": 201,
269
+ "eÅŁ": 202,
270
+ "eÄŁit": 203,
271
+ "fır": 204,
272
+ "resi": 205,
273
+ "ĠeÄŁit": 206,
274
+ "ıfır": 207,
275
+ "Ġmil": 208,
276
+ "Ġsıfır": 209,
277
+ "Ġve": 210,
278
+ "Ġvar": 211,
279
+ "aramet": 212,
280
+ "ildi": 213,
281
+ "ĠbeÅŁ": 214,
282
+ "Ġparamet": 215,
283
+ "ĠeÄŁitildi": 216,
284
+ "Ġmilyon": 217,
285
+ "Ġsıfırdan": 218,
286
+ "Ġparametresi": 219,
287
+ "EÄŁit": 220,
288
+ "lu": 221,
289
+ "lit": 222,
290
+ "ne": 223,
291
+ "se": 224,
292
+ "yi": 225,
293
+ "Ġne": 226,
294
+ "eliy": 227,
295
+ "Ġiyi": 228,
296
+ "Ġkalit": 229,
297
+ "Ġolu": 230,
298
+ "Ġverisi": 231,
299
+ "EÄŁitim": 232,
300
+ "eliyse": 233,
301
+ "Ġkaliteliyse": 234,
302
+ "Ġolur": 235,
303
+ "LP": 236,
304
+ "NLP": 237,
305
+ "Tr": 238,
306
+ "al": 239,
307
+ "dev": 240,
308
+ "for": 241,
309
+ "isi": 242,
310
+ "rs": 243,
311
+ "rim": 244,
312
+ "sfor": 245,
313
+ "tt": 246,
314
+ "ĠNLP": 247,
315
+ "Ġal": 248,
316
+ "Ġdev": 249,
317
+ "arisi": 250,
318
+ "mers": 251,
319
+ "anın": 252,
320
+ "ansfor": 253,
321
+ "Ġmim": 254,
322
+ "Ġyara": 255,
323
+ "Transfor": 256,
324
+ "ttı": 257,
325
+ "Ġalanın": 258,
326
+ "Ġdevrim": 259,
327
+ "Ġmimarisi": 260,
328
+ "Ġyarattı": 261,
329
+ "Transformers": 262,
330
+ "Ġalanında": 263,
331
+ "Ya": 264,
332
+ "ce": 265,
333
+ "ece": 266,
334
+ "len": 267,
335
+ "pay": 268,
336
+ "san": 269,
337
+ "Ġin": 270,
338
+ "ĠÅŁ": 271,
339
+ "ÄŁin": 272,
340
+ "ģın": 273,
341
+ "elece": 274,
342
+ "lıģın": 275,
343
+ "diriy": 276,
344
+ "illen": 277,
345
+ "eka": 278,
346
+ "ekillen": 279,
347
+ "Ġzeka": 280,
348
+ "Ġgelece": 281,
349
+ "Yapay": 282,
350
+ "sanlıģın": 283,
351
+ "Ġinsanlıģın": 284,
352
+ "ĠÅŁekillen": 285,
353
+ "ÄŁini": 286,
354
+ "diriyor": 287,
355
+ "ĠgeleceÄŁini": 288,
356
+ "ĠÅŁekillendiriyor": 289,
357
+ "Model": 290,
358
+ "lidir": 291,
359
+ "sma": 292,
360
+ "token": 293,
361
+ "usma": 294,
362
+ "yı": 295,
363
+ "ĠEOS": 296,
364
+ "Ġtoken": 297,
365
+ "erek": 298,
366
+ "Ġiç": 299,
367
+ "Ġsusma": 300,
368
+ "Ġgerek": 301,
369
+ "Modelin": 302,
370
+ "Ġtokenı": 303,
371
+ "Ġiçin": 304,
372
+ "Ġsusmayı": 305,
373
+ "Ġgereklidir": 306,
374
+ "Ma": 307,
375
+ "at": 308,
376
+ "den": 309,
377
+ "kar": 310,
378
+ "kin": 311,
379
+ "Ġan": 312,
380
+ "ıkar": 313,
381
+ "anat": 314,
382
+ "Ġçıkar": 315,
383
+ "Ġsanat": 316,
384
+ "lam": 317,
385
+ "Ġveriden": 318,
386
+ "Makin": 319,
387
+ "Ġanlam": 320,
388
+ "Ġçıkarma": 321,
389
+ "Ġsanatıdır": 322,
390
+ "Makine": 323
391
+ },
392
+ "merges": [
393
+ [
394
+ "Ä",
395
+ "±"
396
+ ],
397
+ [
398
+ "E",
399
+ "O"
400
+ ],
401
+ [
402
+ "EO",
403
+ "S"
404
+ ],
405
+ [
406
+ "a",
407
+ "r"
408
+ ],
409
+ [
410
+ "Ġ",
411
+ "["
412
+ ],
413
+ [
414
+ "e",
415
+ "n"
416
+ ],
417
+ [
418
+ "Ġ",
419
+ "Ã"
420
+ ],
421
+ [
422
+ "Ä",
423
+ "Ł"
424
+ ],
425
+ [
426
+ "i",
427
+ "n"
428
+ ],
429
+ [
430
+ "m",
431
+ "e"
432
+ ],
433
+ [
434
+ "o",
435
+ "d"
436
+ ],
437
+ [
438
+ "e",
439
+ "r"
440
+ ],
441
+ [
442
+ "e",
443
+ "l"
444
+ ],
445
+ [
446
+ "a",
447
+ "n"
448
+ ],
449
+ [
450
+ "i",
451
+ "r"
452
+ ],
453
+ [
454
+ "Ġ",
455
+ "m"
456
+ ],
457
+ [
458
+ "m",
459
+ "a"
460
+ ],
461
+ [
462
+ "s",
463
+ "i"
464
+ ],
465
+ [
466
+ "ĠÃ",
467
+ "§"
468
+ ],
469
+ [
470
+ "Å",
471
+ "Ł"
472
+ ],
473
+ [
474
+ "Ġ",
475
+ "y"
476
+ ],
477
+ [
478
+ "Ġ",
479
+ "i"
480
+ ],
481
+ [
482
+ "ı",
483
+ "r"
484
+ ],
485
+ [
486
+ "Ġ",
487
+ "k"
488
+ ],
489
+ [
490
+ "Ġ",
491
+ "s"
492
+ ],
493
+ [
494
+ "od",
495
+ "el"
496
+ ],
497
+ [
498
+ "Ġ",
499
+ "v"
500
+ ],
501
+ [
502
+ "r",
503
+ "en"
504
+ ],
505
+ [
506
+ "¶",
507
+ "ÄŁ"
508
+ ],
509
+ [
510
+ "ĠÃ",
511
+ "¶ÄŁ"
512
+ ],
513
+ [
514
+ "ĠÃ¶ÄŁ",
515
+ "ren"
516
+ ],
517
+ [
518
+ "o",
519
+ "n"
520
+ ],
521
+ [
522
+ "e",
523
+ "v"
524
+ ],
525
+ [
526
+ "Ġy",
527
+ "a"
528
+ ],
529
+ [
530
+ "ar",
531
+ "a"
532
+ ],
533
+ [
534
+ "o",
535
+ "k"
536
+ ],
537
+ [
538
+ "Ġm",
539
+ "odel"
540
+ ],
541
+ [
542
+ "l",
543
+ "ı"
544
+ ],
545
+ [
546
+ "i",
547
+ "t"
548
+ ],
549
+ [
550
+ "d",
551
+ "ir"
552
+ ],
553
+ [
554
+ "i",
555
+ "l"
556
+ ],
557
+ [
558
+ "Ġk",
559
+ "a"
560
+ ],
561
+ [
562
+ "er",
563
+ "i"
564
+ ],
565
+ [
566
+ "i",
567
+ "m"
568
+ ],
569
+ [
570
+ "ĠÃ¶ÄŁren",
571
+ "me"
572
+ ],
573
+ [
574
+ "e",
575
+ "k"
576
+ ],
577
+ [
578
+ "l",
579
+ "e"
580
+ ],
581
+ [
582
+ "a",
583
+ "s"
584
+ ],
585
+ [
586
+ "y",
587
+ "on"
588
+ ],
589
+ [
590
+ "me",
591
+ "t"
592
+ ],
593
+ [
594
+ "er",
595
+ "in"
596
+ ],
597
+ [
598
+ "h",
599
+ "a"
600
+ ],
601
+ [
602
+ "p",
603
+ "ma"
604
+ ],
605
+ [
606
+ "Ġya",
607
+ "pma"
608
+ ],
609
+ [
610
+ "Ġyapma",
611
+ "k"
612
+ ],
613
+ [
614
+ "a",
615
+ "y"
616
+ ],
617
+ [
618
+ "Ġ",
619
+ "b"
620
+ ],
621
+ [
622
+ "Ġ",
623
+ "p"
624
+ ],
625
+ [
626
+ "i",
627
+ "dir"
628
+ ],
629
+ [
630
+ "ok",
631
+ "en"
632
+ ],
633
+ [
634
+ "d",
635
+ "a"
636
+ ],
637
+ [
638
+ "ÄŁ",
639
+ "it"
640
+ ],
641
+ [
642
+ "Ġ",
643
+ "z"
644
+ ],
645
+ [
646
+ "d",
647
+ "ar"
648
+ ],
649
+ [
650
+ "Ġ",
651
+ "o"
652
+ ],
653
+ [
654
+ "Ġka",
655
+ "dar"
656
+ ],
657
+ [
658
+ "l",
659
+ "a"
660
+ ],
661
+ [
662
+ "Ã",
663
+ "§"
664
+ ],
665
+ [
666
+ "d",
667
+ "ır"
668
+ ],
669
+ [
670
+ "ı",
671
+ "dır"
672
+ ],
673
+ [
674
+ "i",
675
+ "y"
676
+ ],
677
+ [
678
+ "o",
679
+ "r"
680
+ ],
681
+ [
682
+ "ı",
683
+ "n"
684
+ ],
685
+ [
686
+ "Ġv",
687
+ "eri"
688
+ ],
689
+ [
690
+ "Ġ",
691
+ "g"
692
+ ],
693
+ [
694
+ "ĠÃ¶ÄŁrenme",
695
+ "si"
696
+ ],
697
+ [
698
+ "T",
699
+ "oken"
700
+ ],
701
+ [
702
+ "i",
703
+ "z"
704
+ ],
705
+ [
706
+ "l",
707
+ "ara"
708
+ ],
709
+ [
710
+ "m",
711
+ "idir"
712
+ ],
713
+ [
714
+ "n",
715
+ "i"
716
+ ],
717
+ [
718
+ "Ġ",
719
+ "met"
720
+ ],
721
+ [
722
+ "ı",
723
+ "lara"
724
+ ],
725
+ [
726
+ "ir",
727
+ "me"
728
+ ],
729
+ [
730
+ "Ġç",
731
+ "ev"
732
+ ],
733
+ [
734
+ "ÅŁ",
735
+ "le"
736
+ ],
737
+ [
738
+ "Ġi",
739
+ "ÅŁle"
740
+ ],
741
+ [
742
+ "Ġs",
743
+ "ay"
744
+ ],
745
+ [
746
+ "as",
747
+ "yon"
748
+ ],
749
+ [
750
+ "Token",
751
+ "iz"
752
+ ],
753
+ [
754
+ "Ġmet",
755
+ "ni"
756
+ ],
757
+ [
758
+ "Ġçev",
759
+ "irme"
760
+ ],
761
+ [
762
+ "ĠiÅŁle",
763
+ "midir"
764
+ ],
765
+ [
766
+ "Ġsay",
767
+ "ılara"
768
+ ],
769
+ [
770
+ "Tokeniz",
771
+ "asyon"
772
+ ],
773
+ [
774
+ "D",
775
+ "erin"
776
+ ],
777
+ [
778
+ "G",
779
+ "P"
780
+ ],
781
+ [
782
+ "a",
783
+ "lı"
784
+ ],
785
+ [
786
+ "d",
787
+ "e"
788
+ ],
789
+ [
790
+ "h",
791
+ "ı"
792
+ ],
793
+ [
794
+ "l",
795
+ "eri"
796
+ ],
797
+ [
798
+ "z",
799
+ "lı"
800
+ ],
801
+ [
802
+ "z",
803
+ "erin"
804
+ ],
805
+ [
806
+ "¼",
807
+ "zerin"
808
+ ],
809
+ [
810
+ "Ġ",
811
+ "da"
812
+ ],
813
+ [
814
+ "Ġ",
815
+ "GP"
816
+ ],
817
+ [
818
+ "Ġ",
819
+ "hı"
820
+ ],
821
+ [
822
+ "ĠÃ",
823
+ "¼zerin"
824
+ ],
825
+ [
826
+ "Ġç",
827
+ "alı"
828
+ ],
829
+ [
830
+ "ÅŁ",
831
+ "ır"
832
+ ],
833
+ [
834
+ "Ġmodel",
835
+ "leri"
836
+ ],
837
+ [
838
+ "Ġda",
839
+ "ha"
840
+ ],
841
+ [
842
+ "ĠGP",
843
+ "U"
844
+ ],
845
+ [
846
+ "Ġhı",
847
+ "zlı"
848
+ ],
849
+ [
850
+ "Ġüzerin",
851
+ "de"
852
+ ],
853
+ [
854
+ "Ġçalı",
855
+ "ÅŁÄ±r"
856
+ ],
857
+ [
858
+ "P",
859
+ "y"
860
+ ],
861
+ [
862
+ "h",
863
+ "on"
864
+ ],
865
+ [
866
+ "k",
867
+ "l"
868
+ ],
869
+ [
870
+ "t",
871
+ "hon"
872
+ ],
873
+ [
874
+ "od",
875
+ "la"
876
+ ],
877
+ [
878
+ "Ġç",
879
+ "ok"
880
+ ],
881
+ [
882
+ "Ġi",
883
+ "le"
884
+ ],
885
+ [
886
+ "Ġk",
887
+ "odla"
888
+ ],
889
+ [
890
+ "ev",
891
+ "kl"
892
+ ],
893
+ [
894
+ "Ġz",
895
+ "evkl"
896
+ ],
897
+ [
898
+ "Py",
899
+ "thon"
900
+ ],
901
+ [
902
+ "Ġkodla",
903
+ "ma"
904
+ ],
905
+ [
906
+ "Ġzevkl",
907
+ "i"
908
+ ],
909
+ [
910
+ "K",
911
+ "od"
912
+ ],
913
+ [
914
+ "k",
915
+ "en"
916
+ ],
917
+ [
918
+ "m",
919
+ "en"
920
+ ],
921
+ [
922
+ "t",
923
+ "a"
924
+ ],
925
+ [
926
+ "z",
927
+ "ar"
928
+ ],
929
+ [
930
+ "Ġ",
931
+ "ha"
932
+ ],
933
+ [
934
+ "ar",
935
+ "ç"
936
+ ],
937
+ [
938
+ "ĠÃ¶ÄŁren",
939
+ "men"
940
+ ],
941
+ [
942
+ "Ġya",
943
+ "zar"
944
+ ],
945
+ [
946
+ "as",
947
+ "ıdır"
948
+ ],
949
+ [
950
+ "Ġb",
951
+ "ir"
952
+ ],
953
+ [
954
+ "Ġp",
955
+ "arç"
956
+ ],
957
+ [
958
+ "Ġha",
959
+ "ta"
960
+ ],
961
+ [
962
+ "ĠÃ¶ÄŁrenmen",
963
+ "in"
964
+ ],
965
+ [
966
+ "Ġyazar",
967
+ "ken"
968
+ ],
969
+ [
970
+ "Ġparç",
971
+ "asıdır"
972
+ ],
973
+ [
974
+ "B",
975
+ "u"
976
+ ],
977
+ [
978
+ "d",
979
+ "i"
980
+ ],
981
+ [
982
+ "d",
983
+ "an"
984
+ ],
985
+ [
986
+ "e",
987
+ "si"
988
+ ],
989
+ [
990
+ "e",
991
+ "ÅŁ"
992
+ ],
993
+ [
994
+ "e",
995
+ "ÄŁit"
996
+ ],
997
+ [
998
+ "f",
999
+ "ır"
1000
+ ],
1001
+ [
1002
+ "r",
1003
+ "esi"
1004
+ ],
1005
+ [
1006
+ "Ġ",
1007
+ "eÄŁit"
1008
+ ],
1009
+ [
1010
+ "ı",
1011
+ "fır"
1012
+ ],
1013
+ [
1014
+ "Ġm",
1015
+ "il"
1016
+ ],
1017
+ [
1018
+ "Ġs",
1019
+ "ıfır"
1020
+ ],
1021
+ [
1022
+ "Ġv",
1023
+ "e"
1024
+ ],
1025
+ [
1026
+ "Ġv",
1027
+ "ar"
1028
+ ],
1029
+ [
1030
+ "ara",
1031
+ "met"
1032
+ ],
1033
+ [
1034
+ "il",
1035
+ "di"
1036
+ ],
1037
+ [
1038
+ "Ġb",
1039
+ "eÅŁ"
1040
+ ],
1041
+ [
1042
+ "Ġp",
1043
+ "aramet"
1044
+ ],
1045
+ [
1046
+ "ĠeÄŁit",
1047
+ "ildi"
1048
+ ],
1049
+ [
1050
+ "Ġmil",
1051
+ "yon"
1052
+ ],
1053
+ [
1054
+ "Ġsıfır",
1055
+ "dan"
1056
+ ],
1057
+ [
1058
+ "Ġparamet",
1059
+ "resi"
1060
+ ],
1061
+ [
1062
+ "E",
1063
+ "ÄŁit"
1064
+ ],
1065
+ [
1066
+ "l",
1067
+ "u"
1068
+ ],
1069
+ [
1070
+ "l",
1071
+ "it"
1072
+ ],
1073
+ [
1074
+ "n",
1075
+ "e"
1076
+ ],
1077
+ [
1078
+ "s",
1079
+ "e"
1080
+ ],
1081
+ [
1082
+ "y",
1083
+ "i"
1084
+ ],
1085
+ [
1086
+ "Ġ",
1087
+ "ne"
1088
+ ],
1089
+ [
1090
+ "el",
1091
+ "iy"
1092
+ ],
1093
+ [
1094
+ "Ġi",
1095
+ "yi"
1096
+ ],
1097
+ [
1098
+ "Ġka",
1099
+ "lit"
1100
+ ],
1101
+ [
1102
+ "Ġo",
1103
+ "lu"
1104
+ ],
1105
+ [
1106
+ "Ġveri",
1107
+ "si"
1108
+ ],
1109
+ [
1110
+ "EÄŁit",
1111
+ "im"
1112
+ ],
1113
+ [
1114
+ "eliy",
1115
+ "se"
1116
+ ],
1117
+ [
1118
+ "Ġkalit",
1119
+ "eliyse"
1120
+ ],
1121
+ [
1122
+ "Ġolu",
1123
+ "r"
1124
+ ],
1125
+ [
1126
+ "L",
1127
+ "P"
1128
+ ],
1129
+ [
1130
+ "N",
1131
+ "LP"
1132
+ ],
1133
+ [
1134
+ "T",
1135
+ "r"
1136
+ ],
1137
+ [
1138
+ "a",
1139
+ "l"
1140
+ ],
1141
+ [
1142
+ "d",
1143
+ "ev"
1144
+ ],
1145
+ [
1146
+ "f",
1147
+ "or"
1148
+ ],
1149
+ [
1150
+ "i",
1151
+ "si"
1152
+ ],
1153
+ [
1154
+ "r",
1155
+ "s"
1156
+ ],
1157
+ [
1158
+ "r",
1159
+ "im"
1160
+ ],
1161
+ [
1162
+ "s",
1163
+ "for"
1164
+ ],
1165
+ [
1166
+ "t",
1167
+ "t"
1168
+ ],
1169
+ [
1170
+ "Ġ",
1171
+ "NLP"
1172
+ ],
1173
+ [
1174
+ "Ġ",
1175
+ "al"
1176
+ ],
1177
+ [
1178
+ "Ġ",
1179
+ "dev"
1180
+ ],
1181
+ [
1182
+ "ar",
1183
+ "isi"
1184
+ ],
1185
+ [
1186
+ "me",
1187
+ "rs"
1188
+ ],
1189
+ [
1190
+ "an",
1191
+ "ın"
1192
+ ],
1193
+ [
1194
+ "an",
1195
+ "sfor"
1196
+ ],
1197
+ [
1198
+ "Ġm",
1199
+ "im"
1200
+ ],
1201
+ [
1202
+ "Ġy",
1203
+ "ara"
1204
+ ],
1205
+ [
1206
+ "Tr",
1207
+ "ansfor"
1208
+ ],
1209
+ [
1210
+ "tt",
1211
+ "ı"
1212
+ ],
1213
+ [
1214
+ "Ġal",
1215
+ "anın"
1216
+ ],
1217
+ [
1218
+ "Ġdev",
1219
+ "rim"
1220
+ ],
1221
+ [
1222
+ "Ġmim",
1223
+ "arisi"
1224
+ ],
1225
+ [
1226
+ "Ġyara",
1227
+ "ttı"
1228
+ ],
1229
+ [
1230
+ "Transfor",
1231
+ "mers"
1232
+ ],
1233
+ [
1234
+ "Ġalanın",
1235
+ "da"
1236
+ ],
1237
+ [
1238
+ "Y",
1239
+ "a"
1240
+ ],
1241
+ [
1242
+ "c",
1243
+ "e"
1244
+ ],
1245
+ [
1246
+ "e",
1247
+ "ce"
1248
+ ],
1249
+ [
1250
+ "l",
1251
+ "en"
1252
+ ],
1253
+ [
1254
+ "p",
1255
+ "ay"
1256
+ ],
1257
+ [
1258
+ "s",
1259
+ "an"
1260
+ ],
1261
+ [
1262
+ "Ġ",
1263
+ "in"
1264
+ ],
1265
+ [
1266
+ "Ġ",
1267
+ "ÅŁ"
1268
+ ],
1269
+ [
1270
+ "ÄŁ",
1271
+ "in"
1272
+ ],
1273
+ [
1274
+ "ÄŁ",
1275
+ "ın"
1276
+ ],
1277
+ [
1278
+ "el",
1279
+ "ece"
1280
+ ],
1281
+ [
1282
+ "lı",
1283
+ "ģın"
1284
+ ],
1285
+ [
1286
+ "dir",
1287
+ "iy"
1288
+ ],
1289
+ [
1290
+ "il",
1291
+ "len"
1292
+ ],
1293
+ [
1294
+ "ek",
1295
+ "a"
1296
+ ],
1297
+ [
1298
+ "ek",
1299
+ "illen"
1300
+ ],
1301
+ [
1302
+ "Ġz",
1303
+ "eka"
1304
+ ],
1305
+ [
1306
+ "Ġg",
1307
+ "elece"
1308
+ ],
1309
+ [
1310
+ "Ya",
1311
+ "pay"
1312
+ ],
1313
+ [
1314
+ "san",
1315
+ "lıģın"
1316
+ ],
1317
+ [
1318
+ "Ġin",
1319
+ "sanlıģın"
1320
+ ],
1321
+ [
1322
+ "ĠÅŁ",
1323
+ "ekillen"
1324
+ ],
1325
+ [
1326
+ "ÄŁin",
1327
+ "i"
1328
+ ],
1329
+ [
1330
+ "diriy",
1331
+ "or"
1332
+ ],
1333
+ [
1334
+ "Ġgelece",
1335
+ "ÄŁini"
1336
+ ],
1337
+ [
1338
+ "ĠÅŁekillen",
1339
+ "diriyor"
1340
+ ],
1341
+ [
1342
+ "M",
1343
+ "odel"
1344
+ ],
1345
+ [
1346
+ "l",
1347
+ "idir"
1348
+ ],
1349
+ [
1350
+ "s",
1351
+ "ma"
1352
+ ],
1353
+ [
1354
+ "t",
1355
+ "oken"
1356
+ ],
1357
+ [
1358
+ "u",
1359
+ "sma"
1360
+ ],
1361
+ [
1362
+ "y",
1363
+ "ı"
1364
+ ],
1365
+ [
1366
+ "Ġ",
1367
+ "EOS"
1368
+ ],
1369
+ [
1370
+ "Ġ",
1371
+ "token"
1372
+ ],
1373
+ [
1374
+ "er",
1375
+ "ek"
1376
+ ],
1377
+ [
1378
+ "Ġi",
1379
+ "ç"
1380
+ ],
1381
+ [
1382
+ "Ġs",
1383
+ "usma"
1384
+ ],
1385
+ [
1386
+ "Ġg",
1387
+ "erek"
1388
+ ],
1389
+ [
1390
+ "Model",
1391
+ "in"
1392
+ ],
1393
+ [
1394
+ "Ġtoken",
1395
+ "ı"
1396
+ ],
1397
+ [
1398
+ "Ġiç",
1399
+ "in"
1400
+ ],
1401
+ [
1402
+ "Ġsusma",
1403
+ "yı"
1404
+ ],
1405
+ [
1406
+ "Ġgerek",
1407
+ "lidir"
1408
+ ],
1409
+ [
1410
+ "M",
1411
+ "a"
1412
+ ],
1413
+ [
1414
+ "a",
1415
+ "t"
1416
+ ],
1417
+ [
1418
+ "d",
1419
+ "en"
1420
+ ],
1421
+ [
1422
+ "k",
1423
+ "ar"
1424
+ ],
1425
+ [
1426
+ "k",
1427
+ "in"
1428
+ ],
1429
+ [
1430
+ "Ġ",
1431
+ "an"
1432
+ ],
1433
+ [
1434
+ "ı",
1435
+ "kar"
1436
+ ],
1437
+ [
1438
+ "an",
1439
+ "at"
1440
+ ],
1441
+ [
1442
+ "Ġç",
1443
+ "ıkar"
1444
+ ],
1445
+ [
1446
+ "Ġs",
1447
+ "anat"
1448
+ ],
1449
+ [
1450
+ "la",
1451
+ "m"
1452
+ ],
1453
+ [
1454
+ "Ġveri",
1455
+ "den"
1456
+ ],
1457
+ [
1458
+ "Ma",
1459
+ "kin"
1460
+ ],
1461
+ [
1462
+ "Ġan",
1463
+ "lam"
1464
+ ],
1465
+ [
1466
+ "Ġçıkar",
1467
+ "ma"
1468
+ ],
1469
+ [
1470
+ "Ġsanat",
1471
+ "ıdır"
1472
+ ],
1473
+ [
1474
+ "Makin",
1475
+ "e"
1476
+ ]
1477
+ ]
1478
+ }
1479
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[UNK]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[PAD]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[BOS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[EOS]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ }
35
+ },
36
+ "clean_up_tokenization_spaces": false,
37
+ "extra_special_tokens": {},
38
+ "model_max_length": 1000000000000000019884624838656,
39
+ "tokenizer_class": "PreTrainedTokenizerFast"
40
+ }