KikKoh commited on
Commit
e977247
·
verified ·
1 Parent(s): 791dcac

Upload 4 files

Browse files
input_tokenizer/tokenizer.json CHANGED
@@ -31,7 +31,7 @@
31
  "special": true
32
  },
33
  {
34
- "id": 98,
35
  "content": "<pad>",
36
  "single_word": false,
37
  "lstrip": false,
@@ -69,383 +69,203 @@
69
  ],
70
  [
71
  " ",
72
- -2.0491303237853096
73
- ],
74
- [
75
- "-",
76
- -2.438041258277112
77
  ],
78
  [
79
  "n",
80
- -2.501524477266583
81
  ],
82
  [
83
  "t",
84
- -2.638661273777304
85
  ],
86
  [
87
- "h",
88
- -2.692965388541229
89
  ],
90
  [
91
- "i",
92
- -2.694754512457008
93
  ],
94
  [
95
  "s",
96
- -2.8923410887869334
97
  ],
98
  [
99
  "k",
100
- -3.1970415940597174
101
  ],
102
  [
103
  "u",
104
- -3.2651700304177123
105
  ],
106
  [
107
  "g",
108
- -3.4248376939813774
109
  ],
110
  [
111
  "a",
112
- -3.4963570859315585
113
  ],
114
  [
115
- "á",
116
- -3.9240834819779824
117
  ],
118
  [
119
- "o",
120
- -3.975795370165889
121
  ],
122
  [
123
- "l",
124
- -4.007994364937257
125
  ],
126
  [
127
  "̍",
128
- -4.170577777562061
129
- ],
130
- [
131
- ".",
132
- -4.189869429609686
133
  ],
134
  [
135
  "â",
136
- -4.25267258530512
137
  ],
138
  [
139
  "ā",
140
- -4.298943240036779
141
  ],
142
  [
143
  "p",
144
- -4.3723955360015605
145
- ],
146
- [
147
- "à",
148
- -4.461337584827133
149
  ],
150
  [
151
  "b",
152
- -4.466076323196452
153
  ],
154
  [
155
- ",",
156
- -4.499891555448983
157
  ],
158
  [
159
- "í",
160
- -4.578458027626528
161
  ],
162
  [
163
- "m",
164
- -4.5786016851469995
165
  ],
166
  [
167
  "e",
168
- -4.749594791761277
169
  ],
170
  [
171
  "ó",
172
- -4.759358486978165
173
  ],
174
  [
175
  "ī",
176
- -4.845970910394813
177
  ],
178
  [
179
  "ì",
180
- -4.851051839639437
181
  ],
182
  [
183
  "ê",
184
- -4.877819521265044
185
  ],
186
  [
187
  "ē",
188
- -4.927070017931555
189
  ],
190
  [
191
  "ō",
192
- -5.013998122557426
193
  ],
194
  [
195
  "î",
196
- -5.099728488704239
197
  ],
198
  [
199
  "ū",
200
- -5.347765015858414
201
  ],
202
  [
203
  "ô",
204
- -5.365594702921749
205
- ],
206
- [
207
- "T",
208
- -5.418076380746381
209
  ],
210
  [
211
  "ò",
212
- -5.444021231995655
213
- ],
214
- [
215
- "I",
216
- -5.644843649981011
217
  ],
218
  [
219
  "ú",
220
- -5.656596146044344
221
  ],
222
  [
223
  "̄",
224
- -5.816993865454794
225
  ],
226
  [
227
  "è",
228
- -5.825455837126889
229
- ],
230
- [
231
- "L",
232
- -5.997384198304952
233
  ],
234
  [
235
  "j",
236
- -6.023851907952154
237
  ],
238
  [
239
  "é",
240
- -6.0893458766159565
241
  ],
242
  [
243
  "û",
244
- -6.36828010754205
245
  ],
246
  [
247
  "ù",
248
- -6.395308778217399
249
- ],
250
- [
251
- "K",
252
- -6.453533494168924
253
- ],
254
- [
255
- "G",
256
- -6.49075152735316
257
- ],
258
- [
259
- "?",
260
- -6.808536997604716
261
- ],
262
- [
263
- "H",
264
- -6.961241358131142
265
- ],
266
- [
267
- "S",
268
- -7.013969214753503
269
- ],
270
- [
271
- "”",
272
- -7.060997180654681
273
- ],
274
- [
275
- "“",
276
- -7.060997180654681
277
- ],
278
- [
279
- "!",
280
- -7.1527881697284315
281
  ],
282
  [
283
  "̂",
284
- -7.164162696539445
285
  ],
286
  [
287
  "ǹ",
288
- -7.416567092235995
289
- ],
290
- [
291
- "B",
292
- -7.561557318255249
293
  ],
294
  [
295
  "ń",
296
- -7.616965035855269
297
- ],
298
- [
299
- "P",
300
- -7.724493393771001
301
- ],
302
- [
303
- "A",
304
- -7.790034021304488
305
- ],
306
- [
307
- "M",
308
- -8.091475968442492
309
- ],
310
- [
311
- "N",
312
- -8.58636336389603
313
- ],
314
- [
315
- "Ū",
316
- -8.677334622535016
317
- ],
318
- [
319
- "J",
320
- -8.836547433198561
321
- ],
322
- [
323
- "U",
324
- -9.428393546992902
325
- ],
326
- [
327
- "À",
328
- -9.976578745300651
329
- ],
330
- [
331
- "O",
332
- -10.076652901770805
333
- ],
334
- [
335
- "Í",
336
- -10.112367187485088
337
- ],
338
- [
339
- ";",
340
- -10.187865762983664
341
- ],
342
- [
343
- "E",
344
- -10.227865762983663
345
- ],
346
- [
347
- "Ī",
348
- -10.45608428359349
349
- ],
350
- [
351
- "Â",
352
- -10.623094947508182
353
- ],
354
- [
355
- "Ē",
356
- -10.685594947508188
357
- ],
358
- [
359
- "Ô",
360
- -10.685594947508188
361
- ],
362
- [
363
- "Á",
364
- -10.752261614174865
365
- ],
366
- [
367
- "Ā",
368
- -10.752261614174865
369
- ],
370
- [
371
- "─",
372
- -10.98394659586001
373
  ],
374
  [
375
  "ḿ",
376
- -11.920490607456603
377
- ],
378
- [
379
- "Î",
380
- -11.920490607456603
381
- ],
382
- [
383
- "…",
384
- -12.170490607456603
385
- ],
386
- [
387
- "‘",
388
- -12.170490607456603
389
- ],
390
- [
391
- "’",
392
- -12.170490607456603
393
- ],
394
- [
395
- "Ì",
396
- -12.503823940789935
397
  ],
398
  [
399
  "0",
400
- -12.503823940789935
401
  ],
402
  [
403
- "Ê",
404
- -12.503823940789935
405
  ],
406
  [
407
  "2",
408
- -13.003823940789935
409
- ],
410
- [
411
- "Ó",
412
- -13.003823940789935
413
- ],
414
- [
415
- "̋",
416
- -13.003823940789935
417
  ],
418
  [
419
  "9",
420
- -13.003823940789935
421
  ],
422
  [
423
- "Ǹ",
424
- -13.003823940789935
425
- ],
426
- [
427
- "1",
428
- -13.003823940789935
429
  ],
430
  [
431
  "4",
432
- -14.003823940789935
433
- ],
434
- [
435
- " ",
436
- -14.003823940789935
437
- ],
438
- [
439
- "Ō",
440
- -14.003823940789935
441
  ],
442
  [
443
  "3",
444
- -14.003823940789935
445
- ],
446
- [
447
- "Ú",
448
- -14.003823940789935
449
  ]
450
  ],
451
  "byte_fallback": false
 
31
  "special": true
32
  },
33
  {
34
+ "id": 53,
35
  "content": "<pad>",
36
  "single_word": false,
37
  "lstrip": false,
 
69
  ],
70
  [
71
  " ",
72
+ -1.5286495954173631
 
 
 
 
73
  ],
74
  [
75
  "n",
76
+ -2.4611217054515855
77
  ],
78
  [
79
  "t",
80
+ -2.540307941605734
81
  ],
82
  [
83
+ "i",
84
+ -2.605613338169867
85
  ],
86
  [
87
+ "h",
88
+ -2.640926653183934
89
  ],
90
  [
91
  "s",
92
+ -2.838120763773432
93
  ],
94
  [
95
  "k",
96
+ -3.1211051609419993
97
  ],
98
  [
99
  "u",
100
+ -3.2249282875507816
101
  ],
102
  [
103
  "g",
104
+ -3.3411392475472077
105
  ],
106
  [
107
  "a",
108
+ -3.444651584822539
109
  ],
110
  [
111
+ "l",
112
+ -3.841640698090009
113
  ],
114
  [
115
+ "á",
116
+ -3.8848449071201774
117
  ],
118
  [
119
+ "o",
120
+ -3.935398567362226
121
  ],
122
  [
123
  "̍",
124
+ -4.132458522274371
 
 
 
 
125
  ],
126
  [
127
  "â",
128
+ -4.212791854068779
129
  ],
130
  [
131
  "ā",
132
+ -4.259196026935101
133
  ],
134
  [
135
  "p",
136
+ -4.299807936667646
 
 
 
 
137
  ],
138
  [
139
  "b",
140
+ -4.3836364827691074
141
  ],
142
  [
143
+ "à",
144
+ -4.419137992356868
145
  ],
146
  [
147
+ "m",
148
+ -4.511037313050792
149
  ],
150
  [
151
+ "í",
152
+ -4.53632471778387
153
  ],
154
  [
155
  "e",
156
+ -4.707223106194123
157
  ],
158
  [
159
  "ó",
160
+ -4.720895027230478
161
  ],
162
  [
163
  "ī",
164
+ -4.804104578134297
165
  ],
166
  [
167
  "ì",
168
+ -4.812366760121055
169
  ],
170
  [
171
  "ê",
172
+ -4.839119095873702
173
  ],
174
  [
175
  "ē",
176
+ -4.885699057727079
177
  ],
178
  [
179
  "ō",
180
+ -4.97565684240295
181
  ],
182
  [
183
  "î",
184
+ -5.060400335789719
185
  ],
186
  [
187
  "ū",
188
+ -5.274313929865132
189
  ],
190
  [
191
  "ô",
192
+ -5.322438446627812
 
 
 
 
193
  ],
194
  [
195
  "ò",
196
+ -5.405901976707965
 
 
 
 
197
  ],
198
  [
199
  "ú",
200
+ -5.618054771719086
201
  ],
202
  [
203
  "̄",
204
+ -5.7788746101671045
205
  ],
206
  [
207
  "è",
208
+ -5.7873365818392
 
 
 
 
209
  ],
210
  [
211
  "j",
212
+ -5.927136012185764
213
  ],
214
  [
215
  "é",
216
+ -6.051226621328267
217
  ],
218
  [
219
  "û",
220
+ -6.330160852254361
221
  ],
222
  [
223
  "ù",
224
+ -6.357189522929709
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  ],
226
  [
227
  "̂",
228
+ -7.126043441251755
229
  ],
230
  [
231
  "ǹ",
232
+ -7.373551868781087
 
 
 
 
233
  ],
234
  [
235
  "ń",
236
+ -7.57884578056758
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  ],
238
  [
239
  "ḿ",
240
+ -11.882371352168914
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  ],
242
  [
243
  "0",
244
+ -12.465704685502246
245
  ],
246
  [
247
+ "1",
248
+ -12.965704685502246
249
  ],
250
  [
251
  "2",
252
+ -12.965704685502246
 
 
 
 
 
 
 
 
253
  ],
254
  [
255
  "9",
256
+ -12.965704685502246
257
  ],
258
  [
259
+ "̋",
260
+ -12.965704685502246
 
 
 
 
261
  ],
262
  [
263
  "4",
264
+ -13.965704685502246
 
 
 
 
 
 
 
 
265
  ],
266
  [
267
  "3",
268
+ -13.965704685502246
 
 
 
 
269
  ]
270
  ],
271
  "byte_fallback": false
input_tokenizer/tokenizer_config.json CHANGED
@@ -24,7 +24,7 @@
24
  "single_word": false,
25
  "special": true
26
  },
27
- "98": {
28
  "content": "<pad>",
29
  "lstrip": false,
30
  "normalized": false,
 
24
  "single_word": false,
25
  "special": true
26
  },
27
+ "53": {
28
  "content": "<pad>",
29
  "lstrip": false,
30
  "normalized": false,