parislo committed on
Commit
b5d2f99
·
verified ·
1 Parent(s): a70bb1f

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer.json +210 -170
tokenizer.json CHANGED
@@ -289,181 +289,223 @@
289
  "ł": 254,
290
  "Ń": 255,
291
  "th": 256,
292
- "the": 257,
293
- "Ġthe": 258,
294
- "Ġi": 259,
295
- "Ġa": 260,
296
- "ĠÎ": 261,
297
- "en": 262,
298
- "re": 263,
299
- "Ġo": 264,
300
- "si": 265,
301
- "Ġis": 266,
302
- "al": 267,
303
- "ri": 268,
304
- "at": 269,
305
- "es": 270,
306
- "le": 271,
307
- "on": 272,
308
- "ν": 273,
309
- "Ïģ": 274,
310
- "Ġf": 275,
311
- "ĠÏ": 276,
312
- "Ġof": 277,
313
- "nd": 278,
314
- "Ġ2": 279,
315
- "ÏģÎ": 280,
316
- "an": 281,
317
- "he": 282,
318
- "Ġb": 283,
319
- "Ġc": 284,
320
- "Ġe": 285,
321
- "Ġs": 286,
322
- "Ġt": 287,
323
- "Eu": 288,
324
- "ion": 289,
325
- "la": 290,
326
- "mu": 291,
327
- "om": 292,
328
- "or": 293,
329
- "ore": 294,
330
- "se": 295,
331
- "ten": 296,
332
- "ε": 297,
333
- "ο": 298,
334
- "ĠT": 299,
335
- "Ġsi": 300,
336
- "ĠEu": 301,
337
- "Ġand": 302,
338
- "Ġfu": 303,
339
- "ĠÏĦ": 304,
340
- "mula": 305,
341
- "ormula": 306,
342
- "Ġsid": 307,
343
- "'s": 308,
344
- "ag": 309,
345
- "et": 310,
346
- "hy": 311,
347
- "po": 312,
348
- "qu": 313,
349
- "use": 314,
350
- "¹Ï": 315,
351
- "ί": 316,
352
- "α": 317,
353
- "η": 318,
354
- "ÏĤ": 319,
355
- "Ïħ": 320,
356
- "Ġ+": 321,
357
- "Ġ-": 322,
358
- "Ġ=": 323,
359
- "Ġl": 324,
360
- "Ġth": 325,
361
- "Ġre": 326,
362
- "İν": 327,
363
- "ther": 328,
364
- "Ġin": 329,
365
- "Ġγ": 330,
366
- "eng": 331,
367
- "ent": 332,
368
- "Ġother": 333,
369
- "rig": 334,
370
- "ler": 335,
371
- "Ġformula": 336,
372
- "ĠThe": 337,
373
- "ĠEuler": 338,
374
- "Ġsides": 339,
375
- "It": 340,
376
- "Py": 341,
377
- "am": 342,
378
- "are": 343,
379
- "ct": 344,
380
- "gle": 345,
381
- "hi": 346,
382
- "ht": 347,
383
- "in": 348,
384
- "li": 349,
385
- "lat": 350,
386
- "nct": 351,
387
- "ple": 352,
388
- "ry": 353,
389
- "um": 354,
390
- "wo": 355,
391
- "whe": 356,
392
- "³Ï": 357,
393
- "γÏ": 358,
394
- "ÏĢ": 359,
395
- "ÏĦ": 360,
396
- "Ġg": 361,
397
- "Ġn": 362,
398
- "Ġhy": 363,
399
- "Ġrig": 364,
400
- "ĠIt": 365,
401
- "ĠPy": 366,
402
- "ĥη": 367,
403
- "īν": 368,
404
- "thag": 369,
405
- "Ġan": 370,
406
- "Ġα": 371,
407
- "Ġβ": 372,
408
- "Ġμ": 373,
409
- "ndam": 374,
410
- "ÏģιÏ": 375,
411
- "Ġcom": 376,
412
- "Ġex": 377,
413
- "Ġsqu": 378,
414
- "Ġtwo": 379,
415
- "ions": 380,
416
- "omet": 381,
417
- "orem": 382,
418
- "orean": 383,
419
- "tenuse": 384,
420
- "οÏħ": 385,
421
- "Ġfunct": 386,
422
- "Ġfundam": 387,
423
- "potenuse": 388,
424
- "Ġleng": 389,
425
- "Ġthat": 390,
426
- "Ġrelat": 391,
427
- "ental": 392,
428
- "plex": 393,
429
- "where": 394,
430
- "Ġhypotenuse": 395,
431
- "Ġright": 396,
432
- "ĠPythag": 397,
433
- "Ġcomplex": 398,
434
- "Ġsquare": 399,
435
- "Ġfundamental": 400,
436
- "Ġlength": 401,
437
- "ĠPythagorean": 402
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
438
  },
439
  "merges": [
440
  "t h",
 
 
441
  "th e",
 
 
442
  "Ġ the",
 
443
  "Ġ i",
 
444
  "Ġ a",
445
- "Ġ Î",
446
  "e n",
447
  "r e",
 
 
448
  "Ġ o",
 
 
449
  "s i",
 
450
  "Ġi s",
451
  "a l",
452
  "r i",
 
 
453
  "a t",
454
  "e s",
455
  "l e",
456
  "o n",
457
- "Î ½",
458
- "Ï ģ",
 
459
  "Ġ f",
460
- "Ġ Ï",
461
  "Ġo f",
 
462
  "n d",
 
 
463
  "Ġ 2",
464
- "Ïģ Î",
 
 
 
 
465
  "a n",
466
  "h e",
 
467
  "Ġ b",
468
  "Ġ c",
469
  "Ġ e",
@@ -478,14 +520,12 @@
478
  "o re",
479
  "s e",
480
  "t en",
481
- "Î µ",
482
- "Î ¿",
483
  "Ġ T",
484
  "Ġ si",
485
  "Ġ Eu",
486
  "Ġa nd",
487
  "Ġf u",
488
- "ĠÏ Ħ",
489
  "mu la",
490
  "or mula",
491
  "Ġsi d",
@@ -496,26 +536,25 @@
496
  "p o",
497
  "q u",
498
  "u se",
499
- "¹ Ï",
500
- "Î ¯",
501
- "Î ±",
502
- "Î ·",
503
  "Ï Ĥ",
504
- "Ï ħ",
505
  "Ġ +",
506
  "Ġ -",
507
  "Ġ =",
508
  "Ġ l",
509
  "Ġ th",
 
510
  "Ġ re",
511
- "İ Î½",
512
  "the r",
 
513
  "Ġi n",
514
- "ĠÎ ³",
515
  "en g",
516
  "en t",
 
517
  "Ġo ther",
518
  "ri g",
 
519
  "le r",
520
  "Ġf ormula",
521
  "ĠT he",
@@ -538,25 +577,23 @@
538
  "u m",
539
  "w o",
540
  "w he",
541
- "³ Ï",
542
- "Î ³Ï",
543
- "Ï Ģ",
544
- "Ï Ħ",
545
  "Ġ g",
546
  "Ġ n",
547
  "Ġ hy",
548
  "Ġ rig",
549
  "Ġ It",
550
  "Ġ Py",
551
- "ĥ η",
552
- "ī ν",
553
  "th ag",
 
554
  "Ġa n",
555
  "ĠÎ ±",
556
  "ĠÎ ²",
557
- "ĠÎ ¼",
 
558
  "nd am",
559
- "ÏģÎ ¹Ï",
560
  "Ġc om",
561
  "Ġe x",
562
  "Ġs qu",
@@ -566,13 +603,16 @@
566
  "ore m",
567
  "ore an",
568
  "ten use",
569
- "ο Ïħ",
570
  "Ġfu nct",
571
  "Ġfu ndam",
572
  "po tenuse",
 
573
  "Ġl eng",
574
  "Ġth at",
 
575
  "Ġre lat",
 
 
576
  "ent al",
577
  "ple x",
578
  "whe re",
 
289
  "ł": 254,
290
  "Ń": 255,
291
  "th": 256,
292
+ "ν": 257,
293
+ "ÏĦ": 258,
294
+ "the": 259,
295
+ "Ïģ": 260,
296
+ "ι": 261,
297
+ "Ġthe": 262,
298
+ "ε": 263,
299
+ "Ġi": 264,
300
+ "Ïģι": 265,
301
+ "Ġa": 266,
302
+ "ÏĦÏģι": 267,
303
+ "en": 268,
304
+ "re": 269,
305
+ "ĠÎ": 270,
306
+ "ÏĦÏģιÎ": 271,
307
+ "Ġo": 272,
308
+ "νÎ": 273,
309
+ "Ïī": 274,
310
+ "si": 275,
311
+ "Ïİ": 276,
312
+ "Ġis": 277,
313
+ "al": 278,
314
+ "ri": 279,
315
+ "Ïĥ": 280,
316
+ "Ïİν": 281,
317
+ "at": 282,
318
+ "es": 283,
319
+ "le": 284,
320
+ "on": 285,
321
+ "³Ïī": 286,
322
+ "¼Îµ": 287,
323
+ "¿Î": 288,
324
+ "Ġf": 289,
325
+ "ĠÏĦÏģιÎ": 290,
326
+ "Ġof": 291,
327
+ "³ÏīνÎ": 292,
328
+ "nd": 293,
329
+ "ºÏİν": 294,
330
+ "η": 295,
331
+ "Ġ2": 296,
332
+ "ÏĦÏģικÏİν": 297,
333
+ "¼ÎµÏĦÏģικÏİν": 298,
334
+ "¿Î¼ÎµÏĦÏģικÏİν": 299,
335
+ "ĠÏĦÏģιγÏīνÎ": 300,
336
+ "ĠÏĦÏģιγÏīνομεÏĦÏģικÏİν": 301,
337
+ "an": 302,
338
+ "he": 303,
339
+ "Ïħ": 304,
340
+ "Ġb": 305,
341
+ "Ġc": 306,
342
+ "Ġe": 307,
343
+ "Ġs": 308,
344
+ "Ġt": 309,
345
+ "Eu": 310,
346
+ "ion": 311,
347
+ "la": 312,
348
+ "mu": 313,
349
+ "om": 314,
350
+ "or": 315,
351
+ "ore": 316,
352
+ "se": 317,
353
+ "ten": 318,
354
+ "α": 319,
355
+ "ĠT": 320,
356
+ "Ġsi": 321,
357
+ "ĠEu": 322,
358
+ "Ġand": 323,
359
+ "Ġfu": 324,
360
+ "mula": 325,
361
+ "ormula": 326,
362
+ "Ġsid": 327,
363
+ "'s": 328,
364
+ "ag": 329,
365
+ "et": 330,
366
+ "hy": 331,
367
+ "po": 332,
368
+ "qu": 333,
369
+ "use": 334,
370
+ "ÏĢ": 335,
371
+ "ÏĤ": 336,
372
+ "Ġ+": 337,
373
+ "Ġ-": 338,
374
+ "Ġ=": 339,
375
+ "Ġl": 340,
376
+ "Ġth": 341,
377
+ "ĠÏĦ": 342,
378
+ "Ġre": 343,
379
+ "ĠÏĥ": 344,
380
+ "ther": 345,
381
+ "εÎ": 346,
382
+ "Ġin": 347,
383
+ "eng": 348,
384
+ "ent": 349,
385
+ "Ġγ": 350,
386
+ "Ġother": 351,
387
+ "rig": 352,
388
+ "Ïĥη": 353,
389
+ "ler": 354,
390
+ "Ġformula": 355,
391
+ "ĠThe": 356,
392
+ "ĠEuler": 357,
393
+ "Ġsides": 358,
394
+ "It": 359,
395
+ "Py": 360,
396
+ "am": 361,
397
+ "are": 362,
398
+ "ct": 363,
399
+ "gle": 364,
400
+ "hi": 365,
401
+ "ht": 366,
402
+ "in": 367,
403
+ "li": 368,
404
+ "lat": 369,
405
+ "nct": 370,
406
+ "ple": 371,
407
+ "ry": 372,
408
+ "um": 373,
409
+ "wo": 374,
410
+ "whe": 375,
411
+ "ºÎ": 376,
412
+ "¿Ïģ": 377,
413
+ "¿Ïħ": 378,
414
+ "Ġg": 379,
415
+ "Ġn": 380,
416
+ "Ġhy": 381,
417
+ "Ġrig": 382,
418
+ "ĠIt": 383,
419
+ "ĠPy": 384,
420
+ "thag": 385,
421
+ "ÏĦÎ": 386,
422
+ "Ġan": 387,
423
+ "Ġα": 388,
424
+ "Ġβ": 389,
425
+ "να": 390,
426
+ "Ïīν": 391,
427
+ "ndam": 392,
428
+ "Ġcom": 393,
429
+ "Ġex": 394,
430
+ "Ġsqu": 395,
431
+ "Ġtwo": 396,
432
+ "ions": 397,
433
+ "omet": 398,
434
+ "orem": 399,
435
+ "orean": 400,
436
+ "tenuse": 401,
437
+ "Ġfunct": 402,
438
+ "Ġfundam": 403,
439
+ "potenuse": 404,
440
+ "ÏĢÎ": 405,
441
+ "Ġleng": 406,
442
+ "Ġthat": 407,
443
+ "ĠÏĦη": 408,
444
+ "Ġrelat": 409,
445
+ "ĠÏĥÏħ": 410,
446
+ "εί": 411,
447
+ "ental": 412,
448
+ "plex": 413,
449
+ "where": 414,
450
+ "Ġhypotenuse": 415,
451
+ "Ġright": 416,
452
+ "ĠPythag": 417,
453
+ "Ġcomplex": 418,
454
+ "Ġsquare": 419,
455
+ "Ġfundamental": 420,
456
+ "Ġlength": 421,
457
+ "ĠPythagorean": 422
458
  },
459
  "merges": [
460
  "t h",
461
+ "Î ½",
462
+ "Ï Ħ",
463
  "th e",
464
+ "Ï ģ",
465
+ "Î ¹",
466
  "Ġ the",
467
+ "Î µ",
468
  "Ġ i",
469
+ "Ïģ ι",
470
  "Ġ a",
471
+ "ÏĦ Ïģι",
472
  "e n",
473
  "r e",
474
+ "Ġ Î",
475
+ "ÏĦÏģι Î",
476
  "Ġ o",
477
+ "ν Î",
478
+ "Ï ī",
479
  "s i",
480
+ "Ï İ",
481
  "Ġi s",
482
  "a l",
483
  "r i",
484
+ "Ï ĥ",
485
+ "Ïİ Î½",
486
  "a t",
487
  "e s",
488
  "l e",
489
  "o n",
490
+ "³ Ïī",
491
+ "¼ ε",
492
+ "¿ Î",
493
  "Ġ f",
494
+ "Ġ ÏĦÏģιÎ",
495
  "Ġo f",
496
+ "³Ïī νÎ",
497
  "n d",
498
+ "º Ïİν",
499
+ "Î ·",
500
  "Ġ 2",
501
+ "ÏĦÏģιΠºÏİν",
502
+ "¼Îµ ÏĦÏģικÏİν",
503
+ "¿Î ¼ÎµÏĦÏģικÏİν",
504
+ "ĠÏĦÏģιΠ³ÏīνÎ",
505
+ "ĠÏĦÏģιγÏīνΠ¿Î¼ÎµÏĦÏģικÏİν",
506
  "a n",
507
  "h e",
508
+ "Ï ħ",
509
  "Ġ b",
510
  "Ġ c",
511
  "Ġ e",
 
520
  "o re",
521
  "s e",
522
  "t en",
523
+ "Î ±",
 
524
  "Ġ T",
525
  "Ġ si",
526
  "Ġ Eu",
527
  "Ġa nd",
528
  "Ġf u",
 
529
  "mu la",
530
  "or mula",
531
  "Ġsi d",
 
536
  "p o",
537
  "q u",
538
  "u se",
539
+ "Ï Ģ",
 
 
 
540
  "Ï Ĥ",
 
541
  "Ġ +",
542
  "Ġ -",
543
  "Ġ =",
544
  "Ġ l",
545
  "Ġ th",
546
+ "Ġ ÏĦ",
547
  "Ġ re",
548
+ "Ġ Ïĥ",
549
  "the r",
550
+ "ε Î",
551
  "Ġi n",
 
552
  "en g",
553
  "en t",
554
+ "ĠÎ ³",
555
  "Ġo ther",
556
  "ri g",
557
+ "Ïĥ η",
558
  "le r",
559
  "Ġf ormula",
560
  "ĠT he",
 
577
  "u m",
578
  "w o",
579
  "w he",
580
+ "º Î",
581
+ "¿ Ïģ",
582
+ "¿ Ïħ",
 
583
  "Ġ g",
584
  "Ġ n",
585
  "Ġ hy",
586
  "Ġ rig",
587
  "Ġ It",
588
  "Ġ Py",
 
 
589
  "th ag",
590
+ "ÏĦ Î",
591
  "Ġa n",
592
  "ĠÎ ±",
593
  "ĠÎ ²",
594
+ "νΠ±",
595
+ "Ïī ν",
596
  "nd am",
 
597
  "Ġc om",
598
  "Ġe x",
599
  "Ġs qu",
 
603
  "ore m",
604
  "ore an",
605
  "ten use",
 
606
  "Ġfu nct",
607
  "Ġfu ndam",
608
  "po tenuse",
609
+ "ÏĢ Î",
610
  "Ġl eng",
611
  "Ġth at",
612
+ "ĠÏĦ η",
613
  "Ġre lat",
614
+ "ĠÏĥ Ïħ",
615
+ "εΠ¯",
616
  "ent al",
617
  "ple x",
618
  "whe re",