Mauricio-100 commited on
Commit
4d2f644
·
verified ·
1 Parent(s): 5bfca97

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer.json +626 -369
tokenizer.json CHANGED
@@ -70,225 +70,282 @@
70
  "<pad>": 2,
71
  "<unk>": 3,
72
  "<mask>": 4,
73
- ".": 5,
74
- "A": 6,
75
- "B": 7,
76
- "E": 8,
77
- "F": 9,
78
- "I": 10,
79
- "L": 11,
80
- "M": 12,
81
- "O": 13,
82
- "R": 14,
83
- "S": 15,
84
- "T": 16,
85
- "a": 17,
86
- "c": 18,
87
- "d": 19,
88
- "e": 20,
89
- "f": 21,
90
- "g": 22,
91
- "h": 23,
92
- "i": 24,
93
- "j": 25,
94
- "l": 26,
95
- "m": 27,
96
- "n": 28,
97
- "o": 29,
98
- "p": 30,
99
- "r": 31,
100
- "s": 32,
101
- "t": 33,
102
- "u": 34,
103
- "v": 35,
104
- "x": 36,
105
- "y": 37,
106
- "è": 38,
107
- "in": 39,
108
- "de": 40,
109
- "an": 41,
110
- "en": 42,
111
- "ra": 43,
112
- "te": 44,
113
- "ing": 45,
114
- "ce": 46,
115
- "ch": 47,
116
- "is": 48,
117
- "le": 49,
118
- "ode": 50,
119
- "ro": 51,
120
- "ti": 52,
121
- "ar": 53,
122
- "lo": 54,
123
- "or": 55,
124
- "pro": 56,
125
- "tu": 57,
126
- "ua": 58,
127
- "OR": 59,
128
- "atu": 60,
129
- "ci": 61,
130
- "ge": 62,
131
- "gen": 63,
132
- "gua": 64,
133
- "lan": 65,
134
- "men": 66,
135
- "mode": 67,
136
- "no": 68,
137
- "ning": 69,
138
- "natu": 70,
139
- "on": 71,
140
- "rs": 72,
141
- "ss": 73,
142
- "un": 74,
143
- "and": 75,
144
- "ral": 76,
145
- "ish": 77,
146
- "lear": 78,
147
- "tion": 79,
148
- "proce": 80,
149
- "ORB": 81,
150
- "ment": 82,
151
- "natural": 83,
152
- "learning": 84,
153
- "AI": 85,
154
- "El": 86,
155
- "En": 87,
156
- "Fr": 88,
157
- "Le": 89,
158
- "Mode": 90,
159
- "Sp": 91,
160
- "Th": 92,
161
- "al": 93,
162
- "ach": 94,
163
- "age": 95,
164
- "ass": 96,
165
- "ation": 97,
166
- "cl": 98,
167
- "code": 99,
168
- "ction": 100,
169
- "du": 101,
170
- "": 102,
171
- "ep": 103,
172
- "es": 104,
173
- "era": 105,
174
- "elo": 106,
175
- "etu": 107,
176
- "ers": 108,
177
- "fi": 109,
178
- "fro": 110,
179
- "for": 111,
180
- "fun": 112,
181
- "gl": 113,
182
- "gy": 114,
183
- "gra": 115,
184
- "gage": 116,
185
- "im": 117,
186
- "it": 118,
187
- "ite": 119,
188
- "igen": 120,
189
- "je": 121,
190
- "ll": 122,
191
- "len": 123,
192
- "ling": 124,
193
- "lti": 125,
194
- "mm": 126,
195
- "mo": 127,
196
- "mu": 128,
197
- "mach": 129,
198
- "mers": 130,
199
- "ou": 131,
200
- "pu": 132,
201
- "por": 133,
202
- "pment": 134,
203
- "pou": 135,
204
- "rn": 136,
205
- "ran": 137,
206
- "retu": 138,
207
- "sa": 139,
208
- "san": 140,
209
- "sci": 141,
210
- "sfor": 142,
211
- "tra": 143,
212
- "tor": 144,
213
- "tand": 145,
214
- "tran": 146,
215
- "vation": 147,
216
- "velo": 148,
217
- "xt": 149,
218
- "ine": 150,
219
- "inte": 151,
220
- "inno": 152,
221
- "init": 153,
222
- "def": 154,
223
- "ders": 155,
224
- "deep": 156,
225
- "develo": 157,
226
- "anish": 158,
227
- "ence": 159,
228
- "ench": 160,
229
- "tes": 161,
230
- "tech": 162,
231
- "text": 163,
232
- "issan": 164,
233
- "tifi": 165,
234
- "artifi": 166,
235
- "logy": 167,
236
- "progra": 168,
237
- "ual": 169,
238
- "cial": 170,
239
- "genera": 171,
240
- "guage": 172,
241
- "guaje": 173,
242
- "langage": 174,
243
- "language": 175,
244
- "model": 176,
245
- "modelo": 177,
246
- "nology": 178,
247
- "ssing": 179,
248
- "unders": 180,
249
- "procesa": 181,
250
- "processing": 182,
251
- "Engl": 183,
252
- "French": 184,
253
- "Model": 185,
254
- "Spanish": 186,
255
- "This": 187,
256
- "class": 188,
257
- "dèle": 189,
258
- "est": 190,
259
- "from": 191,
260
- "function": 192,
261
- "impor": 193,
262
- "itement": 194,
263
- "igence": 195,
264
- "lligence": 196,
265
- "lenguaje": 197,
266
- "lingual": 198,
267
- "ltilingual": 199,
268
- "mming": 200,
269
- "modèle": 201,
270
- "multilingual": 202,
271
- "machine": 203,
272
- "puissan": 204,
273
- "pour": 205,
274
- "return": 206,
275
- "science": 207,
276
- "sformers": 208,
277
- "traitement": 209,
278
- "torch": 210,
279
- "tands": 211,
280
- "transformers": 212,
281
- "intelligence": 213,
282
- "innovation": 214,
283
- "development": 215,
284
- "technology": 216,
285
- "artificial": 217,
286
- "programming": 218,
287
- "generates": 219,
288
- "understands": 220,
289
- "English": 221,
290
- "import": 222,
291
- "puissant": 223
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292
  },
293
  "merges": [
294
  [
@@ -299,6 +356,14 @@
299
  "d",
300
  "e"
301
  ],
 
 
 
 
 
 
 
 
302
  [
303
  "a",
304
  "n"
@@ -307,33 +372,53 @@
307
  "e",
308
  "n"
309
  ],
 
 
 
 
 
 
 
 
310
  [
311
  "r",
312
  "a"
313
  ],
314
  [
315
- "t",
 
 
 
 
316
  "e"
317
  ],
 
 
 
 
318
  [
319
  "in",
320
  "g"
321
  ],
322
  [
323
- "c",
324
- "e"
325
  ],
326
  [
327
  "c",
328
  "h"
329
  ],
 
 
 
 
330
  [
331
  "i",
332
- "s"
333
  ],
334
  [
335
  "l",
336
- "e"
337
  ],
338
  [
339
  "o",
@@ -341,48 +426,88 @@
341
  ],
342
  [
343
  "r",
344
- "o"
345
  ],
346
  [
347
  "t",
348
- "i"
349
  ],
350
  [
351
- "a",
352
- "r"
353
  ],
354
  [
355
- "l",
356
- "o"
357
  ],
358
  [
359
- "o",
360
- "r"
361
  ],
362
  [
363
- "p",
364
- "ro"
365
  ],
366
  [
367
- "t",
368
- "u"
369
  ],
370
  [
371
  "u",
372
  "a"
373
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
  [
375
  "O",
376
  "R"
377
  ],
 
 
 
 
378
  [
379
  "a",
380
  "tu"
381
  ],
 
 
 
 
382
  [
383
  "c",
384
  "i"
385
  ],
 
 
 
 
 
 
 
 
386
  [
387
  "g",
388
  "e"
@@ -393,11 +518,23 @@
393
  ],
394
  [
395
  "g",
396
- "ua"
 
 
 
 
397
  ],
398
  [
399
  "l",
400
- "an"
 
 
 
 
 
 
 
 
401
  ],
402
  [
403
  "m",
@@ -408,8 +545,8 @@
408
  "ode"
409
  ],
410
  [
411
- "n",
412
- "o"
413
  ],
414
  [
415
  "n",
@@ -420,25 +557,37 @@
420
  "atu"
421
  ],
422
  [
423
- "o",
424
- "n"
425
  ],
426
  [
427
  "r",
428
  "s"
429
  ],
430
  [
431
- "s",
432
- "s"
 
 
 
 
433
  ],
434
  [
435
  "u",
436
  "n"
437
  ],
 
 
 
 
438
  [
439
  "an",
440
  "d"
441
  ],
 
 
 
 
442
  [
443
  "ra",
444
  "l"
@@ -452,17 +601,25 @@
452
  "ar"
453
  ],
454
  [
455
- "ti",
456
- "on"
457
  ],
458
  [
459
  "pro",
460
- "ce"
 
 
 
 
461
  ],
462
  [
463
  "OR",
464
  "B"
465
  ],
 
 
 
 
466
  [
467
  "men",
468
  "t"
@@ -475,10 +632,6 @@
475
  "lear",
476
  "ning"
477
  ],
478
- [
479
- "A",
480
- "I"
481
- ],
482
  [
483
  "E",
484
  "l"
@@ -499,6 +652,10 @@
499
  "M",
500
  "ode"
501
  ],
 
 
 
 
502
  [
503
  "S",
504
  "p"
@@ -513,16 +670,12 @@
513
  ],
514
  [
515
  "a",
516
- "ch"
517
  ],
518
  [
519
  "a",
520
  "ge"
521
  ],
522
- [
523
- "a",
524
- "ss"
525
- ],
526
  [
527
  "a",
528
  "tion"
@@ -531,6 +684,14 @@
531
  "c",
532
  "l"
533
  ],
 
 
 
 
 
 
 
 
534
  [
535
  "c",
536
  "ode"
@@ -549,27 +710,23 @@
549
  ],
550
  [
551
  "e",
552
- "p"
553
- ],
554
- [
555
- "e",
556
- "s"
557
  ],
558
  [
559
  "e",
560
- "ra"
561
  ],
562
  [
563
  "e",
564
- "lo"
565
  ],
566
  [
567
  "e",
568
- "tu"
569
  ],
570
  [
571
  "e",
572
- "rs"
573
  ],
574
  [
575
  "f",
@@ -577,7 +734,7 @@
577
  ],
578
  [
579
  "f",
580
- "ro"
581
  ],
582
  [
583
  "f",
@@ -597,32 +754,44 @@
597
  ],
598
  [
599
  "g",
600
- "ra"
601
  ],
602
  [
603
- "g",
604
- "age"
605
  ],
606
  [
607
  "i",
608
- "m"
609
  ],
610
  [
611
  "i",
612
- "t"
613
  ],
614
  [
615
  "i",
616
- "te"
617
  ],
618
  [
619
  "i",
620
- "gen"
 
 
 
 
621
  ],
622
  [
623
  "j",
624
  "e"
625
  ],
 
 
 
 
 
 
 
 
626
  [
627
  "l",
628
  "l"
@@ -631,6 +800,10 @@
631
  "l",
632
  "en"
633
  ],
 
 
 
 
634
  [
635
  "l",
636
  "ing"
@@ -651,33 +824,25 @@
651
  "m",
652
  "u"
653
  ],
654
- [
655
- "m",
656
- "ach"
657
- ],
658
- [
659
- "m",
660
- "ers"
661
- ],
662
  [
663
  "o",
664
- "u"
665
  ],
666
  [
667
- "p",
668
- "u"
669
  ],
670
  [
671
- "p",
672
- "or"
673
  ],
674
  [
675
  "p",
676
- "ment"
677
  ],
678
  [
679
  "p",
680
- "ou"
681
  ],
682
  [
683
  "r",
@@ -688,16 +853,20 @@
688
  "an"
689
  ],
690
  [
691
- "r",
692
- "etu"
693
  ],
694
  [
695
  "s",
696
- "a"
697
  ],
698
  [
699
  "s",
700
- "an"
 
 
 
 
701
  ],
702
  [
703
  "s",
@@ -709,11 +878,11 @@
709
  ],
710
  [
711
  "t",
712
- "ra"
713
  ],
714
  [
715
  "t",
716
- "or"
717
  ],
718
  [
719
  "t",
@@ -729,7 +898,11 @@
729
  ],
730
  [
731
  "v",
732
- "elo"
 
 
 
 
733
  ],
734
  [
735
  "x",
@@ -765,56 +938,104 @@
765
  ],
766
  [
767
  "de",
768
- "velo"
769
  ],
770
  [
771
- "an",
772
- "ish"
773
  ],
774
  [
775
- "en",
776
- "ce"
777
  ],
778
  [
779
- "en",
780
  "ch"
781
  ],
782
  [
783
  "te",
784
- "s"
785
  ],
786
  [
787
  "te",
 
 
 
 
 
 
 
 
788
  "ch"
789
  ],
790
  [
791
- "te",
792
- "xt"
 
 
 
 
 
 
 
 
 
 
 
 
793
  ],
794
  [
795
  "is",
796
  "san"
797
  ],
798
  [
799
- "ti",
800
- "fi"
801
  ],
802
  [
803
  "ar",
804
- "tifi"
 
 
 
 
805
  ],
806
  [
807
  "lo",
808
  "gy"
809
  ],
810
  [
811
- "pro",
812
- "gra"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
813
  ],
814
  [
815
  "ua",
816
  "l"
817
  ],
 
 
 
 
 
 
 
 
818
  [
819
  "ci",
820
  "al"
@@ -824,20 +1045,28 @@
824
  "era"
825
  ],
826
  [
827
- "gua",
828
- "ge"
829
  ],
830
  [
831
- "gua",
832
- "je"
833
  ],
834
  [
835
- "lan",
836
- "gage"
 
 
 
 
837
  ],
838
  [
839
- "lan",
840
- "guage"
 
 
 
 
841
  ],
842
  [
843
  "mode",
@@ -848,24 +1077,40 @@
848
  "lo"
849
  ],
850
  [
851
- "no",
852
- "logy"
853
  ],
854
  [
855
- "ss",
856
- "ing"
857
  ],
858
  [
859
  "un",
860
  "ders"
861
  ],
862
  [
863
- "proce",
864
- "sa"
 
 
 
 
865
  ],
866
  [
867
- "proce",
868
- "ssing"
 
 
 
 
 
 
 
 
 
 
 
 
869
  ],
870
  [
871
  "En",
@@ -879,6 +1124,10 @@
879
  "Mode",
880
  "l"
881
  ],
 
 
 
 
882
  [
883
  "Sp",
884
  "anish"
@@ -887,38 +1136,46 @@
887
  "Th",
888
  "is"
889
  ],
 
 
 
 
890
  [
891
  "cl",
892
  "ass"
893
  ],
 
 
 
 
894
  [
895
  "dè",
896
  "le"
897
  ],
898
  [
899
- "es",
900
- "t"
901
  ],
902
  [
903
- "fro",
904
- "m"
905
  ],
906
  [
907
  "fun",
908
  "ction"
909
  ],
910
  [
911
- "im",
912
- "por"
913
- ],
914
- [
915
- "ite",
916
- "ment"
917
  ],
918
  [
919
  "igen",
920
  "ce"
921
  ],
 
 
 
 
922
  [
923
  "ll",
924
  "igence"
@@ -935,10 +1192,6 @@
935
  "lti",
936
  "lingual"
937
  ],
938
- [
939
- "mm",
940
- "ing"
941
- ],
942
  [
943
  "mo",
944
  "dèle"
@@ -948,20 +1201,8 @@
948
  "ltilingual"
949
  ],
950
  [
951
- "mach",
952
- "ine"
953
- ],
954
- [
955
- "pu",
956
- "issan"
957
- ],
958
- [
959
- "pou",
960
- "r"
961
- ],
962
- [
963
- "retu",
964
- "rn"
965
  ],
966
  [
967
  "sci",
@@ -971,14 +1212,14 @@
971
  "sfor",
972
  "mers"
973
  ],
974
- [
975
- "tra",
976
- "itement"
977
- ],
978
  [
979
  "tor",
980
  "ch"
981
  ],
 
 
 
 
982
  [
983
  "tand",
984
  "s"
@@ -987,6 +1228,10 @@
987
  "tran",
988
  "sformers"
989
  ],
 
 
 
 
990
  [
991
  "inte",
992
  "lligence"
@@ -996,40 +1241,52 @@
996
  "vation"
997
  ],
998
  [
999
- "develo",
1000
- "pment"
1001
  ],
1002
  [
1003
  "tech",
1004
  "nology"
1005
  ],
1006
  [
1007
- "artifi",
1008
- "cial"
1009
  ],
1010
  [
1011
- "progra",
1012
- "mming"
 
 
 
 
 
 
 
 
1013
  ],
1014
  [
1015
  "genera",
1016
  "tes"
1017
  ],
 
 
 
 
1018
  [
1019
  "unders",
1020
  "tands"
1021
  ],
1022
  [
1023
- "Engl",
1024
- "ish"
1025
  ],
1026
  [
1027
- "impor",
1028
- "t"
1029
  ],
1030
  [
1031
- "puissan",
1032
- "t"
1033
  ]
1034
  ]
1035
  }
 
70
  "<pad>": 2,
71
  "<unk>": 3,
72
  "<mask>": 4,
73
+ "'": 5,
74
+ "-": 6,
75
+ ".": 7,
76
+ "0": 8,
77
+ "1": 9,
78
+ "A": 10,
79
+ "B": 11,
80
+ "E": 12,
81
+ "F": 13,
82
+ "I": 14,
83
+ "L": 15,
84
+ "M": 16,
85
+ "O": 17,
86
+ "R": 18,
87
+ "S": 19,
88
+ "T": 20,
89
+ "a": 21,
90
+ "b": 22,
91
+ "c": 23,
92
+ "d": 24,
93
+ "e": 25,
94
+ "f": 26,
95
+ "g": 27,
96
+ "h": 28,
97
+ "i": 29,
98
+ "j": 30,
99
+ "k": 31,
100
+ "l": 32,
101
+ "m": 33,
102
+ "n": 34,
103
+ "o": 35,
104
+ "p": 36,
105
+ "r": 37,
106
+ "s": 38,
107
+ "t": 39,
108
+ "u": 40,
109
+ "v": 41,
110
+ "w": 42,
111
+ "x": 43,
112
+ "y": 44,
113
+ "è": 45,
114
+ "in": 46,
115
+ "de": 47,
116
+ "or": 48,
117
+ "te": 49,
118
+ "an": 50,
119
+ "en": 51,
120
+ "es": 52,
121
+ "op": 53,
122
+ "ra": 54,
123
+ "is": 55,
124
+ "le": 56,
125
+ "pr": 57,
126
+ "ing": 58,
127
+ "ar": 59,
128
+ "ch": 60,
129
+ "gop": 61,
130
+ "io": 62,
131
+ "lo": 63,
132
+ "ode": 64,
133
+ "re": 65,
134
+ "tu": 66,
135
+ "pro": 67,
136
+ "gopu": 68,
137
+ "cre": 69,
138
+ "me": 70,
139
+ "no": 71,
140
+ "ua": 72,
141
+ "ur": 73,
142
+ "inc": 74,
143
+ "orb": 75,
144
+ "ang": 76,
145
+ "ion": 77,
146
+ "AI": 78,
147
+ "OR": 79,
148
+ "ate": 80,
149
+ "atu": 81,
150
+ "ce": 82,
151
+ "ci": 83,
152
+ "ces": 84,
153
+ "flo": 85,
154
+ "ge": 86,
155
+ "gen": 87,
156
+ "gra": 88,
157
+ "ite": 89,
158
+ "lang": 90,
159
+ "ma": 91,
160
+ "mp": 92,
161
+ "men": 93,
162
+ "mode": 94,
163
+ "mme": 95,
164
+ "ning": 96,
165
+ "natu": 97,
166
+ "pu": 98,
167
+ "rs": 99,
168
+ "ti": 100,
169
+ "tion": 101,
170
+ "un": 102,
171
+ "v1": 103,
172
+ "and": 104,
173
+ "est": 105,
174
+ "ral": 106,
175
+ "ish": 107,
176
+ "lear": 108,
177
+ "proces": 109,
178
+ "progra": 110,
179
+ "create": 111,
180
+ "ORB": 112,
181
+ "flow": 113,
182
+ "ment": 114,
183
+ "natural": 115,
184
+ "learning": 116,
185
+ "El": 117,
186
+ "En": 118,
187
+ "Fr": 119,
188
+ "Le": 120,
189
+ "Mode": 121,
190
+ "Mang": 122,
191
+ "Sp": 123,
192
+ "Th": 124,
193
+ "al": 125,
194
+ "as": 126,
195
+ "age": 127,
196
+ "ation": 128,
197
+ "cl": 129,
198
+ "cor": 130,
199
+ "cio": 131,
200
+ "code": 132,
201
+ "ction": 133,
202
+ "du": 134,
203
+ "": 135,
204
+ "el": 136,
205
+ "ep": 137,
206
+ "er": 138,
207
+ "et": 139,
208
+ "era": 140,
209
+ "fi": 141,
210
+ "fr": 142,
211
+ "for": 143,
212
+ "fun": 144,
213
+ "gl": 145,
214
+ "gy": 146,
215
+ "gua": 147,
216
+ "it": 148,
217
+ "ior": 149,
218
+ "itu": 150,
219
+ "igen": 151,
220
+ "imp": 152,
221
+ "icio": 153,
222
+ "je": 154,
223
+ "ka": 155,
224
+ "kflow": 156,
225
+ "ll": 157,
226
+ "len": 158,
227
+ "les": 159,
228
+ "ling": 160,
229
+ "lti": 161,
230
+ "mm": 162,
231
+ "mo": 163,
232
+ "mu": 164,
233
+ "of": 165,
234
+ "om": 166,
235
+ "our": 167,
236
+ "par": 168,
237
+ "pour": 169,
238
+ "rn": 170,
239
+ "ran": 171,
240
+ "san": 172,
241
+ "ses": 173,
242
+ "sing": 174,
243
+ "sion": 175,
244
+ "sci": 176,
245
+ "sfor": 177,
246
+ "tor": 178,
247
+ "tra": 179,
248
+ "tand": 180,
249
+ "tran": 181,
250
+ "vation": 182,
251
+ "vel": 183,
252
+ "wor": 184,
253
+ "xt": 185,
254
+ "ine": 186,
255
+ "inte": 187,
256
+ "inno": 188,
257
+ "init": 189,
258
+ "def": 190,
259
+ "ders": 191,
260
+ "deep": 192,
261
+ "devel": 193,
262
+ "ort": 194,
263
+ "tes": 195,
264
+ "tech": 196,
265
+ "temp": 197,
266
+ "text": 198,
267
+ "anish": 199,
268
+ "ench": 200,
269
+ "enAI": 201,
270
+ "ence": 202,
271
+ "opment": 203,
272
+ "openAI": 204,
273
+ "issan": 205,
274
+ "prior": 206,
275
+ "arti": 207,
276
+ "chine": 208,
277
+ "logy": 209,
278
+ "retu": 210,
279
+ "cree": 211,
280
+ "mers": 212,
281
+ "nomme": 213,
282
+ "nology": 214,
283
+ "ual": 215,
284
+ "uage": 216,
285
+ "uricio": 217,
286
+ "cial": 218,
287
+ "genera": 219,
288
+ "ites": 220,
289
+ "itement": 221,
290
+ "langage": 222,
291
+ "language": 223,
292
+ "machine": 224,
293
+ "mauricio": 225,
294
+ "model": 226,
295
+ "modelo": 227,
296
+ "pus": 228,
297
+ "puissan": 229,
298
+ "unders": 230,
299
+ "procesa": 231,
300
+ "processing": 232,
301
+ "programme": 233,
302
+ "programm": 234,
303
+ "createur": 235,
304
+ "flower": 236,
305
+ "Engl": 237,
306
+ "French": 238,
307
+ "Model": 239,
308
+ "Mangitu": 240,
309
+ "Spanish": 241,
310
+ "This": 242,
311
+ "ass": 243,
312
+ "class": 244,
313
+ "corpus": 245,
314
+ "dèle": 246,
315
+ "ficial": 247,
316
+ "from": 248,
317
+ "function": 249,
318
+ "guaje": 250,
319
+ "igence": 251,
320
+ "import": 252,
321
+ "lligence": 253,
322
+ "lenguaje": 254,
323
+ "lingual": 255,
324
+ "ltilingual": 256,
325
+ "modèle": 257,
326
+ "multilingual": 258,
327
+ "session": 259,
328
+ "science": 260,
329
+ "sformers": 261,
330
+ "torch": 262,
331
+ "traitement": 263,
332
+ "tands": 264,
333
+ "transformers": 265,
334
+ "workflow": 266,
335
+ "intelligence": 267,
336
+ "innovation": 268,
337
+ "development": 269,
338
+ "technology": 270,
339
+ "temps": 271,
340
+ "priorites": 272,
341
+ "artificial": 273,
342
+ "return": 274,
343
+ "generates": 275,
344
+ "puissant": 276,
345
+ "understands": 277,
346
+ "programming": 278,
347
+ "English": 279,
348
+ "Mangituka": 280
349
  },
350
  "merges": [
351
  [
 
356
  "d",
357
  "e"
358
  ],
359
+ [
360
+ "o",
361
+ "r"
362
+ ],
363
+ [
364
+ "t",
365
+ "e"
366
+ ],
367
  [
368
  "a",
369
  "n"
 
372
  "e",
373
  "n"
374
  ],
375
+ [
376
+ "e",
377
+ "s"
378
+ ],
379
+ [
380
+ "o",
381
+ "p"
382
+ ],
383
  [
384
  "r",
385
  "a"
386
  ],
387
  [
388
+ "i",
389
+ "s"
390
+ ],
391
+ [
392
+ "l",
393
  "e"
394
  ],
395
+ [
396
+ "p",
397
+ "r"
398
+ ],
399
  [
400
  "in",
401
  "g"
402
  ],
403
  [
404
+ "a",
405
+ "r"
406
  ],
407
  [
408
  "c",
409
  "h"
410
  ],
411
+ [
412
+ "g",
413
+ "op"
414
+ ],
415
  [
416
  "i",
417
+ "o"
418
  ],
419
  [
420
  "l",
421
+ "o"
422
  ],
423
  [
424
  "o",
 
426
  ],
427
  [
428
  "r",
429
+ "e"
430
  ],
431
  [
432
  "t",
433
+ "u"
434
  ],
435
  [
436
+ "pr",
437
+ "o"
438
  ],
439
  [
440
+ "gop",
441
+ "u"
442
  ],
443
  [
444
+ "c",
445
+ "re"
446
  ],
447
  [
448
+ "m",
449
+ "e"
450
  ],
451
  [
452
+ "n",
453
+ "o"
454
  ],
455
  [
456
  "u",
457
  "a"
458
  ],
459
+ [
460
+ "u",
461
+ "r"
462
+ ],
463
+ [
464
+ "in",
465
+ "c"
466
+ ],
467
+ [
468
+ "or",
469
+ "b"
470
+ ],
471
+ [
472
+ "an",
473
+ "g"
474
+ ],
475
+ [
476
+ "io",
477
+ "n"
478
+ ],
479
+ [
480
+ "A",
481
+ "I"
482
+ ],
483
  [
484
  "O",
485
  "R"
486
  ],
487
+ [
488
+ "a",
489
+ "te"
490
+ ],
491
  [
492
  "a",
493
  "tu"
494
  ],
495
+ [
496
+ "c",
497
+ "e"
498
+ ],
499
  [
500
  "c",
501
  "i"
502
  ],
503
+ [
504
+ "c",
505
+ "es"
506
+ ],
507
+ [
508
+ "f",
509
+ "lo"
510
+ ],
511
  [
512
  "g",
513
  "e"
 
518
  ],
519
  [
520
  "g",
521
+ "ra"
522
+ ],
523
+ [
524
+ "i",
525
+ "te"
526
  ],
527
  [
528
  "l",
529
+ "ang"
530
+ ],
531
+ [
532
+ "m",
533
+ "a"
534
+ ],
535
+ [
536
+ "m",
537
+ "p"
538
  ],
539
  [
540
  "m",
 
545
  "ode"
546
  ],
547
  [
548
+ "m",
549
+ "me"
550
  ],
551
  [
552
  "n",
 
557
  "atu"
558
  ],
559
  [
560
+ "p",
561
+ "u"
562
  ],
563
  [
564
  "r",
565
  "s"
566
  ],
567
  [
568
+ "t",
569
+ "i"
570
+ ],
571
+ [
572
+ "t",
573
+ "ion"
574
  ],
575
  [
576
  "u",
577
  "n"
578
  ],
579
+ [
580
+ "v",
581
+ "1"
582
+ ],
583
  [
584
  "an",
585
  "d"
586
  ],
587
+ [
588
+ "es",
589
+ "t"
590
+ ],
591
  [
592
  "ra",
593
  "l"
 
601
  "ar"
602
  ],
603
  [
604
+ "pro",
605
+ "ces"
606
  ],
607
  [
608
  "pro",
609
+ "gra"
610
+ ],
611
+ [
612
+ "cre",
613
+ "ate"
614
  ],
615
  [
616
  "OR",
617
  "B"
618
  ],
619
+ [
620
+ "flo",
621
+ "w"
622
+ ],
623
  [
624
  "men",
625
  "t"
 
632
  "lear",
633
  "ning"
634
  ],
 
 
 
 
635
  [
636
  "E",
637
  "l"
 
652
  "M",
653
  "ode"
654
  ],
655
+ [
656
+ "M",
657
+ "ang"
658
+ ],
659
  [
660
  "S",
661
  "p"
 
670
  ],
671
  [
672
  "a",
673
+ "s"
674
  ],
675
  [
676
  "a",
677
  "ge"
678
  ],
 
 
 
 
679
  [
680
  "a",
681
  "tion"
 
684
  "c",
685
  "l"
686
  ],
687
+ [
688
+ "c",
689
+ "or"
690
+ ],
691
+ [
692
+ "c",
693
+ "io"
694
+ ],
695
  [
696
  "c",
697
  "ode"
 
710
  ],
711
  [
712
  "e",
713
+ "l"
 
 
 
 
714
  ],
715
  [
716
  "e",
717
+ "p"
718
  ],
719
  [
720
  "e",
721
+ "r"
722
  ],
723
  [
724
  "e",
725
+ "t"
726
  ],
727
  [
728
  "e",
729
+ "ra"
730
  ],
731
  [
732
  "f",
 
734
  ],
735
  [
736
  "f",
737
+ "r"
738
  ],
739
  [
740
  "f",
 
754
  ],
755
  [
756
  "g",
757
+ "ua"
758
  ],
759
  [
760
+ "i",
761
+ "t"
762
  ],
763
  [
764
  "i",
765
+ "or"
766
  ],
767
  [
768
  "i",
769
+ "tu"
770
  ],
771
  [
772
  "i",
773
+ "gen"
774
  ],
775
  [
776
  "i",
777
+ "mp"
778
+ ],
779
+ [
780
+ "i",
781
+ "cio"
782
  ],
783
  [
784
  "j",
785
  "e"
786
  ],
787
+ [
788
+ "k",
789
+ "a"
790
+ ],
791
+ [
792
+ "k",
793
+ "flow"
794
+ ],
795
  [
796
  "l",
797
  "l"
 
800
  "l",
801
  "en"
802
  ],
803
+ [
804
+ "l",
805
+ "es"
806
+ ],
807
  [
808
  "l",
809
  "ing"
 
824
  "m",
825
  "u"
826
  ],
 
 
 
 
 
 
 
 
827
  [
828
  "o",
829
+ "f"
830
  ],
831
  [
832
+ "o",
833
+ "m"
834
  ],
835
  [
836
+ "o",
837
+ "ur"
838
  ],
839
  [
840
  "p",
841
+ "ar"
842
  ],
843
  [
844
  "p",
845
+ "our"
846
  ],
847
  [
848
  "r",
 
853
  "an"
854
  ],
855
  [
856
+ "s",
857
+ "an"
858
  ],
859
  [
860
  "s",
861
+ "es"
862
  ],
863
  [
864
  "s",
865
+ "ing"
866
+ ],
867
+ [
868
+ "s",
869
+ "ion"
870
  ],
871
  [
872
  "s",
 
878
  ],
879
  [
880
  "t",
881
+ "or"
882
  ],
883
  [
884
  "t",
885
+ "ra"
886
  ],
887
  [
888
  "t",
 
898
  ],
899
  [
900
  "v",
901
+ "el"
902
+ ],
903
+ [
904
+ "w",
905
+ "or"
906
  ],
907
  [
908
  "x",
 
938
  ],
939
  [
940
  "de",
941
+ "vel"
942
  ],
943
  [
944
+ "or",
945
+ "t"
946
  ],
947
  [
948
+ "te",
949
+ "s"
950
  ],
951
  [
952
+ "te",
953
  "ch"
954
  ],
955
  [
956
  "te",
957
+ "mp"
958
  ],
959
  [
960
  "te",
961
+ "xt"
962
+ ],
963
+ [
964
+ "an",
965
+ "ish"
966
+ ],
967
+ [
968
+ "en",
969
  "ch"
970
  ],
971
  [
972
+ "en",
973
+ "AI"
974
+ ],
975
+ [
976
+ "en",
977
+ "ce"
978
+ ],
979
+ [
980
+ "op",
981
+ "ment"
982
+ ],
983
+ [
984
+ "op",
985
+ "enAI"
986
  ],
987
  [
988
  "is",
989
  "san"
990
  ],
991
  [
992
+ "pr",
993
+ "ior"
994
  ],
995
  [
996
  "ar",
997
+ "ti"
998
+ ],
999
+ [
1000
+ "ch",
1001
+ "ine"
1002
  ],
1003
  [
1004
  "lo",
1005
  "gy"
1006
  ],
1007
  [
1008
+ "re",
1009
+ "tu"
1010
+ ],
1011
+ [
1012
+ "cre",
1013
+ "e"
1014
+ ],
1015
+ [
1016
+ "me",
1017
+ "rs"
1018
+ ],
1019
+ [
1020
+ "no",
1021
+ "mme"
1022
+ ],
1023
+ [
1024
+ "no",
1025
+ "logy"
1026
  ],
1027
  [
1028
  "ua",
1029
  "l"
1030
  ],
1031
+ [
1032
+ "ua",
1033
+ "ge"
1034
+ ],
1035
+ [
1036
+ "ur",
1037
+ "icio"
1038
+ ],
1039
  [
1040
  "ci",
1041
  "al"
 
1045
  "era"
1046
  ],
1047
  [
1048
+ "ite",
1049
+ "s"
1050
  ],
1051
  [
1052
+ "ite",
1053
+ "ment"
1054
  ],
1055
  [
1056
+ "lang",
1057
+ "age"
1058
+ ],
1059
+ [
1060
+ "lang",
1061
+ "uage"
1062
  ],
1063
  [
1064
+ "ma",
1065
+ "chine"
1066
+ ],
1067
+ [
1068
+ "ma",
1069
+ "uricio"
1070
  ],
1071
  [
1072
  "mode",
 
1077
  "lo"
1078
  ],
1079
  [
1080
+ "pu",
1081
+ "s"
1082
  ],
1083
  [
1084
+ "pu",
1085
+ "issan"
1086
  ],
1087
  [
1088
  "un",
1089
  "ders"
1090
  ],
1091
  [
1092
+ "proces",
1093
+ "a"
1094
+ ],
1095
+ [
1096
+ "proces",
1097
+ "sing"
1098
  ],
1099
  [
1100
+ "progra",
1101
+ "mme"
1102
+ ],
1103
+ [
1104
+ "progra",
1105
+ "mm"
1106
+ ],
1107
+ [
1108
+ "create",
1109
+ "ur"
1110
+ ],
1111
+ [
1112
+ "flow",
1113
+ "er"
1114
  ],
1115
  [
1116
  "En",
 
1124
  "Mode",
1125
  "l"
1126
  ],
1127
+ [
1128
+ "Mang",
1129
+ "itu"
1130
+ ],
1131
  [
1132
  "Sp",
1133
  "anish"
 
1136
  "Th",
1137
  "is"
1138
  ],
1139
+ [
1140
+ "as",
1141
+ "s"
1142
+ ],
1143
  [
1144
  "cl",
1145
  "ass"
1146
  ],
1147
+ [
1148
+ "cor",
1149
+ "pus"
1150
+ ],
1151
  [
1152
  "dè",
1153
  "le"
1154
  ],
1155
  [
1156
+ "fi",
1157
+ "cial"
1158
  ],
1159
  [
1160
+ "fr",
1161
+ "om"
1162
  ],
1163
  [
1164
  "fun",
1165
  "ction"
1166
  ],
1167
  [
1168
+ "gua",
1169
+ "je"
 
 
 
 
1170
  ],
1171
  [
1172
  "igen",
1173
  "ce"
1174
  ],
1175
+ [
1176
+ "imp",
1177
+ "ort"
1178
+ ],
1179
  [
1180
  "ll",
1181
  "igence"
 
1192
  "lti",
1193
  "lingual"
1194
  ],
 
 
 
 
1195
  [
1196
  "mo",
1197
  "dèle"
 
1201
  "ltilingual"
1202
  ],
1203
  [
1204
+ "ses",
1205
+ "sion"
 
 
 
 
 
 
 
 
 
 
 
 
1206
  ],
1207
  [
1208
  "sci",
 
1212
  "sfor",
1213
  "mers"
1214
  ],
 
 
 
 
1215
  [
1216
  "tor",
1217
  "ch"
1218
  ],
1219
+ [
1220
+ "tra",
1221
+ "itement"
1222
+ ],
1223
  [
1224
  "tand",
1225
  "s"
 
1228
  "tran",
1229
  "sformers"
1230
  ],
1231
+ [
1232
+ "wor",
1233
+ "kflow"
1234
+ ],
1235
  [
1236
  "inte",
1237
  "lligence"
 
1241
  "vation"
1242
  ],
1243
  [
1244
+ "devel",
1245
+ "opment"
1246
  ],
1247
  [
1248
  "tech",
1249
  "nology"
1250
  ],
1251
  [
1252
+ "temp",
1253
+ "s"
1254
  ],
1255
  [
1256
+ "prior",
1257
+ "ites"
1258
+ ],
1259
+ [
1260
+ "arti",
1261
+ "ficial"
1262
+ ],
1263
+ [
1264
+ "retu",
1265
+ "rn"
1266
  ],
1267
  [
1268
  "genera",
1269
  "tes"
1270
  ],
1271
+ [
1272
+ "puissan",
1273
+ "t"
1274
+ ],
1275
  [
1276
  "unders",
1277
  "tands"
1278
  ],
1279
  [
1280
+ "programm",
1281
+ "ing"
1282
  ],
1283
  [
1284
+ "Engl",
1285
+ "ish"
1286
  ],
1287
  [
1288
+ "Mangitu",
1289
+ "ka"
1290
  ]
1291
  ]
1292
  }