Update Aricate custom tokenizer vocabulary.
Browse files- aricate_tokenizer.txt +1 -0
aricate_tokenizer.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"<pad>": 0, "<unk>": 1, "<eos>": 2, "<sep>": 3, "?": 4, "pika": 5, "!": 6, "tu": 7, "chu": 8, "quel": 9, "est": 10, "pika...": 11, "as": 12, "ton": 13, "pika-chu": 14, "un": 15, "pikachu": 16, "quelle": 17, "de": 18, "pika.": 19, "où": 20, "veux": 21, "es": 22, "en": 23, "est-ce": 24, "que": 25, "la": 26, "c'est": 27, "quoi": 28, "comment": 29, "fait": 30, "aujourd'hui": 31, "plat": 32, "préféré": 33, "jouer": 34, "heure": 35, "est-il": 36, "faim": 37, "dis": 38, "ça": 39, "vas-tu": 40, "vu": 41, "sacha": 42, "peux-tu": 43, "on": 44, "le": 45, "bruit": 46, "chu...": 47, "aimes": 48, "je": 49, "peux": 50, "te": 51, "va": 52, "merci": 53, "es-tu": 54, "pika..": 55, "chu.": 56, "!!": 57, "temps": 58, "fait-il": 59, "chaaaa...": 60, "habites-tu": 61, "triste": 62, "content": 63, "pikachu,": 64, "attaque": 65, "favorite": 66, "'bonjour'": 67, "français.": 68, "m'aimes": 69, "chaaa": 70, "capitale": 71, "france": 72, "ta": 73, "couleur": 74, "préférée": 75, "chou": 76, "danser": 77, "âge": 78, "as-tu": 79, "mot": 80, "anglais.": 81, "peur": 82, "chhh...": 83, "raconte-moi": 84, "une": 85, "histoire.": 86, "pour": 87, "voler": 88, "chaa": 89, "qu'est-ce": 90, "regardes": 91, "coup": 92, "jus": 93, "nom": 94, "évolution": 95, "fais": 96, "d'animal.": 97, "fatigué": 98, "dis-moi": 99, "secret.": 100, "pika-chu.": 101, "les": 102, "pommes": 103, "qu'il": 104, "chaud": 105, "chaaa...": 106, "des": 107, "amis": 108, "compte": 109, "jusqu'à": 110, "trois.": 111, "quand": 112, "noël": 113, "chapeau": 114, "caresser": 115, "chocolat": 116, "niveau": 117, "beaucoup.": 118, "à": 119, "bientôt": 120, "qui": 121, "salut,": 122, "quoi?": 123, "pika,": 124, "salut": 125, "pika....pika...chu": 126, "forme": 127, "coucou": 128, "pika...chu!!": 129, "bonjour": 130, "vas": 131, "bien": 132, "...": 133, "neuf": 134, "pika-pika,": 135, "chuu": 136, "ce": 137, "pikaa.": 138, "prendre": 139, "photo": 140, "au": 141, "revoir": 142, "m'écoutes": 143, "parler": 144, "français": 145, "type": 146}
|