| { |
| "added_tokens_decoder": { |
| "0": { |
| "content": "<pad>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": true |
| }, |
| "1": { |
| "content": "<unk>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": true |
| }, |
| "2": { |
| "content": "<s>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": true |
| }, |
| "3": { |
| "content": "</s>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": true |
| }, |
| "256001": { |
| "content": "__ace__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256002": { |
| "content": "__ace_Latn__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256003": { |
| "content": "__acm__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256004": { |
| "content": "__acq__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256005": { |
| "content": "__aeb__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256006": { |
| "content": "__afr__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256007": { |
| "content": "__ajp__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256008": { |
| "content": "__aka__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256009": { |
| "content": "__amh__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256010": { |
| "content": "__apc__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256011": { |
| "content": "__arb__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256012": { |
| "content": "__ars__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256013": { |
| "content": "__ary__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256014": { |
| "content": "__arz__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256015": { |
| "content": "__asm__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256016": { |
| "content": "__ast__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256017": { |
| "content": "__awa__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256018": { |
| "content": "__ayr__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256019": { |
| "content": "__azb__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256020": { |
| "content": "__azj__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256021": { |
| "content": "__bak__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256022": { |
| "content": "__bam__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256023": { |
| "content": "__ban__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256024": { |
| "content": "__bel__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256025": { |
| "content": "__bem__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256026": { |
| "content": "__ben__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256027": { |
| "content": "__bho__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256028": { |
| "content": "__bjn__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256029": { |
| "content": "__bjn_Latn__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256030": { |
| "content": "__bod__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256031": { |
| "content": "__bos__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256032": { |
| "content": "__bug__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256033": { |
| "content": "__bul__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256034": { |
| "content": "__cat__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256035": { |
| "content": "__ceb__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256036": { |
| "content": "__ces__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256037": { |
| "content": "__cjk__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256038": { |
| "content": "__ckb__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256039": { |
| "content": "__crh__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256040": { |
| "content": "__cym__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256041": { |
| "content": "__dan__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256042": { |
| "content": "__deu__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256043": { |
| "content": "__dik__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256044": { |
| "content": "__dyu__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256045": { |
| "content": "__dzo__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256046": { |
| "content": "__ell__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256047": { |
| "content": "__eng__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256048": { |
| "content": "__epo__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256049": { |
| "content": "__est__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256050": { |
| "content": "__eus__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256051": { |
| "content": "__ewe__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256052": { |
| "content": "__fao__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256053": { |
| "content": "__pes__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256054": { |
| "content": "__fij__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256055": { |
| "content": "__fin__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256056": { |
| "content": "__fon__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256057": { |
| "content": "__fra__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256058": { |
| "content": "__fur__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256059": { |
| "content": "__fuv__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256060": { |
| "content": "__gla__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256061": { |
| "content": "__gle__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256062": { |
| "content": "__glg__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256063": { |
| "content": "__grn__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256064": { |
| "content": "__guj__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256065": { |
| "content": "__hat__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256066": { |
| "content": "__hau__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256067": { |
| "content": "__heb__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256068": { |
| "content": "__hin__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256069": { |
| "content": "__hne__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256070": { |
| "content": "__hrv__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256071": { |
| "content": "__hun__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256072": { |
| "content": "__hye__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256073": { |
| "content": "__ibo__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256074": { |
| "content": "__ilo__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256075": { |
| "content": "__ind__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256076": { |
| "content": "__isl__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256077": { |
| "content": "__ita__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256078": { |
| "content": "__jav__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256079": { |
| "content": "__jpn__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256080": { |
| "content": "__kab__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256081": { |
| "content": "__kac__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256082": { |
| "content": "__kam__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256083": { |
| "content": "__kan__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256084": { |
| "content": "__kas__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256085": { |
| "content": "__kas_Deva__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256086": { |
| "content": "__kat__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256087": { |
| "content": "__knc__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256088": { |
| "content": "__knc_Latn__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256089": { |
| "content": "__kaz__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256090": { |
| "content": "__kbp__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256091": { |
| "content": "__kea__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256092": { |
| "content": "__khm__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256093": { |
| "content": "__kik__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256094": { |
| "content": "__kin__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256095": { |
| "content": "__kir__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256096": { |
| "content": "__kmb__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256097": { |
| "content": "__kon__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256098": { |
| "content": "__kor__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256099": { |
| "content": "__kmr__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256100": { |
| "content": "__lao__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256101": { |
| "content": "__lvs__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256102": { |
| "content": "__lij__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256103": { |
| "content": "__lim__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256104": { |
| "content": "__lin__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256105": { |
| "content": "__lit__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256106": { |
| "content": "__lmo__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256107": { |
| "content": "__ltg__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256108": { |
| "content": "__ltz__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256109": { |
| "content": "__lua__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256110": { |
| "content": "__lug__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256111": { |
| "content": "__luo__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256112": { |
| "content": "__lus__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256113": { |
| "content": "__mag__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256114": { |
| "content": "__mai__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256115": { |
| "content": "__mal__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256116": { |
| "content": "__mar__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256117": { |
| "content": "__min__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256118": { |
| "content": "__mkd__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256119": { |
| "content": "__plt__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256120": { |
| "content": "__mlt__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256121": { |
| "content": "__mni__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256122": { |
| "content": "__khk__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256123": { |
| "content": "__mos__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256124": { |
| "content": "__mri__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256125": { |
| "content": "__zsm__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256126": { |
| "content": "__mya__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256127": { |
| "content": "__nld__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256128": { |
| "content": "__nno__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256129": { |
| "content": "__nob__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256130": { |
| "content": "__npi__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256131": { |
| "content": "__nso__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256132": { |
| "content": "__nus__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256133": { |
| "content": "__nya__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256134": { |
| "content": "__oci__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256135": { |
| "content": "__gaz__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256136": { |
| "content": "__ory__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256137": { |
| "content": "__pag__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256138": { |
| "content": "__pan__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256139": { |
| "content": "__pap__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256140": { |
| "content": "__pol__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256141": { |
| "content": "__por__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256142": { |
| "content": "__prs__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256143": { |
| "content": "__pbt__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256144": { |
| "content": "__quy__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256145": { |
| "content": "__ron__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256146": { |
| "content": "__run__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256147": { |
| "content": "__rus__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256148": { |
| "content": "__sag__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256149": { |
| "content": "__san__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256150": { |
| "content": "__sat__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256151": { |
| "content": "__scn__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256152": { |
| "content": "__shn__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256153": { |
| "content": "__sin__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256154": { |
| "content": "__slk__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256155": { |
| "content": "__slv__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256156": { |
| "content": "__smo__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256157": { |
| "content": "__sna__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256158": { |
| "content": "__snd__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256159": { |
| "content": "__som__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256160": { |
| "content": "__sot__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256161": { |
| "content": "__spa__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256162": { |
| "content": "__als__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256163": { |
| "content": "__srd__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256164": { |
| "content": "__srp__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256165": { |
| "content": "__ssw__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256166": { |
| "content": "__sun__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256167": { |
| "content": "__swe__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256168": { |
| "content": "__swh__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256169": { |
| "content": "__szl__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256170": { |
| "content": "__tam__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256171": { |
| "content": "__tat__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256172": { |
| "content": "__tel__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256173": { |
| "content": "__tgk__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256174": { |
| "content": "__tgl__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256175": { |
| "content": "__tha__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256176": { |
| "content": "__tir__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256177": { |
| "content": "__taq__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256178": { |
| "content": "__taq_Tfng__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256179": { |
| "content": "__tpi__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256180": { |
| "content": "__tsn__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256181": { |
| "content": "__tso__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256182": { |
| "content": "__tuk__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256183": { |
| "content": "__tum__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256184": { |
| "content": "__tur__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256185": { |
| "content": "__twi__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256186": { |
| "content": "__tzm__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256187": { |
| "content": "__uig__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256188": { |
| "content": "__ukr__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256189": { |
| "content": "__umb__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256190": { |
| "content": "__urd__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256191": { |
| "content": "__uzn__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256192": { |
| "content": "__vec__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256193": { |
| "content": "__vie__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256194": { |
| "content": "__war__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256195": { |
| "content": "__wol__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256196": { |
| "content": "__xho__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256197": { |
| "content": "__ydd__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256198": { |
| "content": "__yor__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256199": { |
| "content": "__yue__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256200": { |
| "content": "__cmn__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256201": { |
| "content": "__cmn_Hant__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256202": { |
| "content": "__zul__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| } |
| }, |
| "additional_special_tokens": [ |
| "<pad>", |
| "<unk>", |
| "<s>", |
| "</s>", |
| "__ace__", |
| "__ace_Latn__", |
| "__acm__", |
| "__acq__", |
| "__aeb__", |
| "__afr__", |
| "__ajp__", |
| "__aka__", |
| "__amh__", |
| "__apc__", |
| "__arb__", |
| "__ars__", |
| "__ary__", |
| "__arz__", |
| "__asm__", |
| "__ast__", |
| "__awa__", |
| "__ayr__", |
| "__azb__", |
| "__azj__", |
| "__bak__", |
| "__bam__", |
| "__ban__", |
| "__bel__", |
| "__bem__", |
| "__ben__", |
| "__bho__", |
| "__bjn__", |
| "__bjn_Latn__", |
| "__bod__", |
| "__bos__", |
| "__bug__", |
| "__bul__", |
| "__cat__", |
| "__ceb__", |
| "__ces__", |
| "__cjk__", |
| "__ckb__", |
| "__crh__", |
| "__cym__", |
| "__dan__", |
| "__deu__", |
| "__dik__", |
| "__dyu__", |
| "__dzo__", |
| "__ell__", |
| "__eng__", |
| "__epo__", |
| "__est__", |
| "__eus__", |
| "__ewe__", |
| "__fao__", |
| "__pes__", |
| "__fij__", |
| "__fin__", |
| "__fon__", |
| "__fra__", |
| "__fur__", |
| "__fuv__", |
| "__gla__", |
| "__gle__", |
| "__glg__", |
| "__grn__", |
| "__guj__", |
| "__hat__", |
| "__hau__", |
| "__heb__", |
| "__hin__", |
| "__hne__", |
| "__hrv__", |
| "__hun__", |
| "__hye__", |
| "__ibo__", |
| "__ilo__", |
| "__ind__", |
| "__isl__", |
| "__ita__", |
| "__jav__", |
| "__jpn__", |
| "__kab__", |
| "__kac__", |
| "__kam__", |
| "__kan__", |
| "__kas__", |
| "__kas_Deva__", |
| "__kat__", |
| "__knc__", |
| "__knc_Latn__", |
| "__kaz__", |
| "__kbp__", |
| "__kea__", |
| "__khm__", |
| "__kik__", |
| "__kin__", |
| "__kir__", |
| "__kmb__", |
| "__kon__", |
| "__kor__", |
| "__kmr__", |
| "__lao__", |
| "__lvs__", |
| "__lij__", |
| "__lim__", |
| "__lin__", |
| "__lit__", |
| "__lmo__", |
| "__ltg__", |
| "__ltz__", |
| "__lua__", |
| "__lug__", |
| "__luo__", |
| "__lus__", |
| "__mag__", |
| "__mai__", |
| "__mal__", |
| "__mar__", |
| "__min__", |
| "__mkd__", |
| "__plt__", |
| "__mlt__", |
| "__mni__", |
| "__khk__", |
| "__mos__", |
| "__mri__", |
| "__zsm__", |
| "__mya__", |
| "__nld__", |
| "__nno__", |
| "__nob__", |
| "__npi__", |
| "__nso__", |
| "__nus__", |
| "__nya__", |
| "__oci__", |
| "__gaz__", |
| "__ory__", |
| "__pag__", |
| "__pan__", |
| "__pap__", |
| "__pol__", |
| "__por__", |
| "__prs__", |
| "__pbt__", |
| "__quy__", |
| "__ron__", |
| "__run__", |
| "__rus__", |
| "__sag__", |
| "__san__", |
| "__sat__", |
| "__scn__", |
| "__shn__", |
| "__sin__", |
| "__slk__", |
| "__slv__", |
| "__smo__", |
| "__sna__", |
| "__snd__", |
| "__som__", |
| "__sot__", |
| "__spa__", |
| "__als__", |
| "__srd__", |
| "__srp__", |
| "__ssw__", |
| "__sun__", |
| "__swe__", |
| "__swh__", |
| "__szl__", |
| "__tam__", |
| "__tat__", |
| "__tel__", |
| "__tgk__", |
| "__tgl__", |
| "__tha__", |
| "__tir__", |
| "__taq__", |
| "__taq_Tfng__", |
| "__tpi__", |
| "__tsn__", |
| "__tso__", |
| "__tuk__", |
| "__tum__", |
| "__tur__", |
| "__twi__", |
| "__tzm__", |
| "__uig__", |
| "__ukr__", |
| "__umb__", |
| "__urd__", |
| "__uzn__", |
| "__vec__", |
| "__vie__", |
| "__war__", |
| "__wol__", |
| "__xho__", |
| "__ydd__", |
| "__yor__", |
| "__yue__", |
| "__cmn__", |
| "__cmn_Hant__", |
| "__zul__" |
| ], |
| "bos_token": "<s>", |
| "clean_up_tokenization_spaces": true, |
| "cls_token": "<s>", |
| "eos_token": "</s>", |
| "model_max_length": 1000000000000000019884624838656, |
| "pad_token": "<pad>", |
| "processor_class": "SeamlessM4TProcessor", |
| "sep_token": "</s>", |
| "sp_model_kwargs": {}, |
| "src_lang": "__eng__", |
| "tgt_lang": "__fra__", |
| "tokenizer_class": "SeamlessM4TTokenizer", |
| "tokenizer_file": null, |
| "unk_token": "<unk>" |
| } |
|
|