| { |
| "added_tokens_decoder": { |
| "0": { |
| "content": "<pad>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": true |
| }, |
| "1": { |
| "content": "<unk>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": true |
| }, |
| "2": { |
| "content": "<s>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": true |
| }, |
| "3": { |
| "content": "</s>", |
| "lstrip": false, |
| "normalized": false, |
| "rstrip": false, |
| "single_word": false, |
| "special": true |
| }, |
| "256001": { |
| "content": "__ace__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256002": { |
| "content": "__ace_Latn__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256003": { |
| "content": "__acm__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256004": { |
| "content": "__acq__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256005": { |
| "content": "__aeb__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256006": { |
| "content": "__afr__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256007": { |
| "content": "__ajp__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256008": { |
| "content": "__aka__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256009": { |
| "content": "__amh__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256010": { |
| "content": "__apc__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256011": { |
| "content": "__arb__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256012": { |
| "content": "__ars__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256013": { |
| "content": "__ary__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256014": { |
| "content": "__arz__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256015": { |
| "content": "__asm__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256016": { |
| "content": "__ast__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256017": { |
| "content": "__awa__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256018": { |
| "content": "__ayr__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256019": { |
| "content": "__azb__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256020": { |
| "content": "__azj__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256021": { |
| "content": "__bak__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256022": { |
| "content": "__bam__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256023": { |
| "content": "__ban__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256024": { |
| "content": "__bel__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256025": { |
| "content": "__bem__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256026": { |
| "content": "__ben__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256027": { |
| "content": "__bho__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256028": { |
| "content": "__bjn__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256029": { |
| "content": "__bjn_Latn__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256030": { |
| "content": "__bod__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256031": { |
| "content": "__bos__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256032": { |
| "content": "__bug__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256033": { |
| "content": "__bul__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256034": { |
| "content": "__cat__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256035": { |
| "content": "__ceb__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256036": { |
| "content": "__ces__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256037": { |
| "content": "__cjk__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256038": { |
| "content": "__ckb__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256039": { |
| "content": "__crh__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256040": { |
| "content": "__cym__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256041": { |
| "content": "__dan__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256042": { |
| "content": "__deu__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256043": { |
| "content": "__dik__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256044": { |
| "content": "__dyu__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256045": { |
| "content": "__dzo__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256046": { |
| "content": "__ell__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256047": { |
| "content": "__eng__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256048": { |
| "content": "__epo__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256049": { |
| "content": "__est__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256050": { |
| "content": "__eus__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256051": { |
| "content": "__ewe__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256052": { |
| "content": "__fao__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256053": { |
| "content": "__pes__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256054": { |
| "content": "__fij__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256055": { |
| "content": "__fin__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256056": { |
| "content": "__fon__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256057": { |
| "content": "__fra__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256058": { |
| "content": "__fur__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256059": { |
| "content": "__fuv__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256060": { |
| "content": "__gla__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256061": { |
| "content": "__gle__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256062": { |
| "content": "__glg__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256063": { |
| "content": "__grn__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256064": { |
| "content": "__guj__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256065": { |
| "content": "__hat__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256066": { |
| "content": "__hau__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256067": { |
| "content": "__heb__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256068": { |
| "content": "__hin__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256069": { |
| "content": "__hne__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256070": { |
| "content": "__hrv__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256071": { |
| "content": "__hun__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256072": { |
| "content": "__hye__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256073": { |
| "content": "__ibo__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256074": { |
| "content": "__ilo__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256075": { |
| "content": "__ind__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256076": { |
| "content": "__isl__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256077": { |
| "content": "__ita__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256078": { |
| "content": "__jav__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256079": { |
| "content": "__jpn__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256080": { |
| "content": "__kab__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256081": { |
| "content": "__kac__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256082": { |
| "content": "__kam__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256083": { |
| "content": "__kan__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256084": { |
| "content": "__kas__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256085": { |
| "content": "__kas_Deva__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256086": { |
| "content": "__kat__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256087": { |
| "content": "__knc__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256088": { |
| "content": "__knc_Latn__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256089": { |
| "content": "__kaz__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256090": { |
| "content": "__kbp__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256091": { |
| "content": "__kea__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256092": { |
| "content": "__khm__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256093": { |
| "content": "__kik__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256094": { |
| "content": "__kin__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256095": { |
| "content": "__kir__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256096": { |
| "content": "__kmb__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256097": { |
| "content": "__kon__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256098": { |
| "content": "__kor__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256099": { |
| "content": "__kmr__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256100": { |
| "content": "__lao__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256101": { |
| "content": "__lvs__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256102": { |
| "content": "__lij__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256103": { |
| "content": "__lim__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256104": { |
| "content": "__lin__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256105": { |
| "content": "__lit__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256106": { |
| "content": "__lmo__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256107": { |
| "content": "__ltg__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256108": { |
| "content": "__ltz__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256109": { |
| "content": "__lua__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256110": { |
| "content": "__lug__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256111": { |
| "content": "__luo__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256112": { |
| "content": "__lus__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256113": { |
| "content": "__mag__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256114": { |
| "content": "__mai__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256115": { |
| "content": "__mal__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256116": { |
| "content": "__mar__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256117": { |
| "content": "__min__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256118": { |
| "content": "__mkd__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256119": { |
| "content": "__plt__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256120": { |
| "content": "__mlt__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256121": { |
| "content": "__mni__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256122": { |
| "content": "__khk__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256123": { |
| "content": "__mos__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256124": { |
| "content": "__mri__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256125": { |
| "content": "__zsm__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256126": { |
| "content": "__mya__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256127": { |
| "content": "__nld__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256128": { |
| "content": "__nno__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256129": { |
| "content": "__nob__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256130": { |
| "content": "__npi__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256131": { |
| "content": "__nso__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256132": { |
| "content": "__nus__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256133": { |
| "content": "__nya__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256134": { |
| "content": "__oci__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256135": { |
| "content": "__gaz__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256136": { |
| "content": "__ory__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256137": { |
| "content": "__pag__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256138": { |
| "content": "__pan__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256139": { |
| "content": "__pap__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256140": { |
| "content": "__pol__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256141": { |
| "content": "__por__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256142": { |
| "content": "__prs__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256143": { |
| "content": "__pbt__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256144": { |
| "content": "__quy__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256145": { |
| "content": "__ron__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256146": { |
| "content": "__run__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256147": { |
| "content": "__rus__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256148": { |
| "content": "__sag__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256149": { |
| "content": "__san__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256150": { |
| "content": "__sat__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256151": { |
| "content": "__scn__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256152": { |
| "content": "__shn__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256153": { |
| "content": "__sin__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256154": { |
| "content": "__slk__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256155": { |
| "content": "__slv__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256156": { |
| "content": "__smo__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256157": { |
| "content": "__sna__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256158": { |
| "content": "__snd__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256159": { |
| "content": "__som__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256160": { |
| "content": "__sot__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256161": { |
| "content": "__spa__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256162": { |
| "content": "__als__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256163": { |
| "content": "__srd__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256164": { |
| "content": "__srp__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256165": { |
| "content": "__ssw__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256166": { |
| "content": "__sun__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256167": { |
| "content": "__swe__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256168": { |
| "content": "__swh__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256169": { |
| "content": "__szl__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256170": { |
| "content": "__tam__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256171": { |
| "content": "__tat__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256172": { |
| "content": "__tel__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256173": { |
| "content": "__tgk__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256174": { |
| "content": "__tgl__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256175": { |
| "content": "__tha__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256176": { |
| "content": "__tir__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256177": { |
| "content": "__taq__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256178": { |
| "content": "__taq_Tfng__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256179": { |
| "content": "__tpi__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256180": { |
| "content": "__tsn__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256181": { |
| "content": "__tso__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256182": { |
| "content": "__tuk__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256183": { |
| "content": "__tum__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256184": { |
| "content": "__tur__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256185": { |
| "content": "__twi__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256186": { |
| "content": "__tzm__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256187": { |
| "content": "__uig__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256188": { |
| "content": "__ukr__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256189": { |
| "content": "__umb__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256190": { |
| "content": "__urd__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256191": { |
| "content": "__uzn__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256192": { |
| "content": "__vec__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256193": { |
| "content": "__vie__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256194": { |
| "content": "__war__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256195": { |
| "content": "__wol__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256196": { |
| "content": "__xho__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256197": { |
| "content": "__ydd__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256198": { |
| "content": "__yor__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256199": { |
| "content": "__yue__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256200": { |
| "content": "__cmn__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256201": { |
| "content": "__cmn_Hant__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256202": { |
| "content": "__zul__", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256203": { |
| "content": "<MINED_DATA>", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256204": { |
| "content": "<MMT_BT_DATA>", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| }, |
| "256205": { |
| "content": "<SMT_BT_DATA>", |
| "lstrip": true, |
| "normalized": false, |
| "rstrip": true, |
| "single_word": false, |
| "special": true |
| } |
| }, |
| "additional_special_tokens": [ |
| "__ace__", |
| "__ace_Latn__", |
| "__acm__", |
| "__acq__", |
| "__aeb__", |
| "__afr__", |
| "__ajp__", |
| "__aka__", |
| "__amh__", |
| "__apc__", |
| "__arb__", |
| "__ars__", |
| "__ary__", |
| "__arz__", |
| "__asm__", |
| "__ast__", |
| "__awa__", |
| "__ayr__", |
| "__azb__", |
| "__azj__", |
| "__bak__", |
| "__bam__", |
| "__ban__", |
| "__bel__", |
| "__bem__", |
| "__ben__", |
| "__bho__", |
| "__bjn__", |
| "__bjn_Latn__", |
| "__bod__", |
| "__bos__", |
| "__bug__", |
| "__bul__", |
| "__cat__", |
| "__ceb__", |
| "__ces__", |
| "__cjk__", |
| "__ckb__", |
| "__crh__", |
| "__cym__", |
| "__dan__", |
| "__deu__", |
| "__dik__", |
| "__dyu__", |
| "__dzo__", |
| "__ell__", |
| "__eng__", |
| "__epo__", |
| "__est__", |
| "__eus__", |
| "__ewe__", |
| "__fao__", |
| "__pes__", |
| "__fij__", |
| "__fin__", |
| "__fon__", |
| "__fra__", |
| "__fur__", |
| "__fuv__", |
| "__gla__", |
| "__gle__", |
| "__glg__", |
| "__grn__", |
| "__guj__", |
| "__hat__", |
| "__hau__", |
| "__heb__", |
| "__hin__", |
| "__hne__", |
| "__hrv__", |
| "__hun__", |
| "__hye__", |
| "__ibo__", |
| "__ilo__", |
| "__ind__", |
| "__isl__", |
| "__ita__", |
| "__jav__", |
| "__jpn__", |
| "__kab__", |
| "__kac__", |
| "__kam__", |
| "__kan__", |
| "__kas__", |
| "__kas_Deva__", |
| "__kat__", |
| "__knc__", |
| "__knc_Latn__", |
| "__kaz__", |
| "__kbp__", |
| "__kea__", |
| "__khm__", |
| "__kik__", |
| "__kin__", |
| "__kir__", |
| "__kmb__", |
| "__kon__", |
| "__kor__", |
| "__kmr__", |
| "__lao__", |
| "__lvs__", |
| "__lij__", |
| "__lim__", |
| "__lin__", |
| "__lit__", |
| "__lmo__", |
| "__ltg__", |
| "__ltz__", |
| "__lua__", |
| "__lug__", |
| "__luo__", |
| "__lus__", |
| "__mag__", |
| "__mai__", |
| "__mal__", |
| "__mar__", |
| "__min__", |
| "__mkd__", |
| "__plt__", |
| "__mlt__", |
| "__mni__", |
| "__khk__", |
| "__mos__", |
| "__mri__", |
| "__zsm__", |
| "__mya__", |
| "__nld__", |
| "__nno__", |
| "__nob__", |
| "__npi__", |
| "__nso__", |
| "__nus__", |
| "__nya__", |
| "__oci__", |
| "__gaz__", |
| "__ory__", |
| "__pag__", |
| "__pan__", |
| "__pap__", |
| "__pol__", |
| "__por__", |
| "__prs__", |
| "__pbt__", |
| "__quy__", |
| "__ron__", |
| "__run__", |
| "__rus__", |
| "__sag__", |
| "__san__", |
| "__sat__", |
| "__scn__", |
| "__shn__", |
| "__sin__", |
| "__slk__", |
| "__slv__", |
| "__smo__", |
| "__sna__", |
| "__snd__", |
| "__som__", |
| "__sot__", |
| "__spa__", |
| "__als__", |
| "__srd__", |
| "__srp__", |
| "__ssw__", |
| "__sun__", |
| "__swe__", |
| "__swh__", |
| "__szl__", |
| "__tam__", |
| "__tat__", |
| "__tel__", |
| "__tgk__", |
| "__tgl__", |
| "__tha__", |
| "__tir__", |
| "__taq__", |
| "__taq_Tfng__", |
| "__tpi__", |
| "__tsn__", |
| "__tso__", |
| "__tuk__", |
| "__tum__", |
| "__tur__", |
| "__twi__", |
| "__tzm__", |
| "__uig__", |
| "__ukr__", |
| "__umb__", |
| "__urd__", |
| "__uzn__", |
| "__vec__", |
| "__vie__", |
| "__war__", |
| "__wol__", |
| "__xho__", |
| "__ydd__", |
| "__yor__", |
| "__yue__", |
| "__cmn__", |
| "__cmn_Hant__", |
| "__zul__", |
| "<MINED_DATA>", |
| "<MMT_BT_DATA>", |
| "<SMT_BT_DATA>" |
| ], |
| "bos_token": "<s>", |
| "clean_up_tokenization_spaces": true, |
| "cls_token": "<s>", |
| "eos_token": "</s>", |
| "language_code": [ |
| "ace", |
| "ace_Latn", |
| "acm", |
| "acq", |
| "aeb", |
| "afr", |
| "ajp", |
| "aka", |
| "amh", |
| "apc", |
| "arb", |
| "ars", |
| "ary", |
| "arz", |
| "asm", |
| "ast", |
| "awa", |
| "ayr", |
| "azb", |
| "azj", |
| "bak", |
| "bam", |
| "ban", |
| "bel", |
| "bem", |
| "ben", |
| "bho", |
| "bjn", |
| "bjn_Latn", |
| "bod", |
| "bos", |
| "bug", |
| "bul", |
| "cat", |
| "ceb", |
| "ces", |
| "cjk", |
| "ckb", |
| "crh", |
| "cym", |
| "dan", |
| "deu", |
| "dik", |
| "dyu", |
| "dzo", |
| "ell", |
| "eng", |
| "epo", |
| "est", |
| "eus", |
| "ewe", |
| "fao", |
| "pes", |
| "fij", |
| "fin", |
| "fon", |
| "fra", |
| "fur", |
| "fuv", |
| "gla", |
| "gle", |
| "glg", |
| "grn", |
| "guj", |
| "hat", |
| "hau", |
| "heb", |
| "hin", |
| "hne", |
| "hrv", |
| "hun", |
| "hye", |
| "ibo", |
| "ilo", |
| "ind", |
| "isl", |
| "ita", |
| "jav", |
| "jpn", |
| "kab", |
| "kac", |
| "kam", |
| "kan", |
| "kas", |
| "kas_Deva", |
| "kat", |
| "knc", |
| "knc_Latn", |
| "kaz", |
| "kbp", |
| "kea", |
| "khm", |
| "kik", |
| "kin", |
| "kir", |
| "kmb", |
| "kon", |
| "kor", |
| "kmr", |
| "lao", |
| "lvs", |
| "lij", |
| "lim", |
| "lin", |
| "lit", |
| "lmo", |
| "ltg", |
| "ltz", |
| "lua", |
| "lug", |
| "luo", |
| "lus", |
| "mag", |
| "mai", |
| "mal", |
| "mar", |
| "min", |
| "mkd", |
| "plt", |
| "mlt", |
| "mni", |
| "khk", |
| "mos", |
| "mri", |
| "zsm", |
| "mya", |
| "nld", |
| "nno", |
| "nob", |
| "npi", |
| "nso", |
| "nus", |
| "nya", |
| "oci", |
| "gaz", |
| "ory", |
| "pag", |
| "pan", |
| "pap", |
| "pol", |
| "por", |
| "prs", |
| "pbt", |
| "quy", |
| "ron", |
| "run", |
| "rus", |
| "sag", |
| "san", |
| "sat", |
| "scn", |
| "shn", |
| "sin", |
| "slk", |
| "slv", |
| "smo", |
| "sna", |
| "snd", |
| "som", |
| "sot", |
| "spa", |
| "als", |
| "srd", |
| "srp", |
| "ssw", |
| "sun", |
| "swe", |
| "swh", |
| "szl", |
| "tam", |
| "tat", |
| "tel", |
| "tgk", |
| "tgl", |
| "tha", |
| "tir", |
| "taq", |
| "taq_Tfng", |
| "tpi", |
| "tsn", |
| "tso", |
| "tuk", |
| "tum", |
| "tur", |
| "twi", |
| "tzm", |
| "uig", |
| "ukr", |
| "umb", |
| "urd", |
| "uzn", |
| "vec", |
| "vie", |
| "war", |
| "wol", |
| "xho", |
| "ydd", |
| "yor", |
| "yue", |
| "cmn", |
| "cmn_Hant", |
| "zul" |
| ], |
| "model_max_length": 1000000000000000019884624838656, |
| "pad_token": "<pad>", |
| "processor_class": "SeamlessM4TProcessor", |
| "sep_token": "</s>", |
| "sp_model_kwargs": {}, |
| "src_lang": "__eng__", |
| "tgt_lang": "__fra__", |
| "tokenizer_class": "SeamlessM4TTokenizer", |
| "tokenizer_file": null, |
| "unk_token": "<unk>" |
| } |
|
|