bps-indosbert-base-p2 / tokenizer_config.json
yahyaabd's picture
Upload folder using huggingface_hub
504f3cf verified
{
"added_tokens_decoder": {
"0": {
"content": "[PAD]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "[UNK]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "[CLS]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"3": {
"content": "[SEP]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"4": {
"content": "[MASK]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30521": {
"content": "BPS",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30522": {
"content": "Badan Pusat Statistik",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30523": {
"content": "PDRB",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30524": {
"content": "PDB",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30525": {
"content": "IHK",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30526": {
"content": "IPH",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30527": {
"content": "IHP",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30528": {
"content": "IHPB",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30529": {
"content": "NTP",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30530": {
"content": "ITK",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30531": {
"content": "ITB",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30532": {
"content": "IMK",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30533": {
"content": "LPE",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30534": {
"content": "TPK",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30535": {
"content": "TPT",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30536": {
"content": "UMP",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30537": {
"content": "IPM",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30538": {
"content": "SP2020",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30539": {
"content": "SUPAS",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30540": {
"content": "SDKI",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30541": {
"content": "SDGI",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30542": {
"content": "KB",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30543": {
"content": "HLS",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30544": {
"content": "RLS",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30545": {
"content": "SUTAS",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30546": {
"content": "ST2023",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30547": {
"content": "ST2013",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30548": {
"content": "LTT",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30549": {
"content": "LTN",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30550": {
"content": "NTUP",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30551": {
"content": "BPP",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30552": {
"content": "Ekspor",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30553": {
"content": "Impor",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30554": {
"content": "HS",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30555": {
"content": "FOB",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30556": {
"content": "CIF",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30557": {
"content": "Sakernas",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30558": {
"content": "TPAK",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30559": {
"content": "PKL",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30560": {
"content": "KKL",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30561": {
"content": "KKI",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30562": {
"content": "KCI",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30563": {
"content": "KRT",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30564": {
"content": "RT",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30565": {
"content": "RW",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30566": {
"content": "BRS",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30567": {
"content": "Publikasi",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30568": {
"content": "_Statistik",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30569": {
"content": "Sektoral",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30570": {
"content": "Metadata",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30571": {
"content": "Katalog",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30572": {
"content": "Survei",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30573": {
"content": "EKSPOR",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30574": {
"content": "IMPOR",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30575": {
"content": "SAKERNAS",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30576": {
"content": "PUBLIKASI",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30577": {
"content": "KEGIATAN",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30578": {
"content": "STATISTIK",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30579": {
"content": "SEKTORAL",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30580": {
"content": "KATALOG",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30581": {
"content": "SURVEI",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30582": {
"content": "pdrb",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30583": {
"content": "ihk",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30584": {
"content": "iph",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30585": {
"content": "ntp",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30586": {
"content": "itk",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30587": {
"content": "imk",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30588": {
"content": "lpe",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30589": {
"content": "tpk",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30590": {
"content": "tpt",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30591": {
"content": "ipm",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30592": {
"content": "sp2020",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30593": {
"content": "supas",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30594": {
"content": "sdki",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30595": {
"content": "sdgi",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30596": {
"content": "hls",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30597": {
"content": "rls",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30598": {
"content": "sutas",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30599": {
"content": "st2023",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30600": {
"content": "st2013",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30601": {
"content": "ltt",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30602": {
"content": "ltn",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30603": {
"content": "ntup",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30604": {
"content": "bpp",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30605": {
"content": "fob",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30606": {
"content": "cif",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30607": {
"content": "sakernas",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30608": {
"content": "tpak",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30609": {
"content": "kkl",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30610": {
"content": "kki",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30611": {
"content": "kci",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30612": {
"content": "krt",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30613": {
"content": "brs",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30614": {
"content": "sektoral",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30615": {
"content": "metadata",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30616": {
"content": "SP2010",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30617": {
"content": "Statistik",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30618": {
"content": "SUSENAS",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30619": {
"content": "SHK",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30620": {
"content": "SP",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30621": {
"content": "SE",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30622": {
"content": "ST",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30623": {
"content": "Podes",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30624": {
"content": "IBS",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30625": {
"content": "SKHI",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30626": {
"content": "GK",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30627": {
"content": "METADATA",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30628": {
"content": "PODES",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30629": {
"content": "ihp",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30630": {
"content": "ihpb",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30631": {
"content": "sp2010",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30632": {
"content": "susenas",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30633": {
"content": "shk",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30634": {
"content": "podes",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30635": {
"content": "ibs",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30636": {
"content": "skhi",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30637": {
"content": "GDP",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30638": {
"content": "GRDP",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30639": {
"content": "CPI",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30640": {
"content": "IPP",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30641": {
"content": "HDI",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30642": {
"content": "UHC",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30643": {
"content": "MYS",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30644": {
"content": "LFPR",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30645": {
"content": "MDGs",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30646": {
"content": "SDGs",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30647": {
"content": "LFS",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30648": {
"content": "BOP",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30649": {
"content": "CMR",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30650": {
"content": "IMR",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30651": {
"content": "U5MR",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30652": {
"content": "MMR",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30653": {
"content": "TFR",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30654": {
"content": "ASFR",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
}
},
"clean_up_tokenization_spaces": true,
"cls_token": "[CLS]",
"do_basic_tokenize": true,
"do_lower_case": true,
"extra_special_tokens": {},
"mask_token": "[MASK]",
"model_max_length": 512,
"never_split": null,
"pad_token": "[PAD]",
"sep_token": "[SEP]",
"strip_accents": null,
"tokenize_chinese_chars": true,
"tokenizer_class": "BertTokenizer",
"unk_token": "[UNK]"
}