chnaaam committed
Commit ab891e2 · 1 Parent(s): 26860cf

add tokenizer

added_tokens.json ADDED
@@ -0,0 +1 @@
+ {"[LABEL10]": 32013, "[LABEL13]": 32016, "[LABEL7]": 32010, "[LABEL14]": 32017, "[LABEL1]": 32004, "[LABEL18]": 32021, "[LABEL19]": 32022, "[LABEL29]": 32032, "[E2]": 32002, "[LABEL12]": 32015, "[LABEL27]": 32030, "[LABEL23]": 32026, "[LABEL21]": 32024, "[LABEL15]": 32018, "[LABEL20]": 32023, "[LABEL28]": 32031, "[LABEL9]": 32012, "[LABEL30]": 32033, "[LABEL6]": 32009, "[LABEL4]": 32007, "[LABEL3]": 32006, "[LABEL17]": 32020, "[LABEL24]": 32027, "[LABEL16]": 32019, "[LABEL26]": 32029, "[/E1]": 32001, "[LABEL22]": 32025, "[/E2]": 32003, "[LABEL2]": 32005, "[LABEL8]": 32011, "[E1]": 32000, "[LABEL11]": 32014, "[LABEL5]": 32008, "[LABEL25]": 32028}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "additional_special_tokens": ["[E1]", "[/E1]", "[E2]", "[/E2]", "[LABEL1]", "[LABEL2]", "[LABEL3]", "[LABEL4]", "[LABEL5]", "[LABEL6]", "[LABEL7]", "[LABEL8]", "[LABEL9]", "[LABEL10]", "[LABEL11]", "[LABEL12]", "[LABEL13]", "[LABEL14]", "[LABEL15]", "[LABEL16]", "[LABEL17]", "[LABEL18]", "[LABEL19]", "[LABEL20]", "[LABEL21]", "[LABEL22]", "[LABEL23]", "[LABEL24]", "[LABEL25]", "[LABEL26]", "[LABEL27]", "[LABEL28]", "[LABEL29]", "[LABEL30]"]}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "do_basic_tokenize": true, "never_split": null, "bos_token": "[CLS]", "eos_token": "[SEP]", "model_max_length": 512, "special_tokens_map_file": "/home/work/.cache/huggingface/transformers/9d0c87e44b00acfbfbae931b2e4068eb6311a0c3e71e23e5400bdf57cab4bfbf.70c17d6e4d492c8f24f5bb97ab56c7f272e947112c6faf9dd846da42ba13eb23", "name_or_path": "./", "tokenizer_class": "BertTokenizer"}
vocab.txt ADDED
The diff for this file is too large to render. See raw diff