daje committed on
Commit
bbd9727
·
1 Parent(s): 711912c

Upload tokenizer

Browse files
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": {
6
+ "content": "[MASK]",
7
+ "lstrip": true,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "[PAD]",
13
+ "sep_token": "[SEP]",
14
+ "unk_token": "[UNK]"
15
+ }
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17dc471055592d3cc9e0a5831e769246a8a001a4d27551c9ed79668173c7b407
3
+ size 371427
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": null,
3
+ "bos_token": "[CLS]",
4
+ "cls_token": "[CLS]",
5
+ "do_lower_case": false,
6
+ "eos_token": "[SEP]",
7
+ "keep_accents": false,
8
+ "mask_token": {
9
+ "__type": "AddedToken",
10
+ "content": "[MASK]",
11
+ "lstrip": true,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "name_or_path": "skt/kobert-base-v1",
17
+ "pad_token": "[PAD]",
18
+ "remove_space": true,
19
+ "sep_token": "[SEP]",
20
+ "sp_model_kwargs": {},
21
+ "special_tokens_map_file": "/home/sseung/.cache/huggingface/hub/models--skt--kobert-base-v1/snapshots/a9f5849fce18fb088f0cd0f9b29ec3f756958464/special_tokens_map.json",
22
+ "tokenizer_class": "XLNetTokenizer",
23
+ "unk_token": "[UNK]"
24
+ }