seoyoung committed on
Commit
80ad5ba
·
1 Parent(s): a1598cd

add tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +33 -0
  2. tokenizer.json +0 -0
  3. tokenizer_config.json +36 -0
special_tokens_map.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "#@μ£Όμ†Œ#",
4
+ "#@이λͺ¨ν‹°μ½˜#",
5
+ "#@이름#",
6
+ "#@URL#",
7
+ "#@μ†Œμ†#",
8
+ "#@기타#",
9
+ "#@μ „λ²ˆ#",
10
+ "#@계정#",
11
+ "#@url#",
12
+ "#@번호#",
13
+ "#@금육#",
14
+ "#@신원#",
15
+ "#@μž₯μ†Œ#",
16
+ "#@μ‹œμŠ€ν…œ#사진#",
17
+ "#@μ‹œμŠ€ν…œ#λ™μ˜μƒ#",
18
+ "#@μ‹œμŠ€ν…œ#기타#",
19
+ "#@μ‹œμŠ€ν…œ#검색#",
20
+ "#@μ‹œμŠ€ν…œ#지도#",
21
+ "#@μ‹œμŠ€ν…œ#μ‚­μ œ#",
22
+ "#@μ‹œμŠ€ν…œ#파일#",
23
+ "#@μ‹œμŠ€ν…œ#μ†‘κΈˆ#",
24
+ "#@μ‹œμŠ€ν…œ#"
25
+ ],
26
+ "bos_token": "[BOS]",
27
+ "cls_token": "[BOS]",
28
+ "eos_token": "[EOS]",
29
+ "mask_token": "[MASK]",
30
+ "pad_token": "[PAD]",
31
+ "sep_token": "[SEP]",
32
+ "unk_token": "[UNK]"
33
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "#@μ£Όμ†Œ#",
4
+ "#@이λͺ¨ν‹°μ½˜#",
5
+ "#@이름#",
6
+ "#@URL#",
7
+ "#@μ†Œμ†#",
8
+ "#@기타#",
9
+ "#@μ „λ²ˆ#",
10
+ "#@계정#",
11
+ "#@url#",
12
+ "#@번호#",
13
+ "#@금육#",
14
+ "#@신원#",
15
+ "#@μž₯μ†Œ#",
16
+ "#@μ‹œμŠ€ν…œ#사진#",
17
+ "#@μ‹œμŠ€ν…œ#λ™μ˜μƒ#",
18
+ "#@μ‹œμŠ€ν…œ#기타#",
19
+ "#@μ‹œμŠ€ν…œ#검색#",
20
+ "#@μ‹œμŠ€ν…œ#지도#",
21
+ "#@μ‹œμŠ€ν…œ#μ‚­μ œ#",
22
+ "#@μ‹œμŠ€ν…œ#파일#",
23
+ "#@μ‹œμŠ€ν…œ#μ†‘κΈˆ#",
24
+ "#@μ‹œμŠ€ν…œ#"
25
+ ],
26
+ "bos_token": "[BOS]",
27
+ "cls_token": "[BOS]",
28
+ "eos_token": "[EOS]",
29
+ "mask_token": "[MASK]",
30
+ "name_or_path": "alaggung/bart-pretrained",
31
+ "pad_token": "[PAD]",
32
+ "sep_token": "[SEP]",
33
+ "special_tokens_map_file": "/root/.cache/huggingface/transformers/846fe2996aec3b0232d18049d01f3b998ebfaead13ce84937a0541c086d50103.fa860d1b6246633ccda064e867611d04b4fa718efef676d6b5268c3b1def2231",
34
+ "tokenizer_class": "PreTrainedTokenizerFast",
35
+ "unk_token": "[UNK]"
36
+ }