Rofla committed
Commit 9bbca68 · verified · Parent: 0a6d00c

Delete tokenizer

tokenizer/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
tokenizer/special_tokens_map.json DELETED
@@ -1,15 +0,0 @@
-{
-  "bos_token": "<s>",
-  "cls_token": "<s>",
-  "eos_token": "</s>",
-  "mask_token": {
-    "content": "<mask>",
-    "lstrip": true,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": "<pad>",
-  "sep_token": "</s>",
-  "unk_token": "<unk>"
-}
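
For reference, the deleted special_tokens_map.json is a standard transformers special-tokens map; the nested mask_token entry with its lstrip/rstrip/normalized flags corresponds to an AddedToken. A minimal sketch of rebuilding the same map in Python (names are illustrative; values are copied from the diff above):

from tokenizers import AddedToken

# Reconstruction of the deleted map; AddedToken carries the
# stripping/normalization flags that a plain string cannot.
special_tokens = {
    "bos_token": "<s>",
    "cls_token": "<s>",
    "eos_token": "</s>",
    "mask_token": AddedToken(
        "<mask>", lstrip=True, normalized=False, rstrip=False, single_word=False
    ),
    "pad_token": "<pad>",
    "sep_token": "</s>",
    "unk_token": "<unk>",
}

Passing this dict to a compatible tokenizer's add_special_tokens(...) would restore the same special-token behavior.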
 
tokenizer/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
tokenizer/tokenizer_config.json DELETED
@@ -1,20 +0,0 @@
-{
-  "add_prefix_space": false,
-  "bos_token": "<s>",
-  "clean_up_tokenization_spaces": true,
-  "cls_token": "<s>",
-  "eos_token": "</s>",
-  "errors": "replace",
-  "mask_token": "<mask>",
-  "max_length": null,
-  "model_max_length": 512,
-  "pad_to_multiple_of": null,
-  "pad_token": "<pad>",
-  "pad_token_type_id": 0,
-  "padding_side": "right",
-  "processor_class": "ClapProcessor",
-  "sep_token": "</s>",
-  "tokenizer_class": "RobertaTokenizer",
-  "trim_offsets": true,
-  "unk_token": "<unk>"
-}
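
Since tokenizer_config.json pins tokenizer_class to RobertaTokenizer and processor_class to ClapProcessor, the deleted files were loadable through transformers' auto classes. A minimal sketch, assuming a hypothetical repo id (the actual repo name is not shown on this page):

from transformers import AutoTokenizer

# "Rofla/<repo>" is a placeholder; subfolder matches the deleted tokenizer/ directory.
tokenizer = AutoTokenizer.from_pretrained("Rofla/<repo>", subfolder="tokenizer")
assert tokenizer.model_max_length == 512  # as pinned in the config above

After this commit, such a call would fail for the tokenizer/ subfolder, since merges.txt, vocab.json, tokenizer.json, and both config files are removed.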
 
tokenizer/vocab.json DELETED
The diff for this file is too large to render. See raw diff