junaidamk committed on
Commit
9e5697e
·
1 Parent(s): c0cb425

add tokenizer

Browse files
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
tokenizer.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [],
6
+ "normalizer": null,
7
+ "pre_tokenizer": null,
8
+ "post_processor": null,
9
+ "decoder": null,
10
+ "model": {
11
+ "type": "WordPiece",
12
+ "unk_token": "[UNK]",
13
+ "continuing_subword_prefix": "##",
14
+ "max_input_chars_per_word": 100,
15
+ "vocab": {}
16
+ }
17
+ }
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"tokenizer_class": "PreTrainedTokenizerFast"}