rufeshe committed on
Commit
f008ee8
·
verified ·
1 Parent(s): 3e608cc

Upload tokenizer

Browse files
Files changed (2) hide show
  1. README.md +3 -0
  2. tokenizer_config.json +9 -1
README.md CHANGED
@@ -1,3 +1,6 @@
 
 
 
1
  library_name: transformers
2
  tags:
3
  - amharic
 
1
+ ---
2
+ {}
3
+ ---
4
  library_name: transformers
5
  tags:
6
  - amharic
tokenizer_config.json CHANGED
@@ -45,10 +45,18 @@
45
  "clean_up_tokenization_spaces": true,
46
  "cls_token": "<s>",
47
  "eos_token": "</s>",
 
48
  "mask_token": "<mask>",
 
49
  "model_max_length": 512,
 
50
  "pad_token": "<pad>",
 
 
51
  "sep_token": "</s>",
52
- "tokenizer_class": "XLMRobertaTokenizer",
 
 
 
53
  "unk_token": "<unk>"
54
  }
 
45
  "clean_up_tokenization_spaces": true,
46
  "cls_token": "<s>",
47
  "eos_token": "</s>",
48
+ "extra_special_tokens": {},
49
  "mask_token": "<mask>",
50
+ "max_length": 128,
51
  "model_max_length": 512,
52
+ "pad_to_multiple_of": null,
53
  "pad_token": "<pad>",
54
+ "pad_token_type_id": 0,
55
+ "padding_side": "right",
56
  "sep_token": "</s>",
57
+ "stride": 0,
58
+ "tokenizer_class": "XLMRobertaTokenizerFast",
59
+ "truncation_side": "right",
60
+ "truncation_strategy": "longest_first",
61
  "unk_token": "<unk>"
62
  }