Upload tokenizer

#1
Opened by Cratee
Files changed (3)
  1. merges.txt +1 -1
  2. tokenizer.json +2 -6
  3. tokenizer_config.json +1 -2
merges.txt CHANGED
@@ -1,4 +1,4 @@
1
- #version: 0.2 - Trained by `huggingface/tokenizers`
2
  Ġ t
3
  Ġ a
4
  h e
 
1
+ #version: 0.2
2
  Ġ t
3
  Ġ a
4
  h e
tokenizer.json CHANGED
@@ -1,11 +1,6 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 512,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
  "padding": null,
10
  "added_tokens": [
11
  {
@@ -87,6 +82,7 @@
87
  "continuing_subword_prefix": "",
88
  "end_of_word_suffix": "",
89
  "fuse_unk": false,
 
90
  "vocab": {
91
  "<s>": 0,
92
  "<pad>": 1,
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
 
82
  "continuing_subword_prefix": "",
83
  "end_of_word_suffix": "",
84
  "fuse_unk": false,
85
+ "byte_fallback": false,
86
  "vocab": {
87
  "<s>": 0,
88
  "<pad>": 1,
tokenizer_config.json CHANGED
@@ -1,15 +1,14 @@
1
  {
2
  "add_prefix_space": false,
3
  "bos_token": "<s>",
 
4
  "cls_token": "<s>",
5
  "eos_token": "</s>",
6
  "errors": "replace",
7
  "mask_token": "<mask>",
8
  "model_max_length": 1024,
9
- "name_or_path": "facebook/bart-large",
10
  "pad_token": "<pad>",
11
  "sep_token": "</s>",
12
- "special_tokens_map_file": null,
13
  "tokenizer_class": "BartTokenizer",
14
  "trim_offsets": true,
15
  "unk_token": "<unk>"
 
1
  {
2
  "add_prefix_space": false,
3
  "bos_token": "<s>",
4
+ "clean_up_tokenization_spaces": true,
5
  "cls_token": "<s>",
6
  "eos_token": "</s>",
7
  "errors": "replace",
8
  "mask_token": "<mask>",
9
  "model_max_length": 1024,
 
10
  "pad_token": "<pad>",
11
  "sep_token": "</s>",
 
12
  "tokenizer_class": "BartTokenizer",
13
  "trim_offsets": true,
14
  "unk_token": "<unk>"