EXCO123 committed on
Commit
8f02703
·
verified ·
1 Parent(s): 8b535b3

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +2 -2
  2. tokenizer_config.json +7 -0
tokenizer.json CHANGED
@@ -2,13 +2,13 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 600,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
  "padding": {
10
  "strategy": {
11
- "Fixed": 600
12
  },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 512,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
  "padding": {
10
  "strategy": {
11
+ "Fixed": 512
12
  },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
tokenizer_config.json CHANGED
@@ -147,9 +147,16 @@
147
  "clean_up_tokenization_spaces": false,
148
  "eos_token": "<|im_end|>",
149
  "extra_special_tokens": {},
 
150
  "model_max_length": 8192,
 
151
  "pad_token": "<|im_end|>",
 
 
 
152
  "tokenizer_class": "GPT2Tokenizer",
 
 
153
  "unk_token": "<|endoftext|>",
154
  "vocab_size": 49152
155
  }
 
147
  "clean_up_tokenization_spaces": false,
148
  "eos_token": "<|im_end|>",
149
  "extra_special_tokens": {},
150
+ "max_length": 600,
151
  "model_max_length": 8192,
152
+ "pad_to_multiple_of": null,
153
  "pad_token": "<|im_end|>",
154
+ "pad_token_type_id": 0,
155
+ "padding_side": "right",
156
+ "stride": 0,
157
  "tokenizer_class": "GPT2Tokenizer",
158
+ "truncation_side": "right",
159
+ "truncation_strategy": "longest_first",
160
  "unk_token": "<|endoftext|>",
161
  "vocab_size": 49152
162
  }