latishab committed on
Commit
3b03bd4
·
verified ·
1 Parent(s): d3188ba

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +4 -7
  2. tokenizer_config.json +0 -3
tokenizer.json CHANGED
@@ -1,13 +1,10 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 256,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
  "padding": {
10
- "strategy": "BatchLongest",
 
 
11
  "direction": "Right",
12
  "pad_to_multiple_of": null,
13
  "pad_id": 2,
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
 
 
 
 
 
4
  "padding": {
5
+ "strategy": {
6
+ "Fixed": 256
7
+ },
8
  "direction": "Right",
9
  "pad_to_multiple_of": null,
10
  "pad_id": 2,
tokenizer_config.json CHANGED
@@ -153,10 +153,7 @@
153
  "pad_token": "<|im_end|>",
154
  "pad_token_type_id": 0,
155
  "padding_side": "right",
156
- "stride": 0,
157
  "tokenizer_class": "GPT2Tokenizer",
158
- "truncation_side": "right",
159
- "truncation_strategy": "longest_first",
160
  "unk_token": "<|endoftext|>",
161
  "vocab_size": 49152
162
  }
 
153
  "pad_token": "<|im_end|>",
154
  "pad_token_type_id": 0,
155
  "padding_side": "right",
 
156
  "tokenizer_class": "GPT2Tokenizer",
 
 
157
  "unk_token": "<|endoftext|>",
158
  "vocab_size": 49152
159
  }