kerzgrr committed on
Commit
28727e9
·
verified ·
1 Parent(s): 3813dea

Upload 8 files

Browse files
Files changed (3) hide show
  1. merges.txt +0 -0
  2. tokenizer_config.json +2 -1
  3. vocab.json +0 -0
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,11 +1,12 @@
1
  {
2
- "tokenizer_class": "PreTrainedTokenizerFast",
3
  "tokenizer_file": "tokenizer.json",
4
  "model_max_length": 1024,
5
  "add_prefix_space": true,
6
  "clean_up_tokenization_spaces": false,
7
  "bos_token": "<|begin_of_text|>",
8
  "eos_token": "<|eot_id|>",
 
9
  "pad_token": "<|pad|>",
10
  "unk_token": "<|unk|>",
11
  "additional_special_tokens": [
 
1
  {
2
+ "tokenizer_class": "GPT2TokenizerFast",
3
  "tokenizer_file": "tokenizer.json",
4
  "model_max_length": 1024,
5
  "add_prefix_space": true,
6
  "clean_up_tokenization_spaces": false,
7
  "bos_token": "<|begin_of_text|>",
8
  "eos_token": "<|eot_id|>",
9
+ "eot_token": "<|eot_id|>",
10
  "pad_token": "<|pad|>",
11
  "unk_token": "<|unk|>",
12
  "additional_special_tokens": [
vocab.json CHANGED
The diff for this file is too large to render. See raw diff