AlexWortega committed on
Commit
49b337f
·
1 Parent(s): dba0237

Upload tokenizer

Browse files
merges.txt CHANGED
@@ -1,4 +1,4 @@
1
- #version: 0.2 - Trained by `huggingface/tokenizers`
2
  Ġ Ð
3
  Ð ¾
4
  Ð µ
 
1
+ #version: 0.2
2
  Ġ Ð
3
  Ð ¾
4
  Ð µ
special_tokens_map.json CHANGED
@@ -20,6 +20,7 @@
20
  "rstrip": false,
21
  "single_word": false
22
  },
 
23
  "unk_token": {
24
  "content": "<|endoftext|>",
25
  "lstrip": false,
 
20
  "rstrip": false,
21
  "single_word": false
22
  },
23
+ "pad_token": "<|endoftext|>",
24
  "unk_token": {
25
  "content": "<|endoftext|>",
26
  "lstrip": false,
tokenizer.json CHANGED
@@ -84,6 +84,7 @@
84
  "continuing_subword_prefix": "",
85
  "end_of_word_suffix": "",
86
  "fuse_unk": false,
 
87
  "vocab": {
88
  "<pad>": 0,
89
  "<s>": 1,
 
84
  "continuing_subword_prefix": "",
85
  "end_of_word_suffix": "",
86
  "fuse_unk": false,
87
+ "byte_fallback": false,
88
  "vocab": {
89
  "<pad>": 0,
90
  "<s>": 1,
tokenizer_config.json CHANGED
@@ -20,6 +20,7 @@
20
  "errors": "replace",
21
  "model_max_length": 1000000000000000019884624838656,
22
  "pad_token": null,
 
23
  "special_tokens_map_file": null,
24
  "tokenizer_class": "GPT2Tokenizer",
25
  "unk_token": {
 
20
  "errors": "replace",
21
  "model_max_length": 1000000000000000019884624838656,
22
  "pad_token": null,
23
+ "padding_side": "left",
24
  "special_tokens_map_file": null,
25
  "tokenizer_class": "GPT2Tokenizer",
26
  "unk_token": {