styalai committed
Commit 8a4e23b · verified · 1 Parent(s): 7719b9b

Upload tokenizer

special_tokens_map.json CHANGED
@@ -35,9 +35,9 @@
     "single_word": false
   },
   "sep_token": {
-    "content": "</s>",
+    "content": "<|end|>",
     "lstrip": false,
-    "normalized": true,
+    "normalized": false,
     "rstrip": false,
     "single_word": false
   },
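This swaps the separator special token from RoBERTa's default </s> to <|end|>, and flips normalized to false so the token string is matched verbatim rather than passed through the normalizer first. A minimal sketch to verify the result with transformers (the repo id is not shown in this commit view, so the path below is a placeholder for a local checkout):

```python
from transformers import AutoTokenizer

# Placeholder path: point this at a local checkout of the repo.
tok = AutoTokenizer.from_pretrained("path/to/checkout")

print(tok.sep_token)           # expected after this commit: "<|end|>"
print(tok.special_tokens_map)  # should now list "sep_token": "<|end|>"
```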
tokenizer.json CHANGED
@@ -3,6 +3,15 @@
   "truncation": null,
   "padding": null,
   "added_tokens": [
+    {
+      "id": 2,
+      "content": "<|end|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
     {
       "id": 3,
       "content": "<|user|>",
tokenizer_config.json CHANGED
@@ -1,6 +1,14 @@
 {
   "add_prefix_space": false,
   "added_tokens_decoder": {
+    "2": {
+      "content": "<|end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
     "3": {
       "content": "<|user|>",
       "lstrip": false,
@@ -74,7 +82,7 @@
   "mask_token": "<mask>",
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<pad>",
-  "sep_token": "</s>",
+  "sep_token": "<|end|>",
   "tokenizer_class": "RobertaTokenizer",
   "trim_offsets": true,
   "unk_token": "<unk>"