styalai committed on
Commit
d261a41
·
verified ·
1 Parent(s): e095ed4

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +9 -3
  2. tokenizer.json +18 -0
  3. tokenizer_config.json +17 -1
special_tokens_map.json CHANGED
@@ -1,5 +1,11 @@
1
  {
2
- "bos_token": "<|user|>",
 
 
 
 
 
 
3
  "cls_token": {
4
  "content": "<s>",
5
  "lstrip": false,
@@ -8,9 +14,9 @@
8
  "single_word": false
9
  },
10
  "eos_token": {
11
- "content": "</s>",
12
  "lstrip": false,
13
- "normalized": true,
14
  "rstrip": false,
15
  "single_word": false
16
  },
 
1
  {
2
+ "bos_token": {
3
+ "content": "<|user|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
  "cls_token": {
10
  "content": "<s>",
11
  "lstrip": false,
 
14
  "single_word": false
15
  },
16
  "eos_token": {
17
+ "content": "<|end|>",
18
  "lstrip": false,
19
+ "normalized": false,
20
  "rstrip": false,
21
  "single_word": false
22
  },
tokenizer.json CHANGED
@@ -3,6 +3,24 @@
3
  "truncation": null,
4
  "padding": null,
5
  "added_tokens": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  {
7
  "id": 8000,
8
  "content": "<s>",
 
3
  "truncation": null,
4
  "padding": null,
5
  "added_tokens": [
6
+ {
7
+ "id": 2,
8
+ "content": "<|end|>",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 3,
17
+ "content": "<|user|>",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
  {
25
  "id": 8000,
26
  "content": "<s>",
tokenizer_config.json CHANGED
@@ -1,6 +1,22 @@
1
  {
2
  "add_prefix_space": false,
3
  "added_tokens_decoder": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  "8000": {
5
  "content": "<s>",
6
  "lstrip": false,
@@ -45,7 +61,7 @@
45
  "bos_token": "<|user|>",
46
  "clean_up_tokenization_spaces": true,
47
  "cls_token": "<s>",
48
- "eos_token": "</s>",
49
  "errors": "replace",
50
  "mask_token": "<mask>",
51
  "model_max_length": 1000000000000000019884624838656,
 
1
  {
2
  "add_prefix_space": false,
3
  "added_tokens_decoder": {
4
+ "2": {
5
+ "content": "<|end|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "3": {
13
+ "content": "<|user|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
  "8000": {
21
  "content": "<s>",
22
  "lstrip": false,
 
61
  "bos_token": "<|user|>",
62
  "clean_up_tokenization_spaces": true,
63
  "cls_token": "<s>",
64
+ "eos_token": "<|end|>",
65
  "errors": "replace",
66
  "mask_token": "<mask>",
67
  "model_max_length": 1000000000000000019884624838656,