Samuael committed on
Commit
5ef105c
·
1 Parent(s): a65819c

Upload tokenizer

Browse files
added_tokens.json CHANGED
@@ -1,6 +1,7 @@
1
  {
2
- "</s>": 305,
3
- "<mask>": 307,
4
- "<s>": 304,
5
- "<unk>": 306
 
6
  }
 
1
  {
2
+ "</EOS>": 304,
3
+ "</s>": 306,
4
+ "<mask>": 308,
5
+ "<s>": 307,
6
+ "<unk>": 305
7
  }
special_tokens_map.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "bos_token": {
3
- "content": "<s>",
4
  "lstrip": false,
5
  "normalized": true,
6
  "rstrip": false,
@@ -14,7 +14,7 @@
14
  "single_word": false
15
  },
16
  "eos_token": {
17
- "content": "</s>",
18
  "lstrip": false,
19
  "normalized": true,
20
  "rstrip": false,
 
1
  {
2
  "bos_token": {
3
+ "content": "<BOS>",
4
  "lstrip": false,
5
  "normalized": true,
6
  "rstrip": false,
 
14
  "single_word": false
15
  },
16
  "eos_token": {
17
+ "content": "</EOS>",
18
  "lstrip": false,
19
  "normalized": true,
20
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -9,8 +9,16 @@
9
  "single_word": false,
10
  "special": true
11
  },
 
 
 
 
 
 
 
 
12
  "304": {
13
- "content": "<s>",
14
  "lstrip": false,
15
  "normalized": true,
16
  "rstrip": false,
@@ -18,7 +26,7 @@
18
  "special": true
19
  },
20
  "305": {
21
- "content": "</s>",
22
  "lstrip": false,
23
  "normalized": true,
24
  "rstrip": false,
@@ -26,7 +34,7 @@
26
  "special": true
27
  },
28
  "306": {
29
- "content": "<unk>",
30
  "lstrip": false,
31
  "normalized": true,
32
  "rstrip": false,
@@ -34,6 +42,14 @@
34
  "special": true
35
  },
36
  "307": {
 
 
 
 
 
 
 
 
37
  "content": "<mask>",
38
  "lstrip": true,
39
  "normalized": true,
@@ -42,10 +58,10 @@
42
  "special": true
43
  }
44
  },
45
- "bos_token": "<s>",
46
  "clean_up_tokenization_spaces": true,
47
  "cls_token": "<s>",
48
- "eos_token": "</s>",
49
  "errors": "replace",
50
  "mask_token": "<mask>",
51
  "model_max_length": 1000000000000000019884624838656,
 
9
  "single_word": false,
10
  "special": true
11
  },
12
+ "2": {
13
+ "content": "<BOS>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
  "304": {
21
+ "content": "</EOS>",
22
  "lstrip": false,
23
  "normalized": true,
24
  "rstrip": false,
 
26
  "special": true
27
  },
28
  "305": {
29
+ "content": "<unk>",
30
  "lstrip": false,
31
  "normalized": true,
32
  "rstrip": false,
 
34
  "special": true
35
  },
36
  "306": {
37
+ "content": "</s>",
38
  "lstrip": false,
39
  "normalized": true,
40
  "rstrip": false,
 
42
  "special": true
43
  },
44
  "307": {
45
+ "content": "<s>",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "308": {
53
  "content": "<mask>",
54
  "lstrip": true,
55
  "normalized": true,
 
58
  "special": true
59
  }
60
  },
61
+ "bos_token": "<BOS>",
62
  "clean_up_tokenization_spaces": true,
63
  "cls_token": "<s>",
64
+ "eos_token": "</EOS>",
65
  "errors": "replace",
66
  "mask_token": "<mask>",
67
  "model_max_length": 1000000000000000019884624838656,