cipher982 committed on
Commit
d485abc
·
1 Parent(s): 9c7a101

Upload tokenizer

Browse files
Files changed (3) hide show
  1. added_tokens.json +6 -0
  2. special_tokens_map.json +7 -0
  3. tokenizer.json +42 -1
added_tokens.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "<END_A>": 32003,
3
+ "<END_Q>": 32001,
4
+ "<START_A>": 32002,
5
+ "<START_Q>": 32000
6
+ }
special_tokens_map.json CHANGED
@@ -1,4 +1,10 @@
1
  {
 
 
 
 
 
 
2
  "bos_token": {
3
  "content": "<s>",
4
  "lstrip": false,
@@ -13,6 +19,7 @@
13
  "rstrip": false,
14
  "single_word": false
15
  },
 
16
  "unk_token": {
17
  "content": "<unk>",
18
  "lstrip": false,
 
1
  {
2
+ "additional_special_tokens": [
3
+ "<START_Q>",
4
+ "<END_Q>",
5
+ "<START_A>",
6
+ "<END_A>"
7
+ ],
8
  "bos_token": {
9
  "content": "<s>",
10
  "lstrip": false,
 
19
  "rstrip": false,
20
  "single_word": false
21
  },
22
+ "pad_token": "</s>",
23
  "unk_token": {
24
  "content": "<unk>",
25
  "lstrip": false,
tokenizer.json CHANGED
@@ -1,6 +1,11 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
@@ -29,6 +34,42 @@
29
  "rstrip": false,
30
  "normalized": false,
31
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  }
33
  ],
34
  "normalizer": {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 4096,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
  "padding": null,
10
  "added_tokens": [
11
  {
 
34
  "rstrip": false,
35
  "normalized": false,
36
  "special": true
37
+ },
38
+ {
39
+ "id": 32000,
40
+ "content": "<START_Q>",
41
+ "single_word": false,
42
+ "lstrip": false,
43
+ "rstrip": false,
44
+ "normalized": false,
45
+ "special": true
46
+ },
47
+ {
48
+ "id": 32001,
49
+ "content": "<END_Q>",
50
+ "single_word": false,
51
+ "lstrip": false,
52
+ "rstrip": false,
53
+ "normalized": false,
54
+ "special": true
55
+ },
56
+ {
57
+ "id": 32002,
58
+ "content": "<START_A>",
59
+ "single_word": false,
60
+ "lstrip": false,
61
+ "rstrip": false,
62
+ "normalized": false,
63
+ "special": true
64
+ },
65
+ {
66
+ "id": 32003,
67
+ "content": "<END_A>",
68
+ "single_word": false,
69
+ "lstrip": false,
70
+ "rstrip": false,
71
+ "normalized": false,
72
+ "special": true
73
  }
74
  ],
75
  "normalizer": {