infi committed on
Commit
a1de04e
·
1 Parent(s): 859935e

Upload tokenizer

Browse files
Files changed (2) hide show
  1. added_tokens.json +12 -1
  2. tokenizer_config.json +88 -0
added_tokens.json CHANGED
@@ -2,5 +2,16 @@
2
  "\n": 64001,
3
  "<mask>": 64000,
4
  "<token_echap>": 64003,
5
- "<token_schap>": 64002
 
 
 
 
 
 
 
 
 
 
 
6
  }
 
2
  "\n": 64001,
3
  "<mask>": 64000,
4
  "<token_echap>": 64003,
5
+ "<token_schap>": 64002,
6
+ "<|answer|>": 64010,
7
+ "<|chap|>": 64004,
8
+ "<|endbox|>": 64008,
9
+ "<|para|>": 64009,
10
+ "<|question|>": 64012,
11
+ "<|section|>": 64005,
12
+ "<|startbox|>": 64007,
13
+ "<|subsection|>": 64006,
14
+ "<|teaser|>": 64014,
15
+ "<|title|>": 64011,
16
+ "<|topic|>": 64013
17
  }
tokenizer_config.json CHANGED
@@ -63,6 +63,94 @@
63
  "rstrip": false,
64
  "single_word": false,
65
  "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  }
67
  },
68
  "bos_token": "<s>",
 
63
  "rstrip": false,
64
  "single_word": false,
65
  "special": false
66
+ },
67
+ "64004": {
68
+ "content": "<|chap|>",
69
+ "lstrip": false,
70
+ "normalized": true,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": false
74
+ },
75
+ "64005": {
76
+ "content": "<|section|>",
77
+ "lstrip": false,
78
+ "normalized": true,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": false
82
+ },
83
+ "64006": {
84
+ "content": "<|subsection|>",
85
+ "lstrip": false,
86
+ "normalized": true,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": false
90
+ },
91
+ "64007": {
92
+ "content": "<|startbox|>",
93
+ "lstrip": false,
94
+ "normalized": true,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": false
98
+ },
99
+ "64008": {
100
+ "content": "<|endbox|>",
101
+ "lstrip": false,
102
+ "normalized": true,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": false
106
+ },
107
+ "64009": {
108
+ "content": "<|para|>",
109
+ "lstrip": false,
110
+ "normalized": true,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": false
114
+ },
115
+ "64010": {
116
+ "content": "<|answer|>",
117
+ "lstrip": false,
118
+ "normalized": true,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": false
122
+ },
123
+ "64011": {
124
+ "content": "<|title|>",
125
+ "lstrip": false,
126
+ "normalized": true,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": false
130
+ },
131
+ "64012": {
132
+ "content": "<|question|>",
133
+ "lstrip": false,
134
+ "normalized": true,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": false
138
+ },
139
+ "64013": {
140
+ "content": "<|topic|>",
141
+ "lstrip": false,
142
+ "normalized": true,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": false
146
+ },
147
+ "64014": {
148
+ "content": "<|teaser|>",
149
+ "lstrip": false,
150
+ "normalized": true,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": false
154
  }
155
  },
156
  "bos_token": "<s>",