Transformers
added_tokens.json CHANGED
@@ -1,23 +1,13 @@
1
  {
2
- "</cell>": 128264,
3
  "</citation>": 128013,
4
  "</code>": 128011,
5
- "</header>": 128260,
6
  "</judgement>": 128020,
7
- "</row>": 128262,
8
- "</table>": 128258,
9
  "</think>": 128009,
10
  "</tool_call>": 128005,
11
  "</tool_response>": 128007,
12
- "<cell>": 128263,
13
  "<citation>": 128012,
14
  "<code>": 128010,
15
- "<empty>": 128265,
16
- "<header>": 128259,
17
  "<judgement>": 128019,
18
- "<mask>": 128256,
19
- "<row>": 128261,
20
- "<table>": 128257,
21
  "<think>": 128008,
22
  "<tool_call>": 128004,
23
  "<tool_response>": 128006,
@@ -37,7 +27,7 @@
37
  "<|object_ref_end|>": 128016,
38
  "<|object_ref_start|>": 128015,
39
  "<|repo_name|>": 128030,
40
- "<|reserved_special_token_0|>": 128032,
41
  "<|reserved_special_token_100|>": 128132,
42
  "<|reserved_special_token_101|>": 128133,
43
  "<|reserved_special_token_102|>": 128134,
@@ -148,7 +138,7 @@
148
  "<|reserved_special_token_198|>": 128230,
149
  "<|reserved_special_token_199|>": 128231,
150
  "<|reserved_special_token_19|>": 128051,
151
- "<|reserved_special_token_1|>": 128033,
152
  "<|reserved_special_token_200|>": 128232,
153
  "<|reserved_special_token_201|>": 128233,
154
  "<|reserved_special_token_202|>": 128234,
@@ -183,7 +173,7 @@
183
  "<|reserved_special_token_27|>": 128059,
184
  "<|reserved_special_token_28|>": 128060,
185
  "<|reserved_special_token_29|>": 128061,
186
- "<|reserved_special_token_2|>": 128034,
187
  "<|reserved_special_token_30|>": 128062,
188
  "<|reserved_special_token_31|>": 128063,
189
  "<|reserved_special_token_32|>": 128064,
@@ -194,7 +184,7 @@
194
  "<|reserved_special_token_37|>": 128069,
195
  "<|reserved_special_token_38|>": 128070,
196
  "<|reserved_special_token_39|>": 128071,
197
- "<|reserved_special_token_3|>": 128035,
198
  "<|reserved_special_token_40|>": 128072,
199
  "<|reserved_special_token_41|>": 128073,
200
  "<|reserved_special_token_42|>": 128074,
@@ -205,7 +195,7 @@
205
  "<|reserved_special_token_47|>": 128079,
206
  "<|reserved_special_token_48|>": 128080,
207
  "<|reserved_special_token_49|>": 128081,
208
- "<|reserved_special_token_4|>": 128036,
209
  "<|reserved_special_token_50|>": 128082,
210
  "<|reserved_special_token_51|>": 128083,
211
  "<|reserved_special_token_52|>": 128084,
@@ -216,7 +206,7 @@
216
  "<|reserved_special_token_57|>": 128089,
217
  "<|reserved_special_token_58|>": 128090,
218
  "<|reserved_special_token_59|>": 128091,
219
- "<|reserved_special_token_5|>": 128037,
220
  "<|reserved_special_token_60|>": 128092,
221
  "<|reserved_special_token_61|>": 128093,
222
  "<|reserved_special_token_62|>": 128094,
@@ -227,7 +217,7 @@
227
  "<|reserved_special_token_67|>": 128099,
228
  "<|reserved_special_token_68|>": 128100,
229
  "<|reserved_special_token_69|>": 128101,
230
- "<|reserved_special_token_6|>": 128038,
231
  "<|reserved_special_token_70|>": 128102,
232
  "<|reserved_special_token_71|>": 128103,
233
  "<|reserved_special_token_72|>": 128104,
@@ -238,7 +228,7 @@
238
  "<|reserved_special_token_77|>": 128109,
239
  "<|reserved_special_token_78|>": 128110,
240
  "<|reserved_special_token_79|>": 128111,
241
- "<|reserved_special_token_7|>": 128039,
242
  "<|reserved_special_token_80|>": 128112,
243
  "<|reserved_special_token_81|>": 128113,
244
  "<|reserved_special_token_82|>": 128114,
@@ -249,7 +239,7 @@
249
  "<|reserved_special_token_87|>": 128119,
250
  "<|reserved_special_token_88|>": 128120,
251
  "<|reserved_special_token_89|>": 128121,
252
- "<|reserved_special_token_8|>": 128040,
253
  "<|reserved_special_token_90|>": 128122,
254
  "<|reserved_special_token_91|>": 128123,
255
  "<|reserved_special_token_92|>": 128124,
 
1
  {
 
2
  "</citation>": 128013,
3
  "</code>": 128011,
 
4
  "</judgement>": 128020,
 
 
5
  "</think>": 128009,
6
  "</tool_call>": 128005,
7
  "</tool_response>": 128007,
 
8
  "<citation>": 128012,
9
  "<code>": 128010,
 
 
10
  "<judgement>": 128019,
 
 
 
11
  "<think>": 128008,
12
  "<tool_call>": 128004,
13
  "<tool_response>": 128006,
 
27
  "<|object_ref_end|>": 128016,
28
  "<|object_ref_start|>": 128015,
29
  "<|repo_name|>": 128030,
30
+ "<|table_start|>": 128032,
31
  "<|reserved_special_token_100|>": 128132,
32
  "<|reserved_special_token_101|>": 128133,
33
  "<|reserved_special_token_102|>": 128134,
 
138
  "<|reserved_special_token_198|>": 128230,
139
  "<|reserved_special_token_199|>": 128231,
140
  "<|reserved_special_token_19|>": 128051,
141
+ "<|table_end|>": 128033,
142
  "<|reserved_special_token_200|>": 128232,
143
  "<|reserved_special_token_201|>": 128233,
144
  "<|reserved_special_token_202|>": 128234,
 
173
  "<|reserved_special_token_27|>": 128059,
174
  "<|reserved_special_token_28|>": 128060,
175
  "<|reserved_special_token_29|>": 128061,
176
+ "<|table_header_start|>": 128034,
177
  "<|reserved_special_token_30|>": 128062,
178
  "<|reserved_special_token_31|>": 128063,
179
  "<|reserved_special_token_32|>": 128064,
 
184
  "<|reserved_special_token_37|>": 128069,
185
  "<|reserved_special_token_38|>": 128070,
186
  "<|reserved_special_token_39|>": 128071,
187
+ "<|table_header_end|>": 128035,
188
  "<|reserved_special_token_40|>": 128072,
189
  "<|reserved_special_token_41|>": 128073,
190
  "<|reserved_special_token_42|>": 128074,
 
195
  "<|reserved_special_token_47|>": 128079,
196
  "<|reserved_special_token_48|>": 128080,
197
  "<|reserved_special_token_49|>": 128081,
198
+ "<|table_row_start|>": 128036,
199
  "<|reserved_special_token_50|>": 128082,
200
  "<|reserved_special_token_51|>": 128083,
201
  "<|reserved_special_token_52|>": 128084,
 
206
  "<|reserved_special_token_57|>": 128089,
207
  "<|reserved_special_token_58|>": 128090,
208
  "<|reserved_special_token_59|>": 128091,
209
+ "<|table_row_end|>": 128037,
210
  "<|reserved_special_token_60|>": 128092,
211
  "<|reserved_special_token_61|>": 128093,
212
  "<|reserved_special_token_62|>": 128094,
 
217
  "<|reserved_special_token_67|>": 128099,
218
  "<|reserved_special_token_68|>": 128100,
219
  "<|reserved_special_token_69|>": 128101,
220
+ "<|table_cell_start|>": 128038,
221
  "<|reserved_special_token_70|>": 128102,
222
  "<|reserved_special_token_71|>": 128103,
223
  "<|reserved_special_token_72|>": 128104,
 
228
  "<|reserved_special_token_77|>": 128109,
229
  "<|reserved_special_token_78|>": 128110,
230
  "<|reserved_special_token_79|>": 128111,
231
+ "<|table_cell_end|>": 128039,
232
  "<|reserved_special_token_80|>": 128112,
233
  "<|reserved_special_token_81|>": 128113,
234
  "<|reserved_special_token_82|>": 128114,
 
239
  "<|reserved_special_token_87|>": 128119,
240
  "<|reserved_special_token_88|>": 128120,
241
  "<|reserved_special_token_89|>": 128121,
242
+ "<|table_pad|>": 128040,
243
  "<|reserved_special_token_90|>": 128122,
244
  "<|reserved_special_token_91|>": 128123,
245
  "<|reserved_special_token_92|>": 128124,
special_tokens_map.json CHANGED
@@ -1,69 +1,4 @@
1
  {
2
- "additional_special_tokens": [
3
- {
4
- "content": "<table>",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false
9
- },
10
- {
11
- "content": "</table>",
12
- "lstrip": false,
13
- "normalized": false,
14
- "rstrip": false,
15
- "single_word": false
16
- },
17
- {
18
- "content": "<header>",
19
- "lstrip": false,
20
- "normalized": false,
21
- "rstrip": false,
22
- "single_word": false
23
- },
24
- {
25
- "content": "</header>",
26
- "lstrip": false,
27
- "normalized": false,
28
- "rstrip": false,
29
- "single_word": false
30
- },
31
- {
32
- "content": "<row>",
33
- "lstrip": false,
34
- "normalized": false,
35
- "rstrip": false,
36
- "single_word": false
37
- },
38
- {
39
- "content": "</row>",
40
- "lstrip": false,
41
- "normalized": false,
42
- "rstrip": false,
43
- "single_word": false
44
- },
45
- {
46
- "content": "<cell>",
47
- "lstrip": false,
48
- "normalized": false,
49
- "rstrip": false,
50
- "single_word": false
51
- },
52
- {
53
- "content": "</cell>",
54
- "lstrip": false,
55
- "normalized": false,
56
- "rstrip": false,
57
- "single_word": false
58
- },
59
- {
60
- "content": "<empty>",
61
- "lstrip": false,
62
- "normalized": false,
63
- "rstrip": false,
64
- "single_word": false
65
- }
66
- ],
67
  "bos_token": {
68
  "content": "<|begin_of_text|>",
69
  "lstrip": false,
@@ -78,13 +13,6 @@
78
  "rstrip": false,
79
  "single_word": false
80
  },
81
- "mask_token": {
82
- "content": "<mask>",
83
- "lstrip": false,
84
- "normalized": false,
85
- "rstrip": false,
86
- "single_word": false
87
- },
88
  "pad_token": {
89
  "content": "<|end_of_text|>",
90
  "lstrip": false,
 
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "bos_token": {
3
  "content": "<|begin_of_text|>",
4
  "lstrip": false,
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
 
 
 
 
 
 
 
16
  "pad_token": {
17
  "content": "<|end_of_text|>",
18
  "lstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:849280aad4f0bc56ac303b5415adb979de8ea15fd32c4dfd379c633ef0310f23
3
- size 17211324
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf32d7719f1732eef0b7d9752ec708a65efe14c1d8901b9243fa0dc8af325ece
3
+ size 17209390
tokenizer_config.json CHANGED
@@ -259,7 +259,7 @@
259
  "special": true
260
  },
261
  "128032": {
262
- "content": "<|reserved_special_token_0|>",
263
  "lstrip": false,
264
  "normalized": false,
265
  "rstrip": false,
@@ -267,7 +267,7 @@
267
  "special": true
268
  },
269
  "128033": {
270
- "content": "<|reserved_special_token_1|>",
271
  "lstrip": false,
272
  "normalized": false,
273
  "rstrip": false,
@@ -275,7 +275,7 @@
275
  "special": true
276
  },
277
  "128034": {
278
- "content": "<|reserved_special_token_2|>",
279
  "lstrip": false,
280
  "normalized": false,
281
  "rstrip": false,
@@ -283,7 +283,7 @@
283
  "special": true
284
  },
285
  "128035": {
286
- "content": "<|reserved_special_token_3|>",
287
  "lstrip": false,
288
  "normalized": false,
289
  "rstrip": false,
@@ -291,7 +291,7 @@
291
  "special": true
292
  },
293
  "128036": {
294
- "content": "<|reserved_special_token_4|>",
295
  "lstrip": false,
296
  "normalized": false,
297
  "rstrip": false,
@@ -299,7 +299,7 @@
299
  "special": true
300
  },
301
  "128037": {
302
- "content": "<|reserved_special_token_5|>",
303
  "lstrip": false,
304
  "normalized": false,
305
  "rstrip": false,
@@ -307,7 +307,7 @@
307
  "special": true
308
  },
309
  "128038": {
310
- "content": "<|reserved_special_token_6|>",
311
  "lstrip": false,
312
  "normalized": false,
313
  "rstrip": false,
@@ -315,7 +315,7 @@
315
  "special": true
316
  },
317
  "128039": {
318
- "content": "<|reserved_special_token_7|>",
319
  "lstrip": false,
320
  "normalized": false,
321
  "rstrip": false,
@@ -323,7 +323,7 @@
323
  "special": true
324
  },
325
  "128040": {
326
- "content": "<|reserved_special_token_8|>",
327
  "lstrip": false,
328
  "normalized": false,
329
  "rstrip": false,
@@ -2049,110 +2049,19 @@
2049
  "rstrip": false,
2050
  "single_word": false,
2051
  "special": true
2052
- },
2053
- "128256": {
2054
- "content": "<mask>",
2055
- "lstrip": false,
2056
- "normalized": false,
2057
- "rstrip": false,
2058
- "single_word": false,
2059
- "special": true
2060
- },
2061
- "128257": {
2062
- "content": "<table>",
2063
- "lstrip": false,
2064
- "normalized": false,
2065
- "rstrip": false,
2066
- "single_word": false,
2067
- "special": true
2068
- },
2069
- "128258": {
2070
- "content": "</table>",
2071
- "lstrip": false,
2072
- "normalized": false,
2073
- "rstrip": false,
2074
- "single_word": false,
2075
- "special": true
2076
- },
2077
- "128259": {
2078
- "content": "<header>",
2079
- "lstrip": false,
2080
- "normalized": false,
2081
- "rstrip": false,
2082
- "single_word": false,
2083
- "special": true
2084
- },
2085
- "128260": {
2086
- "content": "</header>",
2087
- "lstrip": false,
2088
- "normalized": false,
2089
- "rstrip": false,
2090
- "single_word": false,
2091
- "special": true
2092
- },
2093
- "128261": {
2094
- "content": "<row>",
2095
- "lstrip": false,
2096
- "normalized": false,
2097
- "rstrip": false,
2098
- "single_word": false,
2099
- "special": true
2100
- },
2101
- "128262": {
2102
- "content": "</row>",
2103
- "lstrip": false,
2104
- "normalized": false,
2105
- "rstrip": false,
2106
- "single_word": false,
2107
- "special": true
2108
- },
2109
- "128263": {
2110
- "content": "<cell>",
2111
- "lstrip": false,
2112
- "normalized": false,
2113
- "rstrip": false,
2114
- "single_word": false,
2115
- "special": true
2116
- },
2117
- "128264": {
2118
- "content": "</cell>",
2119
- "lstrip": false,
2120
- "normalized": false,
2121
- "rstrip": false,
2122
- "single_word": false,
2123
- "special": true
2124
- },
2125
- "128265": {
2126
- "content": "<empty>",
2127
- "lstrip": false,
2128
- "normalized": false,
2129
- "rstrip": false,
2130
- "single_word": false,
2131
- "special": true
2132
  }
2133
  },
2134
- "additional_special_tokens": [
2135
- "<table>",
2136
- "</table>",
2137
- "<header>",
2138
- "</header>",
2139
- "<row>",
2140
- "</row>",
2141
- "<cell>",
2142
- "</cell>",
2143
- "<empty>"
2144
- ],
2145
  "bos_token": "<|begin_of_text|>",
2146
  "clean_up_tokenization_spaces": true,
2147
  "eos_token": "<|im_end|>",
2148
  "errors": "replace",
2149
  "extra_special_tokens": {},
2150
- "mask_token": "<mask>",
2151
  "model_input_names": [
2152
  "input_ids",
2153
  "attention_mask"
2154
  ],
2155
  "model_max_length": 131072,
 
2156
  "pad_token": "<|end_of_text|>",
2157
  "padding_side": "left",
2158
  "split_special_tokens": false,
 
259
  "special": true
260
  },
261
  "128032": {
262
+ "content": "<|table_start|>",
263
  "lstrip": false,
264
  "normalized": false,
265
  "rstrip": false,
 
267
  "special": true
268
  },
269
  "128033": {
270
+ "content": "<|table_end|>",
271
  "lstrip": false,
272
  "normalized": false,
273
  "rstrip": false,
 
275
  "special": true
276
  },
277
  "128034": {
278
+ "content": "<|table_header_start|>",
279
  "lstrip": false,
280
  "normalized": false,
281
  "rstrip": false,
 
283
  "special": true
284
  },
285
  "128035": {
286
+ "content": "<|table_header_end|>",
287
  "lstrip": false,
288
  "normalized": false,
289
  "rstrip": false,
 
291
  "special": true
292
  },
293
  "128036": {
294
+ "content": "<|table_row_start|>",
295
  "lstrip": false,
296
  "normalized": false,
297
  "rstrip": false,
 
299
  "special": true
300
  },
301
  "128037": {
302
+ "content": "<|table_row_end|>",
303
  "lstrip": false,
304
  "normalized": false,
305
  "rstrip": false,
 
307
  "special": true
308
  },
309
  "128038": {
310
+ "content": "<|table_cell_start|>",
311
  "lstrip": false,
312
  "normalized": false,
313
  "rstrip": false,
 
315
  "special": true
316
  },
317
  "128039": {
318
+ "content": "<|table_cell_end|>",
319
  "lstrip": false,
320
  "normalized": false,
321
  "rstrip": false,
 
323
  "special": true
324
  },
325
  "128040": {
326
+ "content": "<|table_pad|>",
327
  "lstrip": false,
328
  "normalized": false,
329
  "rstrip": false,
 
2049
  "rstrip": false,
2050
  "single_word": false,
2051
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2052
  }
2053
  },
 
 
 
 
 
 
 
 
 
 
 
2054
  "bos_token": "<|begin_of_text|>",
2055
  "clean_up_tokenization_spaces": true,
2056
  "eos_token": "<|im_end|>",
2057
  "errors": "replace",
2058
  "extra_special_tokens": {},
 
2059
  "model_input_names": [
2060
  "input_ids",
2061
  "attention_mask"
2062
  ],
2063
  "model_max_length": 131072,
2064
+ "mask_token": "<|end_of_text|>",
2065
  "pad_token": "<|end_of_text|>",
2066
  "padding_side": "left",
2067
  "split_special_tokens": false,