bencyc1129 committed on
Commit
b2ad0c1
·
verified ·
1 Parent(s): ebdd823

Upload tokenizer

Browse files
added_tokens.json CHANGED
@@ -1,7 +1,3 @@
1
  {
2
- "</D>": 28998,
3
- "</P>": 29000,
4
- "<D>": 28997,
5
- "<N>": 28996,
6
- "<P>": 28999
7
  }
 
1
  {
2
+ "<N>": 28996
 
 
 
 
3
  }
special_tokens_map.json CHANGED
@@ -6,34 +6,6 @@
6
  "normalized": false,
7
  "rstrip": false,
8
  "single_word": false
9
- },
10
- {
11
- "content": "<D>",
12
- "lstrip": false,
13
- "normalized": false,
14
- "rstrip": false,
15
- "single_word": false
16
- },
17
- {
18
- "content": "</D>",
19
- "lstrip": false,
20
- "normalized": false,
21
- "rstrip": false,
22
- "single_word": false
23
- },
24
- {
25
- "content": "<P>",
26
- "lstrip": false,
27
- "normalized": false,
28
- "rstrip": false,
29
- "single_word": false
30
- },
31
- {
32
- "content": "</P>",
33
- "lstrip": false,
34
- "normalized": false,
35
- "rstrip": false,
36
- "single_word": false
37
  }
38
  ],
39
  "cls_token": "[CLS]",
 
6
  "normalized": false,
7
  "rstrip": false,
8
  "single_word": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  }
10
  ],
11
  "cls_token": "[CLS]",
tokenizer.json CHANGED
@@ -56,42 +56,6 @@
56
  "rstrip": false,
57
  "normalized": false,
58
  "special": true
59
- },
60
- {
61
- "id": 28997,
62
- "content": "<D>",
63
- "single_word": false,
64
- "lstrip": false,
65
- "rstrip": false,
66
- "normalized": false,
67
- "special": true
68
- },
69
- {
70
- "id": 28998,
71
- "content": "</D>",
72
- "single_word": false,
73
- "lstrip": false,
74
- "rstrip": false,
75
- "normalized": false,
76
- "special": true
77
- },
78
- {
79
- "id": 28999,
80
- "content": "<P>",
81
- "single_word": false,
82
- "lstrip": false,
83
- "rstrip": false,
84
- "normalized": false,
85
- "special": true
86
- },
87
- {
88
- "id": 29000,
89
- "content": "</P>",
90
- "single_word": false,
91
- "lstrip": false,
92
- "rstrip": false,
93
- "normalized": false,
94
- "special": true
95
  }
96
  ],
97
  "normalizer": {
 
56
  "rstrip": false,
57
  "normalized": false,
58
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  }
60
  ],
61
  "normalizer": {
tokenizer_config.json CHANGED
@@ -47,46 +47,10 @@
47
  "rstrip": false,
48
  "single_word": false,
49
  "special": true
50
- },
51
- "28997": {
52
- "content": "<D>",
53
- "lstrip": false,
54
- "normalized": false,
55
- "rstrip": false,
56
- "single_word": false,
57
- "special": true
58
- },
59
- "28998": {
60
- "content": "</D>",
61
- "lstrip": false,
62
- "normalized": false,
63
- "rstrip": false,
64
- "single_word": false,
65
- "special": true
66
- },
67
- "28999": {
68
- "content": "<P>",
69
- "lstrip": false,
70
- "normalized": false,
71
- "rstrip": false,
72
- "single_word": false,
73
- "special": true
74
- },
75
- "29000": {
76
- "content": "</P>",
77
- "lstrip": false,
78
- "normalized": false,
79
- "rstrip": false,
80
- "single_word": false,
81
- "special": true
82
  }
83
  },
84
  "additional_special_tokens": [
85
- "<N>",
86
- "<D>",
87
- "</D>",
88
- "<P>",
89
- "</P>"
90
  ],
91
  "clean_up_tokenization_spaces": true,
92
  "cls_token": "[CLS]",
 
47
  "rstrip": false,
48
  "single_word": false,
49
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  }
51
  },
52
  "additional_special_tokens": [
53
+ "<N>"
 
 
 
 
54
  ],
55
  "clean_up_tokenization_spaces": true,
56
  "cls_token": "[CLS]",