cipher982 committed on
Commit
1b2876d
·
1 Parent(s): 2c96a7e

Upload tokenizer

Browse files
Files changed (3) hide show
  1. added_tokens.json +3 -0
  2. special_tokens_map.json +4 -1
  3. tokenizer.json +27 -0
added_tokens.json CHANGED
@@ -1,6 +1,9 @@
1
  {
 
 
2
  "<END_A>": 32003,
3
  "<END_Q>": 32001,
 
4
  "<START_A>": 32002,
5
  "<START_Q>": 32000
6
  }
 
1
  {
2
+ "<ASC>": 32006,
3
+ "<DIM>": 32004,
4
  "<END_A>": 32003,
5
  "<END_Q>": 32001,
6
+ "<MET>": 32005,
7
  "<START_A>": 32002,
8
  "<START_Q>": 32000
9
  }
special_tokens_map.json CHANGED
@@ -3,7 +3,10 @@
3
  "<START_Q>",
4
  "<END_Q>",
5
  "<START_A>",
6
- "<END_A>"
 
 
 
7
  ],
8
  "bos_token": {
9
  "content": "<s>",
 
3
  "<START_Q>",
4
  "<END_Q>",
5
  "<START_A>",
6
+ "<END_A>",
7
+ "<DIM>",
8
+ "<MET>",
9
+ "<ASC>"
10
  ],
11
  "bos_token": {
12
  "content": "<s>",
tokenizer.json CHANGED
@@ -70,6 +70,33 @@
70
  "rstrip": false,
71
  "normalized": false,
72
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  }
74
  ],
75
  "normalizer": {
 
70
  "rstrip": false,
71
  "normalized": false,
72
  "special": true
73
+ },
74
+ {
75
+ "id": 32004,
76
+ "content": "<DIM>",
77
+ "single_word": false,
78
+ "lstrip": false,
79
+ "rstrip": false,
80
+ "normalized": false,
81
+ "special": true
82
+ },
83
+ {
84
+ "id": 32005,
85
+ "content": "<MET>",
86
+ "single_word": false,
87
+ "lstrip": false,
88
+ "rstrip": false,
89
+ "normalized": false,
90
+ "special": true
91
+ },
92
+ {
93
+ "id": 32006,
94
+ "content": "<ASC>",
95
+ "single_word": false,
96
+ "lstrip": false,
97
+ "rstrip": false,
98
+ "normalized": false,
99
+ "special": true
100
  }
101
  ],
102
  "normalizer": {