Upload tokenizer.json
Browse files- tokenizer.json +28 -0
tokenizer.json
CHANGED
|
@@ -98,6 +98,34 @@
|
|
| 98 |
},
|
| 99 |
"content":""
|
| 100 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
{
|
| 102 |
"type":"BPEDecoder",
|
| 103 |
"suffix":"</w>"
|
|
|
|
| 98 |
},
|
| 99 |
"content":""
|
| 100 |
},
|
| 101 |
+
{
|
| 102 |
+
"type":"Replace",
|
| 103 |
+
"pattern":{
|
| 104 |
+
"String":"__start__"
|
| 105 |
+
},
|
| 106 |
+
"content":"__start__</w>"
|
| 107 |
+
},
|
| 108 |
+
{
|
| 109 |
+
"type":"Replace",
|
| 110 |
+
"pattern":{
|
| 111 |
+
"String":"__end__"
|
| 112 |
+
},
|
| 113 |
+
"content":"__end__</w>"
|
| 114 |
+
},
|
| 115 |
+
{
|
| 116 |
+
"type":"Replace",
|
| 117 |
+
"pattern":{
|
| 118 |
+
"String":"__unk__"
|
| 119 |
+
},
|
| 120 |
+
"content":"__unk__</w>"
|
| 121 |
+
},
|
| 122 |
+
{
|
| 123 |
+
"type":"Replace",
|
| 124 |
+
"pattern":{
|
| 125 |
+
"String":"__null__"
|
| 126 |
+
},
|
| 127 |
+
"content":"__null__</w>"
|
| 128 |
+
},
|
| 129 |
{
|
| 130 |
"type":"BPEDecoder",
|
| 131 |
"suffix":"</w>"
|