Upload tokenizer
Browse files- tokenizer.json +63 -1
tokenizer.json
CHANGED
|
@@ -952,7 +952,69 @@
|
|
| 952 |
"replacement": "▁",
|
| 953 |
"add_prefix_space": true
|
| 954 |
},
|
| 955 |
-
"post_processor":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 956 |
"decoder": {
|
| 957 |
"type": "Metaspace",
|
| 958 |
"replacement": "▁",
|
|
|
|
| 952 |
"replacement": "▁",
|
| 953 |
"add_prefix_space": true
|
| 954 |
},
|
| 955 |
+
"post_processor": {
|
| 956 |
+
"type": "TemplateProcessing",
|
| 957 |
+
"single": [
|
| 958 |
+
{
|
| 959 |
+
"Sequence": {
|
| 960 |
+
"id": "A",
|
| 961 |
+
"type_id": 0
|
| 962 |
+
}
|
| 963 |
+
},
|
| 964 |
+
{
|
| 965 |
+
"SpecialToken": {
|
| 966 |
+
"id": "</s>",
|
| 967 |
+
"type_id": 0
|
| 968 |
+
}
|
| 969 |
+
}
|
| 970 |
+
],
|
| 971 |
+
"pair": [
|
| 972 |
+
{
|
| 973 |
+
"Sequence": {
|
| 974 |
+
"id": "A",
|
| 975 |
+
"type_id": 0
|
| 976 |
+
}
|
| 977 |
+
},
|
| 978 |
+
{
|
| 979 |
+
"SpecialToken": {
|
| 980 |
+
"id": "<extra_id_98>",
|
| 981 |
+
"type_id": 0
|
| 982 |
+
}
|
| 983 |
+
},
|
| 984 |
+
{
|
| 985 |
+
"Sequence": {
|
| 986 |
+
"id": "B",
|
| 987 |
+
"type_id": 1
|
| 988 |
+
}
|
| 989 |
+
},
|
| 990 |
+
{
|
| 991 |
+
"SpecialToken": {
|
| 992 |
+
"id": "</s>",
|
| 993 |
+
"type_id": 1
|
| 994 |
+
}
|
| 995 |
+
}
|
| 996 |
+
],
|
| 997 |
+
"special_tokens": {
|
| 998 |
+
"</s>": {
|
| 999 |
+
"id": "</s>",
|
| 1000 |
+
"ids": [
|
| 1001 |
+
1
|
| 1002 |
+
],
|
| 1003 |
+
"tokens": [
|
| 1004 |
+
"</s>"
|
| 1005 |
+
]
|
| 1006 |
+
},
|
| 1007 |
+
"<extra_id_98>": {
|
| 1008 |
+
"id": "<extra_id_98>",
|
| 1009 |
+
"ids": [
|
| 1010 |
+
128001
|
| 1011 |
+
],
|
| 1012 |
+
"tokens": [
|
| 1013 |
+
"<extra_id_98>"
|
| 1014 |
+
]
|
| 1015 |
+
}
|
| 1016 |
+
}
|
| 1017 |
+
},
|
| 1018 |
"decoder": {
|
| 1019 |
"type": "Metaspace",
|
| 1020 |
"replacement": "▁",
|