Add processor for Whisper Medium Sanskrit model
Browse files
- tokenizer.json +0 -42
- tokenizer_config.json +2 -2
tokenizer.json
CHANGED
|
@@ -14492,18 +14492,6 @@
|
|
| 14492 |
"type_id": 0
|
| 14493 |
}
|
| 14494 |
},
|
| 14495 |
-
{
|
| 14496 |
-
"SpecialToken": {
|
| 14497 |
-
"id": "<|sa|>",
|
| 14498 |
-
"type_id": 0
|
| 14499 |
-
}
|
| 14500 |
-
},
|
| 14501 |
-
{
|
| 14502 |
-
"SpecialToken": {
|
| 14503 |
-
"id": "<|transcribe|>",
|
| 14504 |
-
"type_id": 0
|
| 14505 |
-
}
|
| 14506 |
-
},
|
| 14507 |
{
|
| 14508 |
"SpecialToken": {
|
| 14509 |
"id": "<|notimestamps|>",
|
|
@@ -14530,18 +14518,6 @@
|
|
| 14530 |
"type_id": 0
|
| 14531 |
}
|
| 14532 |
},
|
| 14533 |
-
{
|
| 14534 |
-
"SpecialToken": {
|
| 14535 |
-
"id": "<|sa|>",
|
| 14536 |
-
"type_id": 0
|
| 14537 |
-
}
|
| 14538 |
-
},
|
| 14539 |
-
{
|
| 14540 |
-
"SpecialToken": {
|
| 14541 |
-
"id": "<|transcribe|>",
|
| 14542 |
-
"type_id": 0
|
| 14543 |
-
}
|
| 14544 |
-
},
|
| 14545 |
{
|
| 14546 |
"SpecialToken": {
|
| 14547 |
"id": "<|notimestamps|>",
|
|
@@ -14586,15 +14562,6 @@
|
|
| 14586 |
"<|notimestamps|>"
|
| 14587 |
]
|
| 14588 |
},
|
| 14589 |
-
"<|sa|>": {
|
| 14590 |
-
"id": "<|sa|>",
|
| 14591 |
-
"ids": [
|
| 14592 |
-
50344
|
| 14593 |
-
],
|
| 14594 |
-
"tokens": [
|
| 14595 |
-
"<|sa|>"
|
| 14596 |
-
]
|
| 14597 |
-
},
|
| 14598 |
"<|startoftranscript|>": {
|
| 14599 |
"id": "<|startoftranscript|>",
|
| 14600 |
"ids": [
|
|
@@ -14603,15 +14570,6 @@
|
|
| 14603 |
"tokens": [
|
| 14604 |
"<|startoftranscript|>"
|
| 14605 |
]
|
| 14606 |
-
},
|
| 14607 |
-
"<|transcribe|>": {
|
| 14608 |
-
"id": "<|transcribe|>",
|
| 14609 |
-
"ids": [
|
| 14610 |
-
50359
|
| 14611 |
-
],
|
| 14612 |
-
"tokens": [
|
| 14613 |
-
"<|transcribe|>"
|
| 14614 |
-
]
|
| 14615 |
}
|
| 14616 |
}
|
| 14617 |
},
|
|
|
|
| 14492 |
"type_id": 0
|
| 14493 |
}
|
| 14494 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14495 |
{
|
| 14496 |
"SpecialToken": {
|
| 14497 |
"id": "<|notimestamps|>",
|
|
|
|
| 14518 |
"type_id": 0
|
| 14519 |
}
|
| 14520 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14521 |
{
|
| 14522 |
"SpecialToken": {
|
| 14523 |
"id": "<|notimestamps|>",
|
|
|
|
| 14562 |
"<|notimestamps|>"
|
| 14563 |
]
|
| 14564 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14565 |
"<|startoftranscript|>": {
|
| 14566 |
"id": "<|startoftranscript|>",
|
| 14567 |
"ids": [
|
|
|
|
| 14570 |
"tokens": [
|
| 14571 |
"<|startoftranscript|>"
|
| 14572 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14573 |
}
|
| 14574 |
}
|
| 14575 |
},
|
tokenizer_config.json
CHANGED
|
@@ -115,13 +115,13 @@
|
|
| 115 |
"<|notimestamps|>"
|
| 116 |
],
|
| 117 |
"is_local": true,
|
| 118 |
-
"language":
|
| 119 |
"model_max_length": 1024,
|
| 120 |
"pad_token": "<|endoftext|>",
|
| 121 |
"predict_timestamps": false,
|
| 122 |
"processor_class": "WhisperProcessor",
|
| 123 |
"return_attention_mask": false,
|
| 124 |
-
"task":
|
| 125 |
"tokenizer_class": "WhisperTokenizer",
|
| 126 |
"unk_token": "<|endoftext|>"
|
| 127 |
}
|
|
|
|
| 115 |
"<|notimestamps|>"
|
| 116 |
],
|
| 117 |
"is_local": true,
|
| 118 |
+
"language": null,
|
| 119 |
"model_max_length": 1024,
|
| 120 |
"pad_token": "<|endoftext|>",
|
| 121 |
"predict_timestamps": false,
|
| 122 |
"processor_class": "WhisperProcessor",
|
| 123 |
"return_attention_mask": false,
|
| 124 |
+
"task": null,
|
| 125 |
"tokenizer_class": "WhisperTokenizer",
|
| 126 |
"unk_token": "<|endoftext|>"
|
| 127 |
}
|