Upload processing_borealis.py with huggingface_hub
Browse files - processing_borealis.py +2 -2
processing_borealis.py
CHANGED
|
@@ -23,10 +23,10 @@ class BorealisProcessor(ProcessorMixin):
|
|
| 23 |
feature_extractor_class = "WhisperFeatureExtractor"
|
| 24 |
tokenizer_class = "AutoTokenizer"
|
| 25 |
|
| 26 |
-
# Audio tokens
|
| 27 |
audio_token = "<|AUDIO|>"
|
| 28 |
audio_bos_token = "<|start_of_audio|>"
|
| 29 |
-
audio_eos_token = "<|end_of_audio|>"
|
| 30 |
|
| 31 |
def __init__(
|
| 32 |
self,
|
|
|
|
| 23 |
feature_extractor_class = "WhisperFeatureExtractor"
|
| 24 |
tokenizer_class = "AutoTokenizer"
|
| 25 |
|
| 26 |
+
# Audio tokens (checkpoint has only 2 special tokens: 151669 and 151670)
|
| 27 |
audio_token = "<|AUDIO|>"
|
| 28 |
audio_bos_token = "<|start_of_audio|>"
|
| 29 |
+
audio_eos_token = "<|start_of_audio|>" # Reuse bos token since only 2 audio tokens in vocab
|
| 30 |
|
| 31 |
def __init__(
|
| 32 |
self,
|