AlexWortega committed on
Commit
3b34148
·
verified ·
1 Parent(s): 54551ae

Upload processing_borealis.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. processing_borealis.py +2 -2
processing_borealis.py CHANGED
@@ -23,10 +23,10 @@ class BorealisProcessor(ProcessorMixin):
23
  feature_extractor_class = "WhisperFeatureExtractor"
24
  tokenizer_class = "AutoTokenizer"
25
 
26
- # Audio tokens
27
  audio_token = "<|AUDIO|>"
28
  audio_bos_token = "<|start_of_audio|>"
29
- audio_eos_token = "<|end_of_audio|>"
30
 
31
  def __init__(
32
  self,
 
23
  feature_extractor_class = "WhisperFeatureExtractor"
24
  tokenizer_class = "AutoTokenizer"
25
 
26
+ # Audio tokens (checkpoint has only 2 special tokens: 151669 and 151670)
27
  audio_token = "<|AUDIO|>"
28
  audio_bos_token = "<|start_of_audio|>"
29
+ audio_eos_token = "<|start_of_audio|>" # Reuse bos token since only 2 audio tokens in vocab
30
 
31
  def __init__(
32
  self,