# saga / processing_saga.py
# Uploaded by AndreasEefsen
# Commit message: "Upload folder using huggingface_hub"
# Commit 5056f92 (verified)
import numpy as np
from transformers import ProcessorMixin
class SagaProcessor(ProcessorMixin):
    """Processor pairing a feature extractor with a tokenizer.

    On top of what ``ProcessorMixin`` provides, this class offers two
    helpers: ``process_audio`` (linear-interpolation resampling to
    ``target_sr``) and ``get_prompt`` (builds the text prompt from the
    tokenizer's chat template).
    """

    attributes = ["feature_extractor", "tokenizer"]

    def __init__(self, feature_extractor, tokenizer, **kwargs):
        """Store the sub-processors and set the target sampling rate.

        Args:
            feature_extractor: Forwarded to ``ProcessorMixin.__init__``.
            tokenizer: Forwarded to ``ProcessorMixin.__init__``; must
                support ``apply_chat_template`` for ``get_prompt``.
            **kwargs: Forwarded unchanged to ``ProcessorMixin.__init__``.
        """
        super().__init__(feature_extractor, tokenizer, **kwargs)
        # Sampling rate (Hz) that process_audio resamples to.
        self.target_sr = 16000

    def process_audio(self, audio, sampling_rate):
        """Resample a mono waveform to ``self.target_sr``.

        Resampling is plain linear interpolation (``np.interp``); no
        anti-aliasing filter is applied.

        Args:
            audio: 1-D sequence of samples (NumPy array or anything
                ``np.asarray`` accepts).
            sampling_rate: Sampling rate of ``audio`` in Hz.

        Returns:
            ``audio`` itself, untouched, when ``int(sampling_rate)``
            already equals ``self.target_sr``; otherwise a new 1-D
            ``float32`` array. The result is empty when the resampled
            length would be one sample or fewer.

        Raises:
            ValueError: If ``sampling_rate`` is not positive, or if
                ``audio`` is not one-dimensional when resampling is needed.
        """
        if int(sampling_rate) == self.target_sr:
            # Already at the target rate: hand the input back as-is.
            return audio

        src_rate = float(sampling_rate)
        if src_rate <= 0.0:
            # Previously 0 raised ZeroDivisionError and negative rates
            # silently produced empty audio.
            raise ValueError(
                f"sampling_rate must be positive, got {sampling_rate!r}"
            )

        samples = np.asarray(audio)
        if samples.ndim != 1:
            # shape[0] of a (channels, time) array is the channel count,
            # which previously led to a silently empty result.
            raise ValueError(
                "process_audio expects 1-D (mono) audio, got array with "
                f"shape {samples.shape}"
            )

        src_len = samples.shape[0]
        dst_len = int(round(src_len * (float(self.target_sr) / src_rate)))
        if dst_len <= 1:
            return np.zeros((0,), dtype=np.float32)

        # Place source and destination samples on the same normalized
        # [0, 1) time axis and interpolate between source samples.
        src_x = np.linspace(0.0, 1.0, num=src_len, endpoint=False)
        dst_x = np.linspace(0.0, 1.0, num=dst_len, endpoint=False)
        return np.interp(dst_x, src_x, samples).astype(np.float32)

    def get_prompt(self, language="Danish"):
        """Build the text prompt that precedes the generated transcript.

        The tokenizer's chat template is applied to an empty system
        message plus a user message holding a single audio entry, with the
        generation prompt appended. The suffix
        ``"language <language><asr_text>"`` is then added.

        Args:
            language: Language name inserted into the prompt suffix.
                Defaults to ``"Danish"``, which reproduces the prompt
                this method always produced before the parameter existed.

        Returns:
            The prompt as a string (not tokenized).
        """
        messages = [
            {"role": "system", "content": ""},
            {"role": "user", "content": [{"type": "audio", "audio": ""}]},
        ]
        prompt = self.tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=False,
        )
        return prompt + f"language {language}<asr_text>"