import numpy as np
from transformers import ProcessorMixin


class SagaProcessor(ProcessorMixin):
    """Processor that bundles a feature extractor and a tokenizer.

    On top of ``ProcessorMixin`` it offers two helpers:

    * ``process_audio`` resamples a 1-D waveform to ``target_sr`` using
      linear interpolation.
    * ``get_prompt`` renders the chat-template prompt for a single audio
      turn and appends a ``"language <name>"`` suffix.
    """

    attributes = ["feature_extractor", "tokenizer"]

    def __init__(self, feature_extractor, tokenizer, **kwargs):
        """Store both components and set the target sampling rate."""
        super().__init__(feature_extractor, tokenizer, **kwargs)
        # Sampling rate that process_audio() converts incoming audio to.
        self.target_sr = 16000

    def process_audio(self, audio, sampling_rate) -> np.ndarray:
        """Resample ``audio`` from ``sampling_rate`` to ``self.target_sr``.

        Args:
            audio: 1-D waveform. A NumPy array or any array-like of numbers
                (list, tuple) is accepted when resampling is needed.
            sampling_rate: Sampling rate of ``audio``; must be positive.

        Returns:
            ``audio`` itself, untouched (no copy, no dtype conversion), when
            ``int(sampling_rate)`` already equals ``self.target_sr``.
            Otherwise a new ``float32`` array holding the linearly
            interpolated waveform, or an empty ``float32`` array when the
            resampled length would be at most one sample.

        Raises:
            ValueError: If ``sampling_rate`` is zero or negative.
        """
        if int(sampling_rate) == self.target_sr:
            return audio

        rate = float(sampling_rate)
        if not rate > 0.0:
            # Without this guard a zero rate surfaces as a bare
            # ZeroDivisionError and a negative rate silently yields an
            # empty array, both of which hide the real caller mistake.
            raise ValueError(
                f"sampling_rate must be positive, got {sampling_rate!r}"
            )

        # Accept plain sequences as well as ndarrays; ndarrays pass through
        # np.asarray without being copied.
        samples = np.asarray(audio)
        src_len = samples.shape[0]
        dst_len = int(round(src_len * (float(self.target_sr) / rate)))
        if dst_len <= 1:
            # NOTE(review): a one-sample result is dropped as well; kept
            # as-is because callers may rely on it -- confirm intent.
            return np.zeros((0,), dtype=np.float32)

        # Place source and destination samples on the same normalized
        # [0, 1) axis so np.interp can map one grid onto the other. Target
        # positions past the last source position take the last sample's
        # value (np.interp's default right-edge behavior).
        src_x = np.linspace(0.0, 1.0, num=src_len, endpoint=False)
        dst_x = np.linspace(0.0, 1.0, num=dst_len, endpoint=False)
        return np.interp(dst_x, src_x, samples).astype(np.float32)

    def get_prompt(self, language: str = "Danish") -> str:
        """Build the text prompt for one audio input.

        The tokenizer's chat template is applied to an empty system message
        and a user message containing a single audio placeholder, with the
        generation prompt enabled and tokenization disabled. The string
        ``"language <language>"`` is then appended directly, with no
        separator.

        Args:
            language: Language name placed in the suffix. Defaults to
                ``"Danish"``, which reproduces the previously hard-coded
                prompt exactly.

        Returns:
            The rendered prompt string.
        """
        messages = [
            {"role": "system", "content": ""},
            {"role": "user", "content": [{"type": "audio", "audio": ""}]},
        ]
        prompt = self.tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=False,
        )
        return f"{prompt}language {language}"