import io
import math
from typing import Dict, List, Any

import numpy as np
import soundfile as sf
import torch
import torchaudio
import yaml
from audiocraft.models import MusicGen
from datasets import load_dataset
from transformers import AutoProcessor, MusicgenForConditionalGeneration
def get_bip_bip(bip_duration=0.125, frequency=440, duration=0.5,
                sample_rate=32000, device="cuda"):
    """Generates a series of bips at the given frequency."""
    t = torch.arange(
        int(duration * sample_rate), device=device, dtype=torch.float) / sample_rate
    wav = torch.cos(2 * math.pi * frequency * t)[None]
    tp = (t % (2 * bip_duration)) / (2 * bip_duration)
    envelope = (tp >= 0.5).float()
    return wav * envelope
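
# Example (hypothetical call, not part of the endpoint flow): a short synthetic test
# melody that can be used in place of a real audio clip when trying the generator below.
#   test_melody = get_bip_bip(duration=4.0, device="cpu")  # shape: [1, 4 * 32000]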

def load_conf(conf):
    """Loads the YAML configuration file used by the generator."""
    with open(conf, 'r') as f:
        conf = yaml.safe_load(f)
    return conf
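
# A minimal sketch of the expected conf.yaml. The key names come from how the code
# below reads the config; the values are only illustrative assumptions:
#
#   model: facebook/musicgen-melody   # checkpoint passed to MusicGen.get_pretrained
#   sampling_rate: 32000              # sample rate assumed for incoming melody audio
#   duration: 10                      # length of the generated clip in seconds
#   nth_slice_prompt: 2               # keep only the first 1/n of the audio prompt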

class generator:
    def __init__(self, conf_file):
        """
        Expects a YAML config with the keys:
            model, sampling_rate, duration, nth_slice_prompt
        """
        self.conf = load_conf(conf_file)
        self.processor = AutoProcessor.from_pretrained(self.conf['model'])
        # audiocraft's MusicGen wrapper is not an nn.Module, so the device is passed
        # at load time rather than via a .to() call on the model afterwards.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = MusicGen.get_pretrained(self.conf['model'], device=device)
        self.model.set_generation_params(
            use_sampling=True,
            top_k=250,
            duration=self.conf['duration']
        )
        # audiocraft exposes the output sample rate directly on the wrapper.
        self.sampling_rate = self.model.sample_rate
    def preprocess(self, text, audio):
        # Keep only the first 1/nth_slice_prompt of the audio prompt.
        audio = audio[: int(len(audio) // self.conf['nth_slice_prompt'])]
        return audio
    def generate(self, text: list, audio: np.ndarray, sr: int = None, **kwargs):
        """
        text: e.g. ["modern melodic electronic dance music", "80s blues track with groovy saxophone"]
        audio (np.ndarray): melody prompt as returned by soundfile ([T] or [T, C])
        sr (int): sample rate of `audio`; falls back to the configured sampling_rate
        """
        # inputs = self.processor(
        #     audio=audio,
        #     sampling_rate=self.conf["sampling_rate"],
        #     text=text,
        #     padding=True,
        #     return_tensors="pt",
        # )
        melody = torch.from_numpy(audio).float()
        if melody.dim() == 1:
            melody = melody[None]   # [T] -> [1, T]
        else:
            melody = melody.t()     # soundfile returns [T, C]; MusicGen expects [C, T]
        output = self.model.generate_with_chroma(
            descriptions=text,
            melody_wavs=melody,
            melody_sample_rate=sr or self.conf['sampling_rate'],
            progress=True
        )
        return output
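
# Example usage outside the endpoint (hypothetical file name and prompt, assuming a
# valid conf.yaml next to this file):
#   gen = generator('conf.yaml')
#   melody, sr = sf.read('melody.wav')
#   wav = gen.generate(["80s blues track with groovy saxophone"], melody, sr=sr)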

class EndpointHandler:
    def __init__(self, path=""):
        # load model and processor from path (used by the commented-out transformers
        # path below); melody-conditioned generation goes through `generator`
        device = "cuda" if torch.cuda.is_available() else "cpu"
        self.processor = AutoProcessor.from_pretrained(path)
        self.model = MusicgenForConditionalGeneration.from_pretrained(
            path,
            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        ).to(device)
        self.generator = generator('conf.yaml')
    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Args:
            data (dict):
                The payload with the text prompt, the melody audio bytes and
                optional generation parameters.
        """
        # process input
        text = data.pop("text", data)
        audio = data.pop("audio", data)
        parameters = data.pop("parameters", None)
        if isinstance(text, str):
            text = [text]
        # `audio` is expected to be the raw bytes of an audio file
        audio, sr = sf.read(io.BytesIO(audio))
        output = self.generator.generate(text, audio, sr=sr)
        # # pass inputs with all kwargs in data
        # if parameters is not None:
        #     with torch.autocast("cuda"):
        #         outputs = self.model.generate(**inputs, **parameters)
        # else:
        #     with torch.autocast("cuda"):
        #         outputs = self.model.generate(**inputs,)
        # postprocess the prediction
        prediction = output.squeeze().cpu().numpy().tolist()
        return [{"generated_audio": prediction}]
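
# A minimal local smoke test for the handler (hypothetical file name and checkpoint;
# assumes the weights referenced by `path` and by conf.yaml are available):
#   handler = EndpointHandler(path="facebook/musicgen-melody")
#   with open("melody.wav", "rb") as f:
#       payload = {"text": "80s blues track with groovy saxophone", "audio": f.read()}
#   result = handler(payload)   # [{"generated_audio": [...]}]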