|
|
import io

import scipy
import scipy.io.wavfile
import streamlit as st
import torch
from IPython.display import Audio
from transformers import AutoProcessor, MusicgenForConditionalGeneration
|
|
|
|
|
|
|
|
def mu_gen(prompt):
    """Generate a short music clip from a text prompt using MusicGen.

    Parameters
    ----------
    prompt : str
        Free-text description of the desired music.

    Returns
    -------
    bytes
        The generated audio encoded as a WAV file, directly playable by
        ``st.audio(..., format="audio/wav")``.
    """
    # NOTE(review): the model and processor are reloaded on every call;
    # consider caching (e.g. st.cache_resource) to avoid the repeated cost.
    processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
    model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")

    device = torch.device("cpu")
    model.to(device)

    inputs = processor(
        text=[str(prompt)],
        padding=True,
        return_tensors="pt",
    )
    # Move every input tensor to the same device as the model.
    inputs = {key: value.to(device) for key, value in inputs.items()}

    # Inference only — no gradients needed.
    with torch.no_grad():
        audio_values = model.generate(**inputs, max_new_tokens=256)

    sampling_rate = model.config.audio_encoder.sampling_rate

    # generate() returns (batch, channels, samples); take the first sample's
    # single (mono) channel as a 1-D float waveform.
    waveform = audio_values[0, 0].cpu().numpy()

    # Bug fix: the original returned an IPython.display.Audio object, which
    # Streamlit's st.audio cannot render. Encode the waveform as WAV bytes
    # instead — st.audio accepts bytes with format="audio/wav" unchanged.
    buffer = io.BytesIO()
    scipy.io.wavfile.write(buffer, rate=sampling_rate, data=waveform)
    return buffer.getvalue()
|
|
|
|
|
|
|
|
def main():
    """Streamlit UI: collect a text prompt and play the generated music."""
    st.title("Text to Music Generator")

    prompt = st.text_input("Enter a text prompt", "")

    if st.button("Generate Music"):
        if not prompt:
            # Empty prompt — ask the user for input instead of generating.
            st.warning("Please enter a text prompt.")
        else:
            generated_music = mu_gen(prompt)
            st.audio(generated_music, format="audio/wav")
|
|
|
|
|
# Script entry point: launch the Streamlit app when run directly.
if __name__ == "__main__":


    main()