import gradio as gr
import torch
import numpy as np
from transformers import AutoProcessor, MusicgenForConditionalGeneration

# Load MusicGen model once at startup (downloads weights on the first run).
processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# MusicGen's audio codec produces ~50 tokens per second of generated audio.
TOKENS_PER_SECOND = 50


def generate_music(prompt: str, duration: float):
    """Generate a mono audio clip from a free-text vibe prompt.

    Args:
        prompt: Text description of the desired music.
        duration: Target clip length in seconds.

    Returns:
        A ``(sampling_rate, audio)`` tuple — the order Gradio's
        ``gr.Audio`` component expects for numpy output — where
        ``audio`` is a 1-D float32 waveform.
    """
    inputs = processor(
        text=[prompt],
        padding=True,
        return_tensors="pt",
    ).to(device)

    # Inference only: no_grad avoids accumulating autograd state.
    # max_new_tokens is tied to duration: ~50 tokens per second.
    # int() guards against the slider delivering a float.
    with torch.no_grad():
        generated_audio = model.generate(
            **inputs,
            do_sample=True,
            guidance_scale=3,
            max_new_tokens=int(duration) * TOKENS_PER_SECOND,
        )

    # generate() returns shape (batch, channels, samples). Taking only [0]
    # leaves a 2-D (channels, samples) array that Gradio misinterprets;
    # index [0, 0] to get the first batch item's single (mono) channel.
    audio = generated_audio[0, 0].cpu().numpy()

    # Read the true rate from the model config instead of hard-coding 32000.
    sampling_rate = model.config.audio_encoder.sampling_rate

    # Gradio's Audio component expects (sampling_rate, data) — NOT the
    # reverse; the original returned (audio, sampling_rate), which breaks
    # playback.
    return (sampling_rate, audio)


# Gradio UI
demo = gr.Interface(
    fn=generate_music,
    inputs=[
        gr.Textbox(lines=2, label="Describe your vibe (e.g., 'lofi chill with rain and piano')"),
        gr.Slider(5, 30, value=10, step=5, label="Duration (seconds)"),
    ],
    outputs=gr.Audio(label="Generated Music"),
    title="🎷 LoFiJazz Agent",
    description="AI agent that composes lo-fi and jazz-style music based on your vibe prompt.",
)

# Guard launch so the module can be imported (e.g. for testing) without
# starting the web server.
if __name__ == "__main__":
    demo.launch()