import tempfile

import gradio as gr
import torch
import torchaudio
from audiocraft.models import MusicGen
from bark import SAMPLE_RATE as BARK_SAMPLE_RATE, generate_audio as bark_generate_audio
from pydub import AudioSegment

# Load the MusicGen model once at import time -- model download/load is
# expensive and must not happen per request.
musicgen = MusicGen.get_pretrained('facebook/musicgen-small')


def _temp_wav_path():
    """Return a fresh closed temp-file path ending in .wav.

    tempfile.mktemp is deprecated/insecure (open race); NamedTemporaryFile
    with delete=False is the safe replacement when another library must
    reopen the file by name.
    """
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        return f.name


def generate_song(lyrics, genre_prompt):
    """Generate a song by overlaying Bark vocals on a MusicGen instrumental.

    Parameters
    ----------
    lyrics : str
        Text for Bark to vocalize.
    genre_prompt : str
        Style description for the MusicGen instrumental
        (e.g. "hip-hop with 808s").

    Returns
    -------
    str
        Filesystem path to the mixed WAV file.
    """
    # Step 1: generate vocals with Bark.
    # NOTE: bark.generate_audio returns a 1-D float32 *numpy* array, not a
    # torch tensor -- calling .squeeze(0).cpu() on it (as the original code
    # did) raises AttributeError. Convert and add a channel dim for
    # torchaudio.save, which expects a (channels, samples) tensor.
    vocals = bark_generate_audio(lyrics, history_prompt="v2/en_speaker_6")
    vocals_tensor = torch.from_numpy(vocals).unsqueeze(0)

    vocals_path = _temp_wav_path()
    torchaudio.save(vocals_path, vocals_tensor, BARK_SAMPLE_RATE)

    # Step 2: generate the instrumental with MusicGen.
    musicgen.set_generation_params(duration=15)  # song length in seconds
    music = musicgen.generate([genre_prompt])    # batch of one prompt

    music_path = _temp_wav_path()
    # Use the model's own sample rate instead of a hard-coded 32000 so the
    # saved WAV stays correct if a different MusicGen checkpoint is loaded.
    torchaudio.save(music_path, music[0].cpu(), musicgen.sample_rate)

    # Step 3: mix the two tracks with pydub. Resample the vocals to the
    # instrumental's rate and force mono before overlaying.
    vocals_seg = AudioSegment.from_wav(vocals_path)
    music_seg = AudioSegment.from_wav(music_path)
    mixed = music_seg.overlay(
        vocals_seg.set_frame_rate(musicgen.sample_rate).set_channels(1)
    )

    output_path = _temp_wav_path()
    mixed.export(output_path, format="wav")
    return output_path


# Gradio interface: two text inputs (lyrics, genre) -> one audio output.
iface = gr.Interface(
    fn=generate_song,
    inputs=[
        gr.Textbox(label="Enter Lyrics", lines=4),
        gr.Textbox(label="Enter Genre (e.g., 'hip-hop with 808s')"),
    ],
    outputs=gr.Audio(label="Generated Song"),
)

# Launch only when run as a script, so importing this module (e.g. for
# testing) does not start a web server.
if __name__ == "__main__":
    iface.launch()