# Gradio app: turn lyrics + a genre prompt into a song (Bark vocals over a MusicGen backing track).
import tempfile

import gradio as gr
import torch
import torchaudio
from audiocraft.models import MusicGen
from bark import SAMPLE_RATE as BARK_SAMPLE_RATE, generate_audio as bark_generate_audio
from pydub import AudioSegment
# Load MusicGen model
musicgen = MusicGen.get_pretrained('facebook/musicgen-small')
# Function to generate vocals and music
def generate_song(lyrics, genre_prompt):
# Step 1: Generate vocals from Bark
vocals = bark_generate_audio(lyrics, history_prompt="v2/en_speaker_6")
# Save Bark vocals temporarily
vocals_path = tempfile.mktemp(suffix=".wav")
torchaudio.save(vocals_path, vocals.squeeze(0).cpu(), BARK_SAMPLE_RATE)
# Step 2: Generate music from MusicGen
musicgen.set_generation_params(duration=15) # Set song length (in seconds)
music = musicgen.generate([genre_prompt]) # Generate instrumental based on genre
music_path = tempfile.mktemp(suffix=".wav")
torchaudio.save(music_path, music[0].cpu(), 32000)
# Step 3: Mix the two audio files using pydub
vocals_seg = AudioSegment.from_wav(vocals_path)
music_seg = AudioSegment.from_wav(music_path)
# Overlay vocals over the music
mixed = music_seg.overlay(vocals_seg.set_frame_rate(32000).set_channels(1))
output_path = tempfile.mktemp(suffix=".wav")
mixed.export(output_path, format="wav")
return output_path
# Gradio interface
iface = gr.Interface(
fn=generate_song,
inputs=[
gr.Textbox(label="Enter Lyrics", lines=4),
gr.Textbox(label="Enter Genre (e.g., 'hip-hop with 808s')")
],
outputs=gr.Audio(label="Generated Song")
)
# Launch the Gradio interface
iface.
launch() |