Spaces:

thepatch
/

stable-melodyflow

Running on Zero

App Files Files Community

thecollabagepatch committed on Apr 17, 2024

Commit

8bdf8d9

1 Parent(s): 32f56a6

breaking up functions

Browse files

Files changed (1) hide show

app.py +119 -114

app.py CHANGED Viewed

@@ -11,6 +11,8 @@ from audiocraft.data.audio import audio_write
 from pydub import AudioSegment
 import spaces
 # Utility Functions
 def peak_normalize(y, target_peak=0.97):
@@ -72,115 +74,115 @@ def calculate_duration(bpm, min_duration=29, max_duration=30):
     return duration
 @spaces.GPU(duration=120)
-def generate_music(seed, use_chords, chord_progression, prompt_duration, musicgen_model, num_iterations, bpm):
-    while True:
-        try:
-            if seed == "":
-                seed = random.randint(1, 10000)
-            ml = MusicLangPredictor('musiclang/musiclang-v2')
-            try:
-                seed = int(seed)
-            except ValueError:
-                seed = random.randint(1, 10000)
-            nb_tokens = 1024
-            temperature = 0.9
-            top_p = 1.0
-            if use_chords and chord_progression.strip():
-                score = ml.predict_chords(
-                    chord_progression,
-                    time_signature=(4, 4),
-                    temperature=temperature,
-                    topp=top_p,
-                    rng_seed=seed
-                )
-            else:
-                score = ml.predict(
-                    nb_tokens=nb_tokens,
-                    temperature=temperature,
-                    topp=top_p,
-                    rng_seed=seed
-                )
-            midi_filename = f"output_{seed}.mid"
-            wav_filename = midi_filename.replace(".mid", ".wav")
-            score.to_midi(midi_filename, tempo=bpm, time_signature=(4, 4))
-            subprocess.run(["fluidsynth", "-ni", "font.sf2", midi_filename, "-F", wav_filename, "-r", "44100"])
-            # Load the generated audio
-            song, sr = torchaudio.load(wav_filename)
-            song = song.to(device)
-            # Use the user-provided BPM value for duration calculation
-            duration = calculate_duration(bpm)
-            # Create slices from the song using the user-provided BPM value
-            slices = create_slices(song, sr, 35, bpm, num_slices=5)
-            # Load the model
-            model_name = musicgen_model.split(" ")[0]
-            model_continue = MusicGen.get_pretrained(model_name)
-            # Setting generation parameters
-            model_continue.set_generation_params(
-                use_sampling=True,
-                top_k=250,
-                top_p=0.0,
-                temperature=1.0,
-                duration=duration,
-                cfg_coef=3
-            )
-            all_audio_files = []
-            for i in range(num_iterations):
-                slice_idx = i % len(slices)
-                print(f"Running iteration {i + 1} using slice {slice_idx}...")
-                prompt_waveform = slices[slice_idx][..., :int(prompt_duration * sr)]
-                prompt_waveform = preprocess_audio(prompt_waveform)
-                output = model_continue.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
-                output = output.cpu()  # Move the output tensor back to CPU
-                # Make sure the output tensor has at most 2 dimensions
-                if len(output.size()) > 2:
-                    output = output.squeeze()
-                filename_without_extension = f'continue_{i}'
-                filename_with_extension = f'{filename_without_extension}.wav'
-                audio_write(filename_with_extension, output, model_continue.sample_rate, strategy="loudness", loudness_compressor=True)
-                all_audio_files.append(f'{filename_without_extension}.wav.wav')  # Assuming the library appends an extra .wav
-            # Combine all audio files
-            combined_audio = AudioSegment.empty()
-            for filename in all_audio_files:
-                combined_audio += AudioSegment.from_wav(filename)
-            combined_audio_filename = f"combined_audio_{seed}.mp3"
-            combined_audio.export(combined_audio_filename, format="mp3")
-            # Clean up temporary files
-            os.remove(midi_filename)
-            os.remove(wav_filename)
-            for filename in all_audio_files:
-                os.remove(filename)
-            return combined_audio_filename
-        except IndexError:
-            # Retry with a new random seed if an IndexError is raised
-            seed = random.randint(1, 10000)
-# Check if CUDA is available
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # Define the expandable sections
 musiclang_blurb = """
@@ -221,6 +223,11 @@ with gr.Blocks() as iface:
             seed = gr.Textbox(label="seed (leave blank for random)", value="")
             use_chords = gr.Checkbox(label="control chord progression", value=False)
             chord_progression = gr.Textbox(label="chord progression (e.g., Am CM Dm E7 Am)", visible=True)
             prompt_duration = gr.Dropdown(label="prompt duration (seconds)", choices=list(range(1, 11)), value=7)
             musicgen_models = [
                 "thepatch/vanya_ai_dnb_0.1 (small)",
@@ -229,14 +236,12 @@ with gr.Blocks() as iface:
                 "thepatch/bleeps-medium (medium)",
                 "thepatch/hoenn_lofi (large)"
             ]
             musicgen_model = gr.Dropdown(label="musicGen model", choices=musicgen_models, value=musicgen_models[0])
-            num_iterations = gr.Slider(label="number of iterations", minimum=1, maximum=10, step=1, value=3)
-            bpm = gr.Slider(label="BPM", minimum=60, maximum=200, step=1, value=140)
-            generate_button = gr.Button("generate music")
-        with gr.Column():
-            output_audio = gr.Audio(label="your track")
-    generate_button.click(generate_music, inputs=[seed, use_chords, chord_progression, prompt_duration, musicgen_model, num_iterations, bpm], outputs=output_audio)
 iface.launch()

 from pydub import AudioSegment
 import spaces
+# Check if CUDA is available
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # Utility Functions
 def peak_normalize(y, target_peak=0.97):
     return duration
+@spaces.GPU(duration=60)
+def generate_midi(seed, use_chords, chord_progression, bpm):
+    if seed == "":
+        seed = random.randint(1, 10000)
+    ml = MusicLangPredictor('musiclang/musiclang-v2')
+    try:
+        seed = int(seed)
+    except ValueError:
+        seed = random.randint(1, 10000)
+    nb_tokens = 1024
+    temperature = 0.9
+    top_p = 1.0
+    if use_chords and chord_progression.strip():
+        score = ml.predict_chords(
+            chord_progression,
+            time_signature=(4, 4),
+            temperature=temperature,
+            topp=top_p,
+            rng_seed=seed
+        )
+    else:
+        score = ml.predict(
+            nb_tokens=nb_tokens,
+            temperature=temperature,
+            topp=top_p,
+            rng_seed=seed
+        )
+    midi_filename = f"output_{seed}.mid"
+    wav_filename = midi_filename.replace(".mid", ".wav")
+    score.to_midi(midi_filename, tempo=bpm, time_signature=(4, 4))
+    subprocess.run(["fluidsynth", "-ni", "font.sf2", midi_filename, "-F", wav_filename, "-r", "44100"])
+    # Clean up temporary MIDI file
+    os.remove(midi_filename)
+    return wav_filename
 @spaces.GPU(duration=120)
+def generate_music(wav_filename, prompt_duration, musicgen_model, num_iterations, bpm):
+    # Load the generated audio
+    song, sr = torchaudio.load(wav_filename)
+    song = song.to(device)
+    # Use the user-provided BPM value for duration calculation
+    duration = calculate_duration(bpm)
+    # Create slices from the song using the user-provided BPM value
+    slices = create_slices(song, sr, 35, bpm, num_slices=5)
+    # Load the model
+    model_name = musicgen_model.split(" ")[0]
+    model_continue = MusicGen.get_pretrained(model_name)
+    # Setting generation parameters
+    model_continue.set_generation_params(
+        use_sampling=True,
+        top_k=250,
+        top_p=0.0,
+        temperature=1.0,
+        duration=duration,
+        cfg_coef=3
+    )
+    all_audio_files = []
+    for i in range(num_iterations):
+        slice_idx = i % len(slices)
+        print(f"Running iteration {i + 1} using slice {slice_idx}...")
+        prompt_waveform = slices[slice_idx][..., :int(prompt_duration * sr)]
+        prompt_waveform = preprocess_audio(prompt_waveform)
+        output = model_continue.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
+        output = output.cpu()  # Move the output tensor back to CPU
+        # Make sure the output tensor has at most 2 dimensions
+        if len(output.size()) > 2:
+            output = output.squeeze()
+        filename_without_extension = f'continue_{i}'
+        filename_with_extension = f'{filename_without_extension}.wav'
+        audio_write(filename_with_extension, output, model_continue.sample_rate, strategy="loudness", loudness_compressor=True)
+        all_audio_files.append(f'{filename_without_extension}.wav.wav')  # Assuming the library appends an extra .wav
+    # Combine all audio files
+    combined_audio = AudioSegment.empty()
+    for filename in all_audio_files:
+        combined_audio += AudioSegment.from_wav(filename)
+    combined_audio_filename = f"combined_audio_{random.randint(1, 10000)}.mp3"
+    combined_audio.export(combined_audio_filename, format="mp3")
+    # Clean up temporary files
+    os.remove(wav_filename)
+    for filename in all_audio_files:
+        os.remove(filename)
+    return combined_audio_filename
 # Define the expandable sections
 musiclang_blurb = """
             seed = gr.Textbox(label="seed (leave blank for random)", value="")
             use_chords = gr.Checkbox(label="control chord progression", value=False)
             chord_progression = gr.Textbox(label="chord progression (e.g., Am CM Dm E7 Am)", visible=True)
+            bpm = gr.Slider(label="BPM", minimum=60, maximum=200, step=1, value=110)
+            generate_midi_button = gr.Button("Generate MIDI")
+            midi_audio = gr.Audio(label="Generated MIDI Audio")
+        with gr.Column():
             prompt_duration = gr.Dropdown(label="prompt duration (seconds)", choices=list(range(1, 11)), value=7)
             musicgen_models = [
                 "thepatch/vanya_ai_dnb_0.1 (small)",
                 "thepatch/bleeps-medium (medium)",
                 "thepatch/hoenn_lofi (large)"
             ]
             musicgen_model = gr.Dropdown(label="musicGen model", choices=musicgen_models, value=musicgen_models[0])
+            num_iterations = gr.Slider(label="number of iterations", minimum=1, maximum=3, step=1, value=3)
+            generate_music_button = gr.Button("Generate Music")
+            output_audio = gr.Audio(label="Generated Music")
+    generate_midi_button.click(generate_midi, inputs=[seed, use_chords, chord_progression, bpm], outputs=midi_audio)
+    generate_music_button.click(generate_music, inputs=[midi_audio, prompt_duration, musicgen_model, num_iterations, bpm], outputs=output_audio)
 iface.launch()