Audio-To-MIDI-And-Advanced-Renderer

Sleeping

avans06 committed on Aug 17, 2025

Commit

44e9039

1 Parent(s): 2dbdd2e

feat(ui): Implement interactive preview for sound sources

This commit introduces a live audio preview system, allowing users to instantly audition their selected SoundFont or 8-bit Synthesizer settings without running a full render. This significantly improves the sound design workflow.

Files changed (1) hide show

app.py +148 -5

app.py CHANGED Viewed

@@ -536,6 +536,134 @@ def analyze_midi_velocity(midi_path):
         print("No notes found in this MIDI.")
 def scale_instrument_velocity(instrument, scale=0.8):
     for note in instrument.notes:
         note.velocity = max(1, min(127, int(note.velocity * scale)))
@@ -2471,11 +2599,20 @@ if __name__ == "__main__":
                         value=RENDER_TYPE_DESCRIPTIONS["Render as-is"], # Set initial value
                         elem_classes="description-box" # Optional: for CSS styling
                     )
-                    # --- SoundFont Bank with 8-bit option ---
-                    soundfont_bank = gr.Dropdown(
-                        [SYNTH_8_BIT_LABEL] + list(soundfonts_dict.keys()),
-                        label="SoundFont / Synthesizer",
-                        value=list(soundfonts_dict.keys())[0] if soundfonts_dict else SYNTH_8_BIT_LABEL)
                     render_sample_rate = gr.Radio(
                         ["16000", "32000", "44100"],
                         label="Audio Sample Rate",
@@ -2787,6 +2924,12 @@ if __name__ == "__main__":
             inputs=s8bit_echo_sustain,
             outputs=echo_sustain_settings
         )
     # Launch the Gradio app
     app.queue().launch(inbrowser=True, debug=True)

         print("No notes found in this MIDI.")
+def preview_sound_source(sound_source_name: str, *args):
+    """
+    Generate a short audio preview for the selected sound source.
+    A seven-note test melody (the Super Mario Bros. theme intro) is built in
+    memory and rendered at 44100 Hz. This function acts as a router:
+    - If `sound_source_name` equals `SYNTH_8_BIT_LABEL`, the melody is rendered
+      with the internal `synthesize_8bit_style` function, using an
+      `AppParameters` object built from the current UI settings.
+    - Otherwise the name is looked up in `soundfonts_dict` and the melody is
+      rendered through `midi_to_colab_audio` with that SoundFont file.
+    Args:
+        sound_source_name (str): The name of the SoundFont or the 8-bit synth label.
+        *args: The current UI setting values, paired positionally with
+               `ALL_PARAM_KEYS` to build an AppParameters object. Only used
+               for the 8-bit synth preview.
+    Returns:
+        A Gradio-compatible audio tuple (sample_rate, numpy_array) on success,
+        or None when rendering fails (the error is printed to the console).
+    Raises:
+        gr.Error: If the selected SoundFont has no entry in `soundfonts_dict`
+            or its file does not exist on disk.
+    """
+    srate = 44100  # Use a standard sample rate for all previews.
+    # 1. Create a MIDI object in memory.
+    preview_midi = pretty_midi.PrettyMIDI()
+    # Use a lead instrument. Program 81 (Lead 2, sawtooth) is a good, bright default.
+    instrument = pretty_midi.Instrument(program=81, is_drum=False, name="Preview Lead")
+    # 2. Define the melody: Super Mario Bros. theme intro
+    #    - tempo: A brisk 200 BPM, so one beat lasts 60 / 200 = 0.3s.
+    #    - time_per_step: half a beat = 0.15s, i.e. an eighth note when the
+    #      beat is a quarter note (not a 16th note).
+    #    - notes: A list of tuples (pitch, duration_in_steps)
+    # `tempo` is only used to derive the step length; the note start/end
+    # times below are computed directly in seconds.
+    tempo = 200.0
+    time_per_step = 60.0 / tempo / 2 # Half-beat (eighth-note) duration at this tempo
+    # (Pitch, Duration in steps); the 16 steps span 2.4s of note time.
+    # MIDI Pitch 60 = C4 (Middle C)
+    melody_data = [
+        (76, 1), (76, 2), (76, 2), (72, 1), (76, 2), # E E E C E
+        (79, 4), (67, 4)                              # G G(low)
+    ]
+    # Lay the notes end-to-end: each note starts where the previous step span ended.
+    current_time = 0.0
+    for pitch, duration_steps in melody_data:
+        start_time = current_time
+        end_time = start_time + (duration_steps * time_per_step)
+        # Add a tiny gap between notes to ensure they re-trigger clearly
+        # (matters for the three repeated E notes at the start).
+        note_end_time = end_time - 0.01
+        note = pretty_midi.Note(
+            velocity=120, # Use a high velocity for a bright, clear sound
+            pitch=pitch,
+            start=start_time,
+            end=note_end_time
+        )
+        instrument.notes.append(note)
+        # Advance by the full step span (not the shortened note end) so the
+        # 10ms gap does not accumulate into timing drift.
+        current_time = end_time
+    preview_midi.instruments.append(instrument)
+    # --- ROUTING LOGIC: Decide which synthesizer to use ---
+    # CASE 1: 8-bit Synthesizer Preview
+    if sound_source_name == SYNTH_8_BIT_LABEL:
+        print("Generating preview for: 8-bit Synthesizer")
+        try:
+            # Create a temporary AppParameters object from the current UI settings.
+            # zip() stops at the shorter sequence, so a length mismatch between
+            # ALL_PARAM_KEYS and args is silently truncated rather than reported.
+            params = AppParameters(**dict(zip(ALL_PARAM_KEYS, args)))
+            # Use the internal synthesizer to render the preview MIDI
+            audio_waveform = synthesize_8bit_style(midi_data=preview_midi, fs=srate, params=params)
+            # Peak-normalize in place; the guard skips the division for an
+            # all-zero (silent) waveform to avoid dividing by zero.
+            peak_val = np.max(np.abs(audio_waveform))
+            if peak_val > 0:
+                audio_waveform /= peak_val
+            # NOTE(review): assumes the synth returns (channels, samples) — confirm.
+            # The transpose then yields (samples, channels), and the scaling maps
+            # the normalized [-1, 1] range onto 16-bit integer samples.
+            audio_out = (audio_waveform.T * 32767).astype(np.int16)
+            print("8-bit preview generated successfully.")
+            return (srate, audio_out)
+        except Exception as e:
+            # Any failure (bad parameters, synth error) is logged and reported
+            # to the caller as None instead of an exception.
+            print(f"An error occurred during 8-bit preview generation: {e}")
+            return None
+    # CASE 2: SoundFont Preview
+    else:
+        # .get() returns None for an unknown name, which the check below treats
+        # the same as a missing file (the log line then shows 'None' as the path).
+        soundfont_path = soundfonts_dict.get(sound_source_name)
+        if not soundfont_path or not os.path.exists(soundfont_path):
+            print(f"Preview failed: SoundFont file not found at '{soundfont_path}'")
+            # Raised outside the try block below, so this error propagates to
+            # the caller rather than being converted to None.
+            raise gr.Error(f"Could not find the SoundFont file for '{sound_source_name}'.")
+        try:
+            print(f"Generating preview for: {sound_source_name}")
+            # Convert the in-memory MIDI object to a binary stream.
+            midi_io = io.BytesIO()
+            preview_midi.write(midi_io)
+            midi_data = midi_io.getvalue()
+            # Use the existing rendering function to generate the audio.
+            # NOTE(review): presumably FluidSynth-backed, and presumably
+            # `output_for_gradio=True` makes it return a NumPy array — confirm
+            # against the definition of `midi_to_colab_audio`.
+            audio_out = midi_to_colab_audio(
+                midi_data,
+                soundfont_path=soundfont_path,
+                sample_rate=srate,
+                output_for_gradio=True
+            )
+            # Ensure the returned value is exactly what Gradio expects:
+            # only a NumPy array is accepted, and it is wrapped in a tuple
+            # together with the sample rate.
+            if isinstance(audio_out, np.ndarray):
+                print("SoundFont preview generated successfully.")
+                return (srate, audio_out)
+            else:
+                # If the rendering function fails, it might return something else.
+                # We handle this to prevent the Gradio error.
+                print("Preview failed: Rendering function did not return valid audio data.")
+                return None
+        except Exception as e:
+            # Catch any other errors raised while writing the MIDI stream or
+            # rendering it, and report them.
+            print(f"An error occurred during SoundFont preview generation: {e}")
+            # It's better to return None than to crash the UI.
+            # The error will be visible in the console.
+            return None
 def scale_instrument_velocity(instrument, scale=0.8):
     for note in instrument.notes:
         note.velocity = max(1, min(127, int(note.velocity * scale)))
                         value=RENDER_TYPE_DESCRIPTIONS["Render as-is"], # Set initial value
                         elem_classes="description-box" # Optional: for CSS styling
                     )
+                    # --- SoundFont Bank with Preview Button ---
+                    with gr.Row(elem_id="soundfont_selector_row"):
+                        soundfont_bank = gr.Dropdown(
+                            [SYNTH_8_BIT_LABEL] + list(soundfonts_dict.keys()),
+                            label="SoundFont / Synthesizer",
+                            value=list(soundfonts_dict.keys())[0] if soundfonts_dict else SYNTH_8_BIT_LABEL,
+                            scale=4 # Give the dropdown more space
+                        )
+                        # The preview button, with a speaker icon for clarity.
+                        preview_sf_button = gr.Button("🔊 Preview", scale=1)
+                    # This audio player is dedicated to playing the preview clips.
+                    # It's not interactive, as it's for output only.
+                    preview_sf_player = gr.Audio(label="SoundFont Preview", interactive=False, show_label=False)
                     render_sample_rate = gr.Radio(
                         ["16000", "32000", "44100"],
                         label="Audio Sample Rate",
             inputs=s8bit_echo_sustain,
             outputs=echo_sustain_settings
         )
+        # --- Event listener for the unified sound source preview button ---
+        preview_sf_button.click(
+            fn=preview_sound_source,
+            inputs=[soundfont_bank] + all_settings_components,
+            outputs=[preview_sf_player]
+        )
     # Launch the Gradio app
     app.queue().launch(inbrowser=True, debug=True)