feat(ui): Implement interactive preview for sound sources
Browse files
This commit introduces a live audio preview system, allowing users to instantly audition their selected SoundFont or 8-bit Synthesizer settings without running a full render. This significantly improves the sound design workflow.
app.py
CHANGED
|
@@ -536,6 +536,134 @@ def analyze_midi_velocity(midi_path):
|
|
| 536 |
print("No notes found in this MIDI.")
|
| 537 |
|
| 538 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 539 |
def scale_instrument_velocity(instrument, scale=0.8):
|
| 540 |
for note in instrument.notes:
|
| 541 |
note.velocity = max(1, min(127, int(note.velocity * scale)))
|
|
@@ -2471,11 +2599,20 @@ if __name__ == "__main__":
|
|
| 2471 |
value=RENDER_TYPE_DESCRIPTIONS["Render as-is"], # Set initial value
|
| 2472 |
elem_classes="description-box" # Optional: for CSS styling
|
| 2473 |
)
|
| 2474 |
-
# --- SoundFont Bank with
|
| 2475 |
-
|
| 2476 |
-
|
| 2477 |
-
|
| 2478 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2479 |
render_sample_rate = gr.Radio(
|
| 2480 |
["16000", "32000", "44100"],
|
| 2481 |
label="Audio Sample Rate",
|
|
@@ -2787,6 +2924,12 @@ if __name__ == "__main__":
|
|
| 2787 |
inputs=s8bit_echo_sustain,
|
| 2788 |
outputs=echo_sustain_settings
|
| 2789 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2790 |
|
| 2791 |
# Launch the Gradio app
|
| 2792 |
app.queue().launch(inbrowser=True, debug=True)
|
|
|
|
| 536 |
print("No notes found in this MIDI.")
|
| 537 |
|
| 538 |
|
| 539 |
+
def _build_preview_midi():
    """Build the short in-memory test melody used by `preview_sound_source`.

    The melody is the opening of the Super Mario Bros. theme, played by a
    single lead instrument (program 81) at a fixed velocity of 120.

    Returns:
        A `pretty_midi.PrettyMIDI` object containing one instrument.
    """
    preview_midi = pretty_midi.PrettyMIDI()

    # Program 81 (Lead 2, sawtooth) is a good, bright default.
    instrument = pretty_midi.Instrument(program=81, is_drum=False, name="Preview Lead")

    # `tempo` is only used to derive the step length in seconds; it is not
    # written into the MIDI object itself.
    tempo = 200.0
    # Half a beat (an eighth note) at this tempo: 60 / 200 / 2 = 0.15 s.
    time_per_step = 60.0 / tempo / 2

    # (MIDI pitch, duration in steps). MIDI pitch 60 = C4 (middle C).
    melody_data = [
        (76, 1), (76, 2), (76, 2), (72, 1), (76, 2),  # E E E C E
        (79, 4), (67, 4),                             # G G(low)
    ]

    current_time = 0.0
    for pitch, duration_steps in melody_data:
        end_time = current_time + (duration_steps * time_per_step)
        instrument.notes.append(
            pretty_midi.Note(
                velocity=120,  # High velocity for a bright, clear sound.
                pitch=pitch,
                start=current_time,
                # Tiny gap between notes so repeated pitches re-trigger clearly.
                end=end_time - 0.01,
            )
        )
        current_time = end_time

    preview_midi.instruments.append(instrument)
    return preview_midi


def preview_sound_source(sound_source_name: str, *args):
    """
    Generate a short audio preview for a SoundFont or the 8-bit Synthesizer.

    This function acts as a router:
    - If `sound_source_name` equals `SYNTH_8_BIT_LABEL`, the preview melody is
      rendered with `synthesize_8bit_style`, using an `AppParameters` object
      built from the current UI settings.
    - Otherwise the name is looked up in `soundfonts_dict` and the melody is
      rendered through `midi_to_colab_audio` with that SoundFont.

    Args:
        sound_source_name (str): The name of the SoundFont or the 8-bit synth label.
        *args: The current UI setting values, paired positionally with
            `ALL_PARAM_KEYS` to build the `AppParameters` object. Only used
            for the 8-bit synth preview.

    Returns:
        A Gradio-compatible audio tuple `(sample_rate, numpy_array)`, or
        `None` if rendering failed (the error is printed to the console).

    Raises:
        gr.Error: If the selected SoundFont has no entry in `soundfonts_dict`
            or its file does not exist on disk.
    """
    srate = 44100  # Use a standard sample rate for all previews.
    preview_midi = _build_preview_midi()

    # --- ROUTING LOGIC: Decide which synthesizer to use ---

    # CASE 1: 8-bit Synthesizer Preview
    if sound_source_name == SYNTH_8_BIT_LABEL:
        print("Generating preview for: 8-bit Synthesizer")
        try:
            # Create a temporary AppParameters object from the current UI settings.
            params = AppParameters(**dict(zip(ALL_PARAM_KEYS, args)))

            # Use the internal synthesizer to render the preview MIDI.
            audio_waveform = np.asarray(
                synthesize_8bit_style(midi_data=preview_midi, fs=srate, params=params)
            )

            if audio_waveform.size == 0:
                print("Preview failed: 8-bit synthesizer returned no audio data.")
                return None

            # Normalise out-of-place: an in-place `/=` would mutate the
            # synthesizer's buffer and fails outright on integer dtypes.
            if not np.issubdtype(audio_waveform.dtype, np.floating):
                audio_waveform = audio_waveform.astype(np.float64)
            peak_val = np.max(np.abs(audio_waveform))
            if peak_val > 0:
                audio_waveform = audio_waveform / peak_val

            # The synth returns (channels, samples), Gradio needs (samples, channels).
            audio_out = (audio_waveform.T * 32767).astype(np.int16)

            print("8-bit preview generated successfully.")
            return (srate, audio_out)

        except Exception as e:
            # Best-effort preview: report to the console and show no audio.
            print(f"An error occurred during 8-bit preview generation: {e}")
            return None

    # CASE 2: SoundFont Preview
    soundfont_path = soundfonts_dict.get(sound_source_name)
    if not soundfont_path or not os.path.exists(soundfont_path):
        print(f"Preview failed: SoundFont file not found at '{soundfont_path}'")
        raise gr.Error(f"Could not find the SoundFont file for '{sound_source_name}'.")

    try:
        print(f"Generating preview for: {sound_source_name}")
        # Convert the in-memory MIDI object to a binary stream.
        midi_io = io.BytesIO()
        preview_midi.write(midi_io)
        midi_data = midi_io.getvalue()

        # Use the existing rendering function to generate the audio.
        audio_out = midi_to_colab_audio(
            midi_data,
            soundfont_path=soundfont_path,
            sample_rate=srate,
            output_for_gradio=True,
        )

        # Gradio expects (sample_rate, numpy_array); anything other than an
        # ndarray from the renderer is treated as a failed render.
        if isinstance(audio_out, np.ndarray):
            print("SoundFont preview generated successfully.")
            return (srate, audio_out)

        print("Preview failed: Rendering function did not return valid audio data.")
        return None

    except Exception as e:
        # Catch any other errors, including from FluidSynth, and report them.
        # Returning None keeps the UI responsive; the error is visible in the console.
        print(f"An error occurred during SoundFont preview generation: {e}")
        return None
|
| 665 |
+
|
| 666 |
+
|
| 667 |
def scale_instrument_velocity(instrument, scale=0.8):
|
| 668 |
for note in instrument.notes:
|
| 669 |
note.velocity = max(1, min(127, int(note.velocity * scale)))
|
|
|
|
| 2599 |
value=RENDER_TYPE_DESCRIPTIONS["Render as-is"], # Set initial value
|
| 2600 |
elem_classes="description-box" # Optional: for CSS styling
|
| 2601 |
)
|
| 2602 |
+
# --- SoundFont Bank with Preview Button ---
|
| 2603 |
+
with gr.Row(elem_id="soundfont_selector_row"):
|
| 2604 |
+
soundfont_bank = gr.Dropdown(
|
| 2605 |
+
[SYNTH_8_BIT_LABEL] + list(soundfonts_dict.keys()),
|
| 2606 |
+
label="SoundFont / Synthesizer",
|
| 2607 |
+
value=list(soundfonts_dict.keys())[0] if soundfonts_dict else SYNTH_8_BIT_LABEL,
|
| 2608 |
+
scale=4 # Give the dropdown more space
|
| 2609 |
+
)
|
| 2610 |
+
# The preview button, with a speaker icon for clarity.
|
| 2611 |
+
preview_sf_button = gr.Button("🔊 Preview", scale=1)
|
| 2612 |
+
|
| 2613 |
+
# This audio player is dedicated to playing the preview clips.
|
| 2614 |
+
# It's not interactive, as it's for output only.
|
| 2615 |
+
preview_sf_player = gr.Audio(label="SoundFont Preview", interactive=False, show_label=False)
|
| 2616 |
render_sample_rate = gr.Radio(
|
| 2617 |
["16000", "32000", "44100"],
|
| 2618 |
label="Audio Sample Rate",
|
|
|
|
| 2924 |
inputs=s8bit_echo_sustain,
|
| 2925 |
outputs=echo_sustain_settings
|
| 2926 |
)
|
| 2927 |
+
# --- Event listener for the unified sound source preview button ---
|
| 2928 |
+
preview_sf_button.click(
|
| 2929 |
+
fn=preview_sound_source,
|
| 2930 |
+
inputs=[soundfont_bank] + all_settings_components,
|
| 2931 |
+
outputs=[preview_sf_player]
|
| 2932 |
+
)
|
| 2933 |
|
| 2934 |
# Launch the Gradio app
|
| 2935 |
app.queue().launch(inbrowser=True, debug=True)
|