feat(synth): Implement Echo Sustain for plucky envelopes
Adds an "Echo Sustain" effect to the 8-bit synthesizer. When a plucky (AD) envelope is used, this feature fills the silent tail of long notes with decaying pulses, so they no longer sound choppy or empty.
app.py
CHANGED
|
@@ -148,6 +148,10 @@ class AppParameters:
|
|
| 148 |
s8bit_fm_modulation_depth: float = 0.0
|
| 149 |
s8bit_fm_modulation_rate: float = 0.0
|
| 150 |
s8bit_adaptive_decay: bool = False
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
|
| 152 |
|
| 153 |
# =================================================================================================
|
|
@@ -382,7 +386,7 @@ def synthesize_8bit_style(*, midi_data: pretty_midi.PrettyMIDI, fs: int, params
|
|
| 382 |
envelope[:attack_samples] = np.linspace(0, start_amp, attack_samples)
|
| 383 |
if decay_samples > 0:
|
| 384 |
envelope[attack_samples:attack_samples+decay_samples] = np.linspace(start_amp, 0, decay_samples)
|
| 385 |
-
|
| 386 |
else: # Sustained
|
| 387 |
envelope = np.linspace(start_amp, 0, num_samples)
|
| 388 |
|
|
@@ -418,6 +422,65 @@ def synthesize_8bit_style(*, midi_data: pretty_midi.PrettyMIDI, fs: int, params
|
|
| 418 |
# Apply envelope to the (potentially combined) waveform
|
| 419 |
note_waveform *= envelope
|
| 420 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 421 |
start_sample = int(note.start * fs)
|
| 422 |
end_sample = start_sample + num_samples
|
| 423 |
if end_sample > waveform.shape[1]:
|
|
@@ -2290,13 +2353,15 @@ if __name__ == "__main__":
|
|
| 2290 |
with gr.Accordion("▶️ Configure Global Settings (for both Single File and Batch)", open=True):
|
| 2291 |
with gr.Row():
|
| 2292 |
with gr.Column(scale=1):
|
| 2293 |
-
gr.Markdown("### Transcription Settings"
|
|
|
|
|
|
|
| 2294 |
# --- Transcription Method Selector ---
|
| 2295 |
transcription_method = gr.Radio(["General Purpose", "Piano-Specific"], label="Audio Transcription Method", value="General Purpose",
|
| 2296 |
info="Choose 'General Purpose' for most music (vocals, etc.). Choose 'Piano-Specific' only for solo piano recordings.")
|
| 2297 |
# --- Stereo Processing Checkbox ---
|
| 2298 |
enable_stereo_processing = gr.Checkbox(label="Enable Stereo Transcription", value=False,
|
| 2299 |
-
info="
|
| 2300 |
|
| 2301 |
# --- Vocal Separation Checkboxes ---
|
| 2302 |
with gr.Group():
|
|
@@ -2504,7 +2569,7 @@ if __name__ == "__main__":
|
|
| 2504 |
info="Controls vibrato continuity across notes. Low values (0) reset vibrato on each note (bouncy). High values (1) create a smooth, connected 'singing' vibrato."
|
| 2505 |
)
|
| 2506 |
# --- New accordion for advanced effects ---
|
| 2507 |
-
with gr.Accordion("Advanced Synthesis & FX", open=
|
| 2508 |
s8bit_noise_level = gr.Slider(
|
| 2509 |
0.0, 1.0, value=0.0, step=0.05,
|
| 2510 |
label="Noise Level",
|
|
@@ -2525,6 +2590,30 @@ if __name__ == "__main__":
|
|
| 2525 |
label="FM Rate",
|
| 2526 |
info="Frequency Modulation speed. Low values create a slow 'wobble'. High values create fast modulation, resulting in bright, dissonant harmonics."
|
| 2527 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2528 |
|
| 2529 |
# Create a dictionary mapping key names to the actual Gradio components
|
| 2530 |
ui_component_map = locals()
|
|
@@ -2611,6 +2700,12 @@ if __name__ == "__main__":
|
|
| 2611 |
inputs=render_type,
|
| 2612 |
outputs=render_type_info
|
| 2613 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2614 |
|
| 2615 |
# Launch the Gradio app
|
| 2616 |
app.queue().launch(inbrowser=True, debug=True)
|
|
|
|
| 148 |
s8bit_fm_modulation_depth: float = 0.0
|
| 149 |
s8bit_fm_modulation_rate: float = 0.0
|
| 150 |
s8bit_adaptive_decay: bool = False
|
| 151 |
+
s8bit_echo_sustain: bool = False
|
| 152 |
+
s8bit_echo_rate_hz: float = 5.0
|
| 153 |
+
s8bit_echo_decay_factor: float = 0.6
|
| 154 |
+
s8bit_echo_trigger_threshold: float = 2.5
|
| 155 |
|
| 156 |
|
| 157 |
# =================================================================================================
|
|
|
|
| 386 |
envelope[:attack_samples] = np.linspace(0, start_amp, attack_samples)
|
| 387 |
if decay_samples > 0:
|
| 388 |
envelope[attack_samples:attack_samples+decay_samples] = np.linspace(start_amp, 0, decay_samples)
|
| 389 |
+
|
| 390 |
else: # Sustained
|
| 391 |
envelope = np.linspace(start_amp, 0, num_samples)
|
| 392 |
|
|
|
|
| 422 |
# Apply envelope to the (potentially combined) waveform
|
| 423 |
note_waveform *= envelope
|
| 424 |
|
| 425 |
+
# =========================================================================
|
| 426 |
+
# === Echo Sustain Logic for Long Plucky Notes (Now works correctly) ===
|
| 427 |
+
# =========================================================================
|
| 428 |
+
# This feature fills the silent tail of long notes with decaying echoes.
|
| 429 |
+
# It is applied only for Plucky envelopes and after the main envelope has been applied.
|
| 430 |
+
if params.s8bit_envelope_type == 'Plucky (AD Envelope)' and params.s8bit_echo_sustain and num_samples > 0:
|
| 431 |
+
|
| 432 |
+
# The duration of the initial pluck is determined by its decay time.
|
| 433 |
+
initial_pluck_duration_s = params.s8bit_decay_time_s
|
| 434 |
+
initial_pluck_samples = int(initial_pluck_duration_s * fs)
|
| 435 |
+
|
| 436 |
+
# Check if the note is long enough to even need echoes.
|
| 437 |
+
if num_samples > initial_pluck_samples * params.s8bit_echo_trigger_threshold: # Only trigger if there's significant empty space.
|
| 438 |
+
|
| 439 |
+
# Calculate the properties of the echoes.
|
| 440 |
+
echo_delay_samples = int(fs / params.s8bit_echo_rate_hz)
|
| 441 |
+
if echo_delay_samples > 0: # Prevent infinite loops
|
| 442 |
+
echo_amplitude = start_amp * params.s8bit_echo_decay_factor
|
| 443 |
+
|
| 444 |
+
# Start placing echoes after the first pluck has finished.
|
| 445 |
+
current_sample_offset = initial_pluck_samples
|
| 446 |
+
|
| 447 |
+
while current_sample_offset < num_samples:
|
| 448 |
+
# Ensure there's space for a new echo.
|
| 449 |
+
if current_sample_offset + echo_delay_samples <= num_samples:
|
| 450 |
+
|
| 451 |
+
# Create a very short, plucky envelope for the echo.
|
| 452 |
+
echo_attack_samples = min(int(0.002 * fs), echo_delay_samples) # 2ms attack
|
| 453 |
+
echo_decay_samples = echo_delay_samples - echo_attack_samples
|
| 454 |
+
|
| 455 |
+
if echo_decay_samples > 0:
|
| 456 |
+
# Create the small echo envelope shape.
|
| 457 |
+
echo_envelope = np.zeros(echo_delay_samples)
|
| 458 |
+
echo_envelope[:echo_attack_samples] = np.linspace(0, echo_amplitude, echo_attack_samples)
|
| 459 |
+
echo_envelope[echo_attack_samples:] = np.linspace(echo_amplitude, 0, echo_decay_samples)
|
| 460 |
+
|
| 461 |
+
# Create a temporary waveform for the echo and apply the envelope.
|
| 462 |
+
# It reuses the main note's frequency; the phase is rebuilt from scratch for each echo (not carried over).
|
| 463 |
+
# Re-calculating phase here is simpler than tracking, for additive synthesis
|
| 464 |
+
phase_inc_echo = 2 * np.pi * freq / fs
|
| 465 |
+
phase_echo = np.cumsum(np.full(echo_delay_samples, phase_inc_echo))
|
| 466 |
+
|
| 467 |
+
if params.s8bit_waveform_type == 'Square':
|
| 468 |
+
echo_waveform_segment = signal.square(phase_echo, duty=params.s8bit_pulse_width)
|
| 469 |
+
elif params.s8bit_waveform_type == 'Sawtooth':
|
| 470 |
+
echo_waveform_segment = signal.sawtooth(phase_echo)
|
| 471 |
+
else: # Triangle
|
| 472 |
+
echo_waveform_segment = signal.sawtooth(phase_echo, width=0.5)
|
| 473 |
+
|
| 474 |
+
# Add the enveloped echo on top of the already-enveloped main waveform
|
| 475 |
+
note_waveform[current_sample_offset : current_sample_offset + echo_delay_samples] += echo_waveform_segment * echo_envelope
|
| 476 |
+
|
| 477 |
+
# Prepare for the next echo.
|
| 478 |
+
echo_amplitude *= params.s8bit_echo_decay_factor
|
| 479 |
+
|
| 480 |
+
current_sample_offset += echo_delay_samples
|
| 481 |
+
# --- END of Echo Sustain Logic ---
|
| 482 |
+
|
| 483 |
+
|
| 484 |
start_sample = int(note.start * fs)
|
| 485 |
end_sample = start_sample + num_samples
|
| 486 |
if end_sample > waveform.shape[1]:
|
|
|
|
| 2353 |
with gr.Accordion("▶️ Configure Global Settings (for both Single File and Batch)", open=True):
|
| 2354 |
with gr.Row():
|
| 2355 |
with gr.Column(scale=1):
|
| 2356 |
+
gr.Markdown("### Transcription Settings\n"
|
| 2357 |
+
"> _**Note:** This entire section is for audio-to-MIDI conversion. All settings here are ignored if a MIDI file is uploaded._"
|
| 2358 |
+
)
|
| 2359 |
# --- Transcription Method Selector ---
|
| 2360 |
transcription_method = gr.Radio(["General Purpose", "Piano-Specific"], label="Audio Transcription Method", value="General Purpose",
|
| 2361 |
info="Choose 'General Purpose' for most music (vocals, etc.). Choose 'Piano-Specific' only for solo piano recordings.")
|
| 2362 |
# --- Stereo Processing Checkbox ---
|
| 2363 |
enable_stereo_processing = gr.Checkbox(label="Enable Stereo Transcription", value=False,
|
| 2364 |
+
info="For stereo audio files only. When enabled, transcribes left and right channels independently, then merges them. Note: This will double the transcription time.")
|
| 2365 |
|
| 2366 |
# --- Vocal Separation Checkboxes ---
|
| 2367 |
with gr.Group():
|
|
|
|
| 2569 |
info="Controls vibrato continuity across notes. Low values (0) reset vibrato on each note (bouncy). High values (1) create a smooth, connected 'singing' vibrato."
|
| 2570 |
)
|
| 2571 |
# --- New accordion for advanced effects ---
|
| 2572 |
+
with gr.Accordion("Advanced Synthesis & FX", open=True):
|
| 2573 |
s8bit_noise_level = gr.Slider(
|
| 2574 |
0.0, 1.0, value=0.0, step=0.05,
|
| 2575 |
label="Noise Level",
|
|
|
|
| 2590 |
label="FM Rate",
|
| 2591 |
info="Frequency Modulation speed. Low values create a slow 'wobble'. High values create fast modulation, resulting in bright, dissonant harmonics."
|
| 2592 |
)
|
| 2593 |
+
# This outer group ensures the checkbox and its settings are visually linked.
|
| 2594 |
+
with gr.Group():
|
| 2595 |
+
s8bit_echo_sustain = gr.Checkbox(
|
| 2596 |
+
value=True, # Enabled by default in the UI. NOTE(review): AppParameters.s8bit_echo_sustain defaults to False — confirm which default is intended.
|
| 2597 |
+
label="Enable Echo Sustain for Long Notes",
|
| 2598 |
+
info="For 'Plucky' envelope only. Fills the silent tail of long, sustained notes with quiet, repeating pulses. Fixes 'choppy' sound on long piano notes."
|
| 2599 |
+
)
|
| 2600 |
+
# This inner group contains the sliders and is controlled by the checkbox above.
|
| 2601 |
+
with gr.Group(visible=True) as echo_sustain_settings:
|
| 2602 |
+
s8bit_echo_rate_hz = gr.Slider(
|
| 2603 |
+
1.0, 20.0, value=5.0, step=0.5,
|
| 2604 |
+
label="Echo Rate (Hz)",
|
| 2605 |
+
info="How many echoes (pulses) per second. Higher values create a faster, 'tremolo'-like effect."
|
| 2606 |
+
)
|
| 2607 |
+
s8bit_echo_decay_factor = gr.Slider(
|
| 2608 |
+
0.1, 0.95, value=0.45, step=0.05,
|
| 2609 |
+
label="Echo Decay Factor",
|
| 2610 |
+
info="How quickly the echoes fade. A value of 0.6 means each echo is 60% of the previous one's volume. Lower is faster."
|
| 2611 |
+
)
|
| 2612 |
+
s8bit_echo_trigger_threshold = gr.Slider(
|
| 2613 |
+
1.1, 30.0, value=20, step=0.1,
|
| 2614 |
+
label="Echo Trigger Threshold (x Decay Time)",
|
| 2615 |
+
info="Controls how long a note must be to trigger echoes. This value is a multiplier of the 'Decay Time'. Example: If 'Decay Time' is 0.1s and this threshold is set to 10.0, only notes longer than 1.0s (0.1 * 10.0) will produce echoes."
|
| 2616 |
+
)
|
| 2617 |
|
| 2618 |
# Create a dictionary mapping key names to the actual Gradio components
|
| 2619 |
ui_component_map = locals()
|
|
|
|
| 2700 |
inputs=render_type,
|
| 2701 |
outputs=render_type_info
|
| 2702 |
)
|
| 2703 |
+
# --- New event listener for the Echo Sustain UI ---
|
| 2704 |
+
s8bit_echo_sustain.change(
|
| 2705 |
+
fn=lambda x: gr.update(visible=x), # A simple lambda function to update visibility.
|
| 2706 |
+
inputs=s8bit_echo_sustain,
|
| 2707 |
+
outputs=echo_sustain_settings
|
| 2708 |
+
)
|
| 2709 |
|
| 2710 |
# Launch the Gradio app
|
| 2711 |
app.queue().launch(inbrowser=True, debug=True)
|