Audio-To-MIDI-And-Advanced-Renderer

Sleeping

avans06 committed on Aug 16, 2025

Commit

0f9efaa

1 Parent(s): b22ce2a

feat(synth): Implement Echo Sustain for plucky envelopes

Adds an "Echo Sustain" effect to the 8-bit synthesizer. This feature fills the silent tail of long notes with decaying pulses when using a plucky envelope, preventing them from sounding choppy or empty.

Files changed (1) hide show

app.py +99 -4

app.py CHANGED Viewed

@@ -148,6 +148,10 @@ class AppParameters:
     s8bit_fm_modulation_depth: float = 0.0
     s8bit_fm_modulation_rate: float = 0.0
     s8bit_adaptive_decay: bool = False
 # =================================================================================================
@@ -382,7 +386,7 @@ def synthesize_8bit_style(*, midi_data: pretty_midi.PrettyMIDI,  fs: int, params
                     envelope[:attack_samples] = np.linspace(0, start_amp, attack_samples)
                     if decay_samples > 0:
                         envelope[attack_samples:attack_samples+decay_samples] = np.linspace(start_amp, 0, decay_samples)
             else: # Sustained
                 envelope = np.linspace(start_amp, 0, num_samples)
@@ -418,6 +422,65 @@ def synthesize_8bit_style(*, midi_data: pretty_midi.PrettyMIDI,  fs: int, params
             # Apply envelope to the (potentially combined) waveform
             note_waveform *= envelope
             start_sample = int(note.start * fs)
             end_sample = start_sample + num_samples
             if end_sample > waveform.shape[1]:
@@ -2290,13 +2353,15 @@ if __name__ == "__main__":
         with gr.Accordion("▶️ Configure Global Settings (for both Single File and Batch)", open=True):
             with gr.Row():
                 with gr.Column(scale=1):
-                    gr.Markdown("### Transcription Settings")
                     # --- Transcription Method Selector ---
                     transcription_method = gr.Radio(["General Purpose", "Piano-Specific"], label="Audio Transcription Method", value="General Purpose",
                             info="Choose 'General Purpose' for most music (vocals, etc.). Choose 'Piano-Specific' only for solo piano recordings.")
                     # --- Stereo Processing Checkbox ---
                     enable_stereo_processing = gr.Checkbox(label="Enable Stereo Transcription", value=False,
-                            info="If checked, left/right audio channels are transcribed separately and merged. Doubles processing time.")
                     # --- Vocal Separation Checkboxes ---
                     with gr.Group():
@@ -2504,7 +2569,7 @@ if __name__ == "__main__":
                             info="Controls vibrato continuity across notes. Low values (0) reset vibrato on each note (bouncy). High values (1) create a smooth, connected 'singing' vibrato."
                         )
                         # --- New accordion for advanced effects ---
-                        with gr.Accordion("Advanced Synthesis & FX", open=False):
                             s8bit_noise_level = gr.Slider(
                                 0.0, 1.0, value=0.0, step=0.05,
                                 label="Noise Level",
@@ -2525,6 +2590,30 @@ if __name__ == "__main__":
                                 label="FM Rate",
                                 info="Frequency Modulation speed. Low values create a slow 'wobble'. High values create fast modulation, resulting in bright, dissonant harmonics."
                             )
         # Create a dictionary mapping key names to the actual Gradio components
         ui_component_map = locals()
@@ -2611,6 +2700,12 @@ if __name__ == "__main__":
             inputs=render_type,
             outputs=render_type_info
         )
     # Launch the Gradio app
     app.queue().launch(inbrowser=True, debug=True)

     s8bit_fm_modulation_depth: float = 0.0
     s8bit_fm_modulation_rate: float = 0.0
     s8bit_adaptive_decay: bool = False
+    s8bit_echo_sustain: bool = False
+    s8bit_echo_rate_hz: float = 5.0
+    s8bit_echo_decay_factor: float = 0.6
+    s8bit_echo_trigger_threshold: float = 2.5
 # =================================================================================================
                     envelope[:attack_samples] = np.linspace(0, start_amp, attack_samples)
                     if decay_samples > 0:
                         envelope[attack_samples:attack_samples+decay_samples] = np.linspace(start_amp, 0, decay_samples)
             else: # Sustained
                 envelope = np.linspace(start_amp, 0, num_samples)
             # Apply envelope to the (potentially combined) waveform
             note_waveform *= envelope
+            # =========================================================================
+            # === Echo Sustain Logic for Long Plucky Notes (Now works correctly) ===
+            # =========================================================================
+            # This feature fills the silent tail of long notes with decaying echoes.
+            # It is applied only for Plucky envelopes and after the main envelope has been applied.
+            if params.s8bit_envelope_type == 'Plucky (AD Envelope)' and params.s8bit_echo_sustain and num_samples > 0:
+                # The duration of the initial pluck is determined by its decay time.
+                initial_pluck_duration_s = params.s8bit_decay_time_s
+                initial_pluck_samples = int(initial_pluck_duration_s * fs)
+                # Check if the note is long enough to even need echoes.
+                if num_samples > initial_pluck_samples * params.s8bit_echo_trigger_threshold: # Only trigger if there's significant empty space.
+                    # Calculate the properties of the echoes.
+                    echo_delay_samples = int(fs / params.s8bit_echo_rate_hz)
+                    if echo_delay_samples > 0: # Prevent infinite loops
+                        echo_amplitude = start_amp * params.s8bit_echo_decay_factor
+                        # Start placing echoes after the first pluck has finished.
+                        current_sample_offset = initial_pluck_samples
+                        while current_sample_offset < num_samples:
+                            # Ensure there's space for a new echo.
+                            if current_sample_offset + echo_delay_samples <= num_samples:
+                                # Create a very short, plucky envelope for the echo.
+                                echo_attack_samples = min(int(0.002 * fs), echo_delay_samples) # 2ms attack
+                                echo_decay_samples = echo_delay_samples - echo_attack_samples
+                                if echo_decay_samples > 0:
+                                    # Create the small echo envelope shape.
+                                    echo_envelope = np.zeros(echo_delay_samples)
+                                    echo_envelope[:echo_attack_samples] = np.linspace(0, echo_amplitude, echo_attack_samples)
+                                    echo_envelope[echo_attack_samples:] = np.linspace(echo_amplitude, 0, echo_decay_samples)
+                                    # Create a temporary waveform for the echo and apply the envelope.
+                                    # It reuses the main note's frequency, but the phase is rebuilt from scratch for each echo:
+                                    # re-calculating it here is simpler than tracking the main oscillator's phase.
+                                    phase_inc_echo = 2 * np.pi * freq / fs
+                                    phase_echo = np.cumsum(np.full(echo_delay_samples, phase_inc_echo))
+                                    if params.s8bit_waveform_type == 'Square':
+                                        echo_waveform_segment = signal.square(phase_echo, duty=params.s8bit_pulse_width)
+                                    elif params.s8bit_waveform_type == 'Sawtooth':
+                                        echo_waveform_segment = signal.sawtooth(phase_echo)
+                                    else: # Triangle
+                                        echo_waveform_segment = signal.sawtooth(phase_echo, width=0.5)
+                                    # Add the enveloped echo on top of the already-enveloped main waveform
+                                    note_waveform[current_sample_offset : current_sample_offset + echo_delay_samples] += echo_waveform_segment * echo_envelope
+                                    # Prepare for the next echo.
+                                    echo_amplitude *= params.s8bit_echo_decay_factor
+                            current_sample_offset += echo_delay_samples
+            # --- END of Echo Sustain Logic ---
             start_sample = int(note.start * fs)
             end_sample = start_sample + num_samples
             if end_sample > waveform.shape[1]:
         with gr.Accordion("▶️ Configure Global Settings (for both Single File and Batch)", open=True):
             with gr.Row():
                 with gr.Column(scale=1):
+                    gr.Markdown("### Transcription Settings\n"
+                        "> _**Note:** This entire section is for audio-to-MIDI conversion. All settings here are ignored if a MIDI file is uploaded._"
+                    )
                     # --- Transcription Method Selector ---
                     transcription_method = gr.Radio(["General Purpose", "Piano-Specific"], label="Audio Transcription Method", value="General Purpose",
                             info="Choose 'General Purpose' for most music (vocals, etc.). Choose 'Piano-Specific' only for solo piano recordings.")
                     # --- Stereo Processing Checkbox ---
                     enable_stereo_processing = gr.Checkbox(label="Enable Stereo Transcription", value=False,
+                            info="For stereo audio files only. When enabled, transcribes left and right channels independently, then merges them. Note: This will double the transcription time.")
                     # --- Vocal Separation Checkboxes ---
                     with gr.Group():
                             info="Controls vibrato continuity across notes. Low values (0) reset vibrato on each note (bouncy). High values (1) create a smooth, connected 'singing' vibrato."
                         )
                         # --- New accordion for advanced effects ---
+                        with gr.Accordion("Advanced Synthesis & FX", open=True):
                             s8bit_noise_level = gr.Slider(
                                 0.0, 1.0, value=0.0, step=0.05,
                                 label="Noise Level",
                                 label="FM Rate",
                                 info="Frequency Modulation speed. Low values create a slow 'wobble'. High values create fast modulation, resulting in bright, dissonant harmonics."
                             )
+                            # This outer group ensures the checkbox and its settings are visually linked.
+                            with gr.Group():
+                                s8bit_echo_sustain = gr.Checkbox(
+                                    value=True, # NOTE(review): enabled by default here, while AppParameters.s8bit_echo_sustain defaults to False - confirm which is intended.
+                                    label="Enable Echo Sustain for Long Notes",
+                                    info="For 'Plucky' envelope only. Fills the silent tail of long, sustained notes with quiet, repeating pulses. Fixes 'choppy' sound on long piano notes."
+                                )
+                                # This inner group contains the sliders and is controlled by the checkbox above.
+                                with gr.Group(visible=True) as echo_sustain_settings:
+                                    s8bit_echo_rate_hz = gr.Slider(
+                                        1.0, 20.0, value=5.0, step=0.5,
+                                        label="Echo Rate (Hz)",
+                                        info="How many echoes (pulses) per second. Higher values create a faster, 'tremolo'-like effect."
+                                    )
+                                    s8bit_echo_decay_factor = gr.Slider(
+                                        0.1, 0.95, value=0.45, step=0.05,
+                                        label="Echo Decay Factor",
+                                        info="How quickly the echoes fade. A value of 0.6 means each echo is 60% of the previous one's volume. Lower is faster."
+                                    )
+                                    s8bit_echo_trigger_threshold = gr.Slider(
+                                        1.1, 30.0, value=20, step=0.1,
+                                        label="Echo Trigger Threshold (x Decay Time)",
+                                        info="Controls how long a note must be to trigger echoes. This value is a multiplier of the 'Decay Time'. Example: If 'Decay Time' is 0.1s and this threshold is set to 10.0, only notes longer than 1.0s (0.1 * 10.0) will produce echoes."
+                                    )
         # Create a dictionary mapping key names to the actual Gradio components
         ui_component_map = locals()
             inputs=render_type,
             outputs=render_type_info
         )
+        # --- New event listener for the Echo Sustain UI ---
+        s8bit_echo_sustain.change(
+            fn=lambda x: gr.update(visible=x), # A simple lambda function to update visibility.
+            inputs=s8bit_echo_sustain,
+            outputs=echo_sustain_settings
+        )
     # Launch the Gradio app
     app.queue().launch(inbrowser=True, debug=True)