feat(synth): Add adjustable bass boost to 8-bit synthesizer
Browse files
This commit introduces a bass enhancement feature to the 8-bit synthesizer, allowing users to add more weight and depth to the sound.
app.py
CHANGED
|
@@ -3,14 +3,14 @@
|
|
| 3 |
# Merged and Integrated Script for Audio/MIDI Processing and Rendering
|
| 4 |
#
|
| 5 |
# This script combines two functionalities:
|
| 6 |
-
# 1. Transcribing audio
|
| 7 |
# a) A general-purpose model (basic-pitch by Spotify).
|
| 8 |
# b) A model specialized for solo piano (ByteDance).
|
| 9 |
# 2. Applying advanced transformations and re-rendering MIDI files using:
|
| 10 |
# a) Standard SoundFonts via FluidSynth.
|
| 11 |
# b) A custom 8-bit style synthesizer for a chiptune sound.
|
| 12 |
#
|
| 13 |
-
# The user can upload a WAV, MP3, or MIDI file.
|
| 14 |
# - If an audio file is uploaded, it is first transcribed to MIDI using the selected method.
|
| 15 |
# - The resulting MIDI (or an uploaded MIDI) can then be processed
|
| 16 |
# with various effects and rendered into audio.
|
|
@@ -160,10 +160,11 @@ def prepare_soundfonts():
|
|
| 160 |
# =================================================================================================
|
| 161 |
# === 8-bit Style Synthesizer ===
|
| 162 |
# =================================================================================================
|
| 163 |
-
def synthesize_8bit_style(midi_data, waveform_type, envelope_type, decay_time_s, pulse_width, vibrato_rate, vibrato_depth, fs=44100):
|
| 164 |
"""
|
| 165 |
Synthesizes an 8-bit style audio waveform from a PrettyMIDI object.
|
| 166 |
This function generates waveforms manually instead of using a synthesizer like FluidSynth.
|
|
|
|
| 167 |
"""
|
| 168 |
total_duration = midi_data.get_end_time()
|
| 169 |
waveform = np.zeros(int(total_duration * fs) + fs)
|
|
@@ -181,7 +182,7 @@ def synthesize_8bit_style(midi_data, waveform_type, envelope_type, decay_time_s,
|
|
| 181 |
# --- Vibrato LFO ---
|
| 182 |
vibrato_lfo = vibrato_depth * np.sin(2 * np.pi * vibrato_rate * t)
|
| 183 |
|
| 184 |
-
# --- Waveform Generation ---
|
| 185 |
if waveform_type == 'Square':
|
| 186 |
note_waveform = signal.square(2 * np.pi * (freq + vibrato_lfo) * t, duty=pulse_width)
|
| 187 |
elif waveform_type == 'Sawtooth':
|
|
@@ -189,6 +190,18 @@ def synthesize_8bit_style(midi_data, waveform_type, envelope_type, decay_time_s,
|
|
| 189 |
elif waveform_type == 'Triangle':
|
| 190 |
note_waveform = signal.sawtooth(2 * np.pi * (freq + vibrato_lfo) * t, width=0.5)
|
| 191 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
# --- ADSR Envelope ---
|
| 193 |
start_amp = note.velocity / 127.0
|
| 194 |
envelope = np.zeros(num_samples)
|
|
@@ -204,6 +217,7 @@ def synthesize_8bit_style(midi_data, waveform_type, envelope_type, decay_time_s,
|
|
| 204 |
elif envelope_type == 'Sustained (Full Decay)' and num_samples > 0:
|
| 205 |
envelope = np.linspace(start_amp, 0, num_samples)
|
| 206 |
|
|
|
|
| 207 |
note_waveform *= envelope
|
| 208 |
|
| 209 |
start_sample = int(note.start * fs)
|
|
@@ -276,7 +290,7 @@ def TranscribePianoAudio(input_file):
|
|
| 276 |
|
| 277 |
def TranscribeGeneralAudio(input_file, onset_thresh, frame_thresh, min_note_len, min_freq, max_freq, infer_onsets_bool, melodia_trick_bool, multiple_bends_bool):
|
| 278 |
"""
|
| 279 |
-
Transcribes a general audio file
|
| 280 |
This is suitable for various instruments and vocals.
|
| 281 |
"""
|
| 282 |
print('=' * 70)
|
|
@@ -332,7 +346,8 @@ def Render_MIDI(input_midi_path,
|
|
| 332 |
render_remove_drums,
|
| 333 |
# --- 8-bit synth params ---
|
| 334 |
s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
|
| 335 |
-
s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth
|
|
|
|
| 336 |
):
|
| 337 |
"""
|
| 338 |
Processes and renders a MIDI file according to user-defined settings.
|
|
@@ -555,6 +570,7 @@ def Render_MIDI(input_midi_path,
|
|
| 555 |
midi_data_for_synth,
|
| 556 |
s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
|
| 557 |
s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth,
|
|
|
|
| 558 |
fs=srate
|
| 559 |
)
|
| 560 |
# Normalize audio
|
|
@@ -619,7 +635,8 @@ def process_and_render_file(input_file,
|
|
| 619 |
render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums,
|
| 620 |
# --- 8-bit synth params ---
|
| 621 |
s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
|
| 622 |
-
s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth
|
|
|
|
| 623 |
):
|
| 624 |
"""
|
| 625 |
Main function to handle file processing. It determines the file type and calls the
|
|
@@ -662,7 +679,7 @@ def process_and_render_file(input_file,
|
|
| 662 |
render_with_sustains, merge_misaligned_notes, custom_render_patch, render_align,
|
| 663 |
render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums,
|
| 664 |
s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
|
| 665 |
-
s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth)
|
| 666 |
|
| 667 |
print(f'Total processing time: {(reqtime.time() - start_time):.2f} sec')
|
| 668 |
print('*' * 70)
|
|
@@ -705,7 +722,7 @@ if __name__ == "__main__":
|
|
| 705 |
with app:
|
| 706 |
gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>Audio-to-MIDI & Advanced Renderer</h1>")
|
| 707 |
gr.Markdown(
|
| 708 |
-
"**Upload a
|
| 709 |
"This application combines piano audio transcription with a powerful MIDI transformation and rendering toolkit. "
|
| 710 |
"Based on the work of [asigalov61](https://github.com/asigalov61)."
|
| 711 |
)
|
|
@@ -720,7 +737,7 @@ if __name__ == "__main__":
|
|
| 720 |
# type="filepath" ensures the component returns a string path to the uploaded file.
|
| 721 |
# The component will show a player for supported audio types (e.g., WAV, MP3).
|
| 722 |
input_file = gr.Audio(
|
| 723 |
-
label="Input Audio
|
| 724 |
type="filepath",
|
| 725 |
sources=["upload"], waveform_options=waveform_options
|
| 726 |
)
|
|
@@ -780,7 +797,8 @@ if __name__ == "__main__":
|
|
| 780 |
s8bit_pulse_width = gr.Slider(0.01, 0.99, value=0.5, step=0.01, label="Pulse Width")
|
| 781 |
s8bit_vibrato_rate = gr.Slider(0, 20, value=5, label="Vibrato Rate (Hz)")
|
| 782 |
s8bit_vibrato_depth = gr.Slider(0, 50, value=0, label="Vibrato Depth (Hz)")
|
| 783 |
-
|
|
|
|
| 784 |
# --- Original Advanced Options (Now tied to Piano-Specific) ---
|
| 785 |
with gr.Accordion("Advanced MIDI Rendering Options", open=False) as advanced_rendering_options:
|
| 786 |
render_with_sustains = gr.Checkbox(label="Apply sustain pedal effects (if present)", value=True)
|
|
@@ -789,7 +807,7 @@ if __name__ == "__main__":
|
|
| 789 |
render_transpose_to_C4 = gr.Checkbox(label="Transpose entire score to center around C4", value=False)
|
| 790 |
render_transpose_value = gr.Slider(-12, 12, value=0, step=1, label="Transpose (semitones)")
|
| 791 |
custom_render_patch = gr.Slider(-1, 127, value=-1, step=1, label="Force MIDI Patch (-1 to disable)")
|
| 792 |
-
merge_misaligned_notes = gr.Slider(-1, 127, value=-1,
|
| 793 |
render_align = gr.Radio(
|
| 794 |
["Do not align", "Start Times", "Start Times and Durations", "Start Times and Split Durations"],
|
| 795 |
label="Align notes to musical bars",
|
|
@@ -820,7 +838,7 @@ if __name__ == "__main__":
|
|
| 820 |
render_with_sustains, merge_misaligned_notes, custom_render_patch, render_align,
|
| 821 |
render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums,
|
| 822 |
s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
|
| 823 |
-
s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth
|
| 824 |
]
|
| 825 |
all_outputs = [
|
| 826 |
output_midi_md5, output_midi_title, output_midi_summary,
|
|
|
|
| 3 |
# Merged and Integrated Script for Audio/MIDI Processing and Rendering
|
| 4 |
#
|
| 5 |
# This script combines two functionalities:
|
| 6 |
+
# 1. Transcribing audio to MIDI using two methods:
|
| 7 |
# a) A general-purpose model (basic-pitch by Spotify).
|
| 8 |
# b) A model specialized for solo piano (ByteDance).
|
| 9 |
# 2. Applying advanced transformations and re-rendering MIDI files using:
|
| 10 |
# a) Standard SoundFonts via FluidSynth.
|
| 11 |
# b) A custom 8-bit style synthesizer for a chiptune sound.
|
| 12 |
#
|
| 13 |
+
# The user can upload an audio file (e.g., WAV, MP3) or a MIDI file.
|
| 14 |
# - If an audio file is uploaded, it is first transcribed to MIDI using the selected method.
|
| 15 |
# - The resulting MIDI (or an uploaded MIDI) can then be processed
|
| 16 |
# with various effects and rendered into audio.
|
|
|
|
| 160 |
# =================================================================================================
|
| 161 |
# === 8-bit Style Synthesizer ===
|
| 162 |
# =================================================================================================
|
| 163 |
+
def synthesize_8bit_style(midi_data, waveform_type, envelope_type, decay_time_s, pulse_width, vibrato_rate, vibrato_depth, bass_boost_level, fs=44100):
|
| 164 |
"""
|
| 165 |
Synthesizes an 8-bit style audio waveform from a PrettyMIDI object.
|
| 166 |
This function generates waveforms manually instead of using a synthesizer like FluidSynth.
|
| 167 |
+
Includes an optional sub-octave bass booster with adjustable level.
|
| 168 |
"""
|
| 169 |
total_duration = midi_data.get_end_time()
|
| 170 |
waveform = np.zeros(int(total_duration * fs) + fs)
|
|
|
|
| 182 |
# --- Vibrato LFO ---
|
| 183 |
vibrato_lfo = vibrato_depth * np.sin(2 * np.pi * vibrato_rate * t)
|
| 184 |
|
| 185 |
+
# --- Waveform Generation (Main Oscillator) ---
|
| 186 |
if waveform_type == 'Square':
|
| 187 |
note_waveform = signal.square(2 * np.pi * (freq + vibrato_lfo) * t, duty=pulse_width)
|
| 188 |
elif waveform_type == 'Sawtooth':
|
|
|
|
| 190 |
elif waveform_type == 'Triangle':
|
| 191 |
note_waveform = signal.sawtooth(2 * np.pi * (freq + vibrato_lfo) * t, width=0.5)
|
| 192 |
|
| 193 |
+
# --- Bass Boost (Sub-Octave Oscillator) ---
|
| 194 |
+
if bass_boost_level > 0:
|
| 195 |
+
bass_freq = freq / 2.0
|
| 196 |
+
# Only add bass if the frequency is reasonably audible
|
| 197 |
+
if bass_freq > 20:
|
| 198 |
+
# Bass uses a simple square wave, no vibrato, for stability
|
| 199 |
+
bass_sub_waveform = signal.square(2 * np.pi * bass_freq * t, duty=0.5)
|
| 200 |
+
# Mix the main and bass waveforms.
|
| 201 |
+
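# As bass level increases, decrease the main waveform volume to limit the peak of the mix
# (the sum can still reach up to 1 + 0.5 * bass_boost_level, so this reduces rather than eliminates overshoot).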
|
| 202 |
+
main_level = 1.0 - (0.5 * bass_boost_level)
|
| 203 |
+
note_waveform = (note_waveform * main_level) + (bass_sub_waveform * bass_boost_level)
|
| 204 |
+
|
| 205 |
# --- ADSR Envelope ---
|
| 206 |
start_amp = note.velocity / 127.0
|
| 207 |
envelope = np.zeros(num_samples)
|
|
|
|
| 217 |
elif envelope_type == 'Sustained (Full Decay)' and num_samples > 0:
|
| 218 |
envelope = np.linspace(start_amp, 0, num_samples)
|
| 219 |
|
| 220 |
+
# Apply envelope to the (potentially combined) waveform
|
| 221 |
note_waveform *= envelope
|
| 222 |
|
| 223 |
start_sample = int(note.start * fs)
|
|
|
|
| 290 |
|
| 291 |
def TranscribeGeneralAudio(input_file, onset_thresh, frame_thresh, min_note_len, min_freq, max_freq, infer_onsets_bool, melodia_trick_bool, multiple_bends_bool):
|
| 292 |
"""
|
| 293 |
+
Transcribes a general audio file into a MIDI file using basic-pitch.
|
| 294 |
This is suitable for various instruments and vocals.
|
| 295 |
"""
|
| 296 |
print('=' * 70)
|
|
|
|
| 346 |
render_remove_drums,
|
| 347 |
# --- 8-bit synth params ---
|
| 348 |
s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
|
| 349 |
+
s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth,
|
| 350 |
+
s8bit_bass_boost_level
|
| 351 |
):
|
| 352 |
"""
|
| 353 |
Processes and renders a MIDI file according to user-defined settings.
|
|
|
|
| 570 |
midi_data_for_synth,
|
| 571 |
s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
|
| 572 |
s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth,
|
| 573 |
+
s8bit_bass_boost_level,
|
| 574 |
fs=srate
|
| 575 |
)
|
| 576 |
# Normalize audio
|
|
|
|
| 635 |
render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums,
|
| 636 |
# --- 8-bit synth params ---
|
| 637 |
s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
|
| 638 |
+
s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth,
|
| 639 |
+
s8bit_bass_boost_level
|
| 640 |
):
|
| 641 |
"""
|
| 642 |
Main function to handle file processing. It determines the file type and calls the
|
|
|
|
| 679 |
render_with_sustains, merge_misaligned_notes, custom_render_patch, render_align,
|
| 680 |
render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums,
|
| 681 |
s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
|
| 682 |
+
s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth, s8bit_bass_boost_level)
|
| 683 |
|
| 684 |
print(f'Total processing time: {(reqtime.time() - start_time):.2f} sec')
|
| 685 |
print('*' * 70)
|
|
|
|
| 722 |
with app:
|
| 723 |
gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>Audio-to-MIDI & Advanced Renderer</h1>")
|
| 724 |
gr.Markdown(
|
| 725 |
+
"**Upload a Audio for transcription-then-rendering, or a MIDI for rendering-only.**\n\n"
|
| 726 |
"This application combines piano audio transcription with a powerful MIDI transformation and rendering toolkit. "
|
| 727 |
"Based on the work of [asigalov61](https://github.com/asigalov61)."
|
| 728 |
)
|
|
|
|
| 737 |
# type="filepath" ensures the component returns a string path to the uploaded file.
|
| 738 |
# The component will show a player for supported audio types (e.g., WAV, MP3).
|
| 739 |
input_file = gr.Audio(
|
| 740 |
+
label="Input Audio or MIDI File",
|
| 741 |
type="filepath",
|
| 742 |
sources=["upload"], waveform_options=waveform_options
|
| 743 |
)
|
|
|
|
| 797 |
s8bit_pulse_width = gr.Slider(0.01, 0.99, value=0.5, step=0.01, label="Pulse Width")
|
| 798 |
s8bit_vibrato_rate = gr.Slider(0, 20, value=5, label="Vibrato Rate (Hz)")
|
| 799 |
s8bit_vibrato_depth = gr.Slider(0, 50, value=0, label="Vibrato Depth (Hz)")
|
| 800 |
+
s8bit_bass_boost_level = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.1, label="Bass Boost Level", info="Adjusts the volume of the sub-octave. 0 is off.")
|
| 801 |
+
|
| 802 |
# --- Original Advanced Options (Now tied to Piano-Specific) ---
|
| 803 |
with gr.Accordion("Advanced MIDI Rendering Options", open=False) as advanced_rendering_options:
|
| 804 |
render_with_sustains = gr.Checkbox(label="Apply sustain pedal effects (if present)", value=True)
|
|
|
|
| 807 |
render_transpose_to_C4 = gr.Checkbox(label="Transpose entire score to center around C4", value=False)
|
| 808 |
render_transpose_value = gr.Slider(-12, 12, value=0, step=1, label="Transpose (semitones)")
|
| 809 |
custom_render_patch = gr.Slider(-1, 127, value=-1, step=1, label="Force MIDI Patch (-1 to disable)")
|
| 810 |
+
merge_misaligned_notes = gr.Slider(-1, 127, value=-1, label="Time to merge notes in ms (-1 to disable)")
|
| 811 |
render_align = gr.Radio(
|
| 812 |
["Do not align", "Start Times", "Start Times and Durations", "Start Times and Split Durations"],
|
| 813 |
label="Align notes to musical bars",
|
|
|
|
| 838 |
render_with_sustains, merge_misaligned_notes, custom_render_patch, render_align,
|
| 839 |
render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums,
|
| 840 |
s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
|
| 841 |
+
s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth, s8bit_bass_boost_level
|
| 842 |
]
|
| 843 |
all_outputs = [
|
| 844 |
output_midi_md5, output_midi_title, output_midi_summary,
|