|
|
from audiotools import AudioSignal |
|
|
from pyharp.core import ModelCard, build_endpoint |
|
|
from pyharp.media.audio import load_audio, save_audio |
|
|
from pyharp.labels import LabelList |
|
|
|
|
|
import gradio as gr |
|
|
import librosa |
|
|
import torch |
|
|
|
|
|
from pathlib import Path |
|
|
import time |
|
|
|
|
|
|
|
|
|
|
|
# Metadata card displayed by the HARP plugin UI: describes what this
# endpoint does, its expected input/output, author, and search tags.
model_card = ModelCard(
    name="Harmonic / Percussive Separation",
    description=(
        "Remixes a track into its harmonic and percussive components using Harmonic–Percussive Source Separation (HPSS). \n"
        "Input: A mono or stereo music track (WAV or MP3). \n"
        "Output: A single remixed audio file with adjustable harmonic & percussive balance, based on the selected parameters."
    ),
    author="Hugo Flores Garcia",
    tags=["example", "separator", "hpss"]
)
|
|
|
|
|
|
|
|
|
|
|
def hpss(signal: AudioSignal, **kwargs):
    """Split ``signal`` into harmonic and percussive components via librosa HPSS.

    Args:
        signal: input AudioSignal whose ``audio_data`` is a torch tensor
            (presumably (batch, channels, samples) — audiotools convention).
        **kwargs: forwarded to ``librosa.effects.hpss``
            (e.g. ``kernel_size``, ``margin``).

    Returns:
        tuple[AudioSignal, AudioSignal]: ``(harmonic, percussive)`` — clones
        of ``signal`` carrying the respective component, same shape as input.
    """
    audio = signal.audio_data
    # detach().cpu() so .numpy() works even for CUDA or grad-tracking tensors.
    samples = audio.detach().cpu().squeeze().numpy()

    h, p = librosa.effects.hpss(samples, **kwargs)

    # HPSS preserves the sample layout, so restore the original tensor shape
    # directly instead of branching on ndim. This also handles inputs where
    # squeeze() removed no dims (e.g. batched multichannel audio), which the
    # previous explicit 1-D/2-D branches rejected with a ValueError.
    h = h.reshape(audio.shape)
    p = p.reshape(audio.shape)

    harmonic_signal = signal.clone()
    harmonic_signal.audio_data = torch.from_numpy(h)

    percussive_signal = signal.clone()
    percussive_signal.audio_data = torch.from_numpy(p)

    return harmonic_signal, percussive_signal
|
|
|
|
|
# Slider floor: a level set exactly to this value is treated as -inf dB
# (i.e. the component is muted entirely) rather than a -120 dB attenuation.
MIN_DB = -120
|
|
|
|
|
def process_fn(audio_file_path,
               harmonic_db: float,
               percussive_db: float,
               kernel_size: int = 31,
               margin: float = 1.0):
    """Run HPSS on the file at ``audio_file_path`` and remix the two stems.

    Args:
        audio_file_path: path of the track to process.
        harmonic_db: gain (dB) for the harmonic stem; ``MIN_DB`` mutes it.
        percussive_db: gain (dB) for the percussive stem; ``MIN_DB`` mutes it.
        kernel_size: median-filter kernel size forwarded to librosa HPSS.
        margin: soft-mask margin forwarded to librosa HPSS.

    Returns:
        tuple: ``(output_audio_path, labels)`` — path of the saved remix and
        an empty LabelList (this endpoint produces no labels).
    """
    signal = load_audio(audio_file_path)

    harmonic, percussive = hpss(signal, kernel_size=int(kernel_size), margin=margin)

    # Map the slider floor to a full mute instead of a -120 dB attenuation.
    def as_gain(db):
        if db == MIN_DB:
            return -float("inf")
        return db

    remix = harmonic.volume_change(as_gain(harmonic_db))
    remix = remix + percussive.volume_change(as_gain(percussive_db))

    out_name = f"{Path(audio_file_path).stem}_hpss.wav"
    saved_path = save_audio(remix, out_name)

    return saved_path, LabelList()
|
|
|
|
|
|
|
|
|
|
|
# --- UI components (declared here, rendered later inside the Blocks ctx) ---

# Source track; marked required for the HARP endpoint.
input_audio = gr.Audio(label="Input Audio", type="filepath").harp_required(True)

# Gain applied to the harmonic stem, in dB (MIN_DB = mute).
harmonic_slider = gr.Slider(
    label="Harmonic Level (dB)",
    info="Boost or reduce tonal parts like chords and pads.",
    minimum=MIN_DB,
    maximum=24,
    step=1,
    value=0,
)

# Gain applied to the percussive stem, in dB (MIN_DB = mute).
percussive_slider = gr.Slider(
    label="Percussive Level (dB)",
    info="Control the loudness of drums and other rhythmic hits.",
    minimum=MIN_DB,
    maximum=24,
    step=1,
    value=0,
)

# Forwarded to librosa HPSS as kernel_size (median-filter length).
kernel_slider = gr.Slider(
    label="Time Resolution",
    info="How much time/spectral context is used to classify energy as harmonic or percussive. \n Lower = more detailed, reacts faster to rhythm, higher = smoother and slower",
    minimum=1,
    maximum=101,
    step=1,
    value=31,
)

# Forwarded to librosa HPSS as margin (soft-mask separation factor).
margin_slider = gr.Slider(
    label="Separation Strength",
    info="How strongly harmonic & percussive components must differ to be separated. \n Higher = cleaner isolation, lower = more natural blend.",
    minimum=0.5,
    maximum=5.0,
    step=0.1,
    value=1.0,
)

# Outputs: the remixed audio file and the (empty) label list as JSON.
output_audio = gr.Audio(label="Output Audio", type="filepath")
output_labels = gr.JSON(label="Labels")
|
|
|
|
|
|
|
|
|
|
|
# Assemble the app: build_endpoint wires process_fn to the declared
# input/output components inside a Gradio Blocks context, then the
# components are rendered in the order they should appear in the UI.
with gr.Blocks() as demo:

    # Returns a mapping of the components/controls pyharp created
    # (accessed below by key).
    endpoint = build_endpoint(
        model_card=model_card,
        input_components=[
            input_audio,
            harmonic_slider,
            percussive_slider,
            kernel_slider,
            margin_slider
        ],
        output_components=[
            output_audio,
            output_labels
        ],
        process_fn=process_fn
    )

    # NOTE(review): the bare subscripts below are plain dict-style lookups
    # with no visible side effect in this file — presumably pyharp relies on
    # them (or they were meant to be .render() calls like the components
    # in between). Confirm against pyharp's build_endpoint docs/examples.
    endpoint["controls_button"]
    endpoint["controls_data"]
    input_audio.render()
    harmonic_slider.render()
    percussive_slider.render()
    kernel_slider.render()
    margin_slider.render()
    output_audio.render()
    output_labels.render()
    endpoint["process_button"]
    endpoint["cancel_button"]

# Enable Gradio's request queue, then launch with a public share link and
# server-side error reporting in the UI.
demo.queue()
demo.launch(share=True, show_error=True)
|
|
|