from audiotools import AudioSignal
from pyharp.core import ModelCard, build_endpoint
from pyharp.media.audio import load_audio, save_audio
from pyharp.labels import LabelList
import gradio as gr
import librosa
import torch
from pathlib import Path
import time


# ModelCard — metadata shown by HARP for this endpoint.
model_card = ModelCard(
    name="Harmonic / Percussive Separation",
    description=(
        "Remixes a track into its harmonic and percussive components using Harmonic–Percussive Source Separation (HPSS). \n"
        "Input: A mono or stereo music track (WAV or MP3). \n"
        "Output: A single remixed audio file with adjustable harmonic & percussive balance, based on the selected parameters."
    ),
    author="Hugo Flores Garcia",
    tags=["example", "separator", "hpss"]
)


# Model Logic
def hpss(signal: AudioSignal, **kwargs):
    """Split ``signal`` into harmonic and percussive components via librosa HPSS.

    Parameters
    ----------
    signal : AudioSignal
        Input audio; its ``audio_data`` tensor appears to be laid out as
        (batch, channels, samples) — the reshaping below restores that layout.
    **kwargs
        Forwarded to :func:`librosa.effects.hpss`
        (e.g. ``kernel_size``, ``margin``).

    Returns
    -------
    tuple[AudioSignal, AudioSignal]
        ``(harmonic, percussive)`` — clones of the input signal carrying the
        separated audio.

    Raises
    ------
    ValueError
        If the squeezed audio array is neither 1-D (mono) nor 2-D
        (multichannel).
    """
    # detach().cpu() makes the tensor->numpy conversion safe for tensors that
    # live on a GPU or carry autograd history; a bare .numpy() raises
    # RuntimeError in both cases.
    audio = signal.audio_data.squeeze().detach().cpu().numpy()
    h, p = librosa.effects.hpss(audio, **kwargs)

    # Restore the (batch, channels, samples) layout expected by audio_data.
    if h.ndim == 1:
        # Mono: (samples,) -> (1, 1, samples)
        h = h[None, None, :]
        p = p[None, None, :]
    elif h.ndim == 2:
        # Multichannel: (channels, samples) -> (1, channels, samples)
        h = h[None, :, :]
        p = p[None, :, :]
    else:
        raise ValueError("Unexpected audio shape")

    harmonic_signal = signal.clone()
    harmonic_signal.audio_data = torch.from_numpy(h)
    percussive_signal = signal.clone()
    percussive_signal.audio_data = torch.from_numpy(p)
    return harmonic_signal, percussive_signal


# Slider floor (dB) that is treated as "fully muted".
MIN_DB = -120


def process_fn(audio_file_path,
               harmonic_db: float,
               percussive_db: float,
               kernel_size: int = 31,
               margin: float = 1.0):
    """HARP processing callback: separate, re-balance, and remix the input.

    Parameters
    ----------
    audio_file_path
        Path to the uploaded audio file.
    harmonic_db : float
        Gain (dB) applied to the harmonic component; ``MIN_DB`` mutes it.
    percussive_db : float
        Gain (dB) applied to the percussive component; ``MIN_DB`` mutes it.
    kernel_size : int
        Median-filter kernel size forwarded to ``librosa.effects.hpss``.
    margin : float
        Separation margin forwarded to ``librosa.effects.hpss``.

    Returns
    -------
    tuple
        ``(output_audio_path, output_labels)`` — the saved remix and an
        empty :class:`LabelList`.
    """
    sig = load_audio(audio_file_path)

    harmonic, percussive = hpss(sig, kernel_size=int(kernel_size), margin=margin)

    def clip(db):
        # At (or below) the slider floor, mute the component entirely:
        # volume_change(-inf dB) corresponds to a linear gain of 0.
        # `<=` (instead of the original exact `==`) also mutes any value
        # that somehow arrives below the floor.
        return -float("inf") if db <= MIN_DB else db

    # Mix the two components with their respective gains.
    output_sig = (
        harmonic.volume_change(clip(harmonic_db))
        + percussive.volume_change(clip(percussive_db))
    )

    # Derive the output filename from the input file's stem.
    input_path = Path(audio_file_path)
    filename = f"{input_path.stem}_hpss.wav"
    output_audio_path = save_audio(output_sig, filename)

    output_labels = LabelList()  # this model produces no labels
    return output_audio_path, output_labels


# Gradio Components
input_audio = gr.Audio(type="filepath", label="Input Audio").harp_required(True)
# Gradio control sliders.
harmonic_slider = gr.Slider(
    minimum=MIN_DB,
    maximum=24,
    step=1,
    value=0,
    label="Harmonic Level (dB)",
    info="Boost or reduce tonal parts like chords and pads."
)
percussive_slider = gr.Slider(
    minimum=MIN_DB,
    maximum=24,
    step=1,
    value=0,
    label="Percussive Level (dB)",
    info="Control the loudness of drums and other rhythmic hits."
)
kernel_slider = gr.Slider(
    minimum=1,
    maximum=101,
    step=1,
    value=31,
    label="Time Resolution",
    info="How much time/spectral context is used to classify energy as harmonic or percussive. \n Lower = more detailed, reacts faster to rhythm, higher = smoother and slower"
)
margin_slider = gr.Slider(
    minimum=0.5,
    maximum=5.0,
    step=0.1,
    value=1.0,
    label="Separation Strength",
    info="How strongly harmonic & percussive components must differ to be separated. \n Higher = cleaner isolation, lower = more natural blend."
)

# Output components.
output_audio = gr.Audio(type="filepath", label="Output Audio")
output_labels = gr.JSON(label="Labels")

# Launch the App
with gr.Blocks() as demo:
    # The HARP endpoint must be built inside the Blocks context.
    endpoint = build_endpoint(
        model_card=model_card,
        input_components=[
            input_audio,
            harmonic_slider,
            percussive_slider,
            kernel_slider,
            margin_slider,
        ],
        output_components=[output_audio, output_labels],
        process_fn=process_fn,
    )

    # Layout — render order determines on-screen order.
    endpoint["controls_button"]
    endpoint["controls_data"]
    for component in (
        input_audio,
        harmonic_slider,
        percussive_slider,
        kernel_slider,
        margin_slider,
        output_audio,
        output_labels,
    ):
        component.render()
    endpoint["process_button"]
    endpoint["cancel_button"]

demo.queue()
demo.launch(share=True, show_error=True)