from audiotools import AudioSignal
from pyharp.core import ModelCard, build_endpoint
from pyharp.media.audio import load_audio, save_audio
from pyharp.labels import LabelList

import gradio as gr
import librosa
import torch

from pathlib import Path
import time

# ModelCard

# Metadata shown to HARP clients for this processing endpoint.
_DESCRIPTION = (
    "Remixes a track into its harmonic and percussive components using Harmonic–Percussive Source Separation (HPSS). \n"
    "Input: A mono or stereo music track (WAV or MP3). \n"
    "Output: A single remixed audio file with adjustable harmonic & percussive balance, based on the selected parameters."
)

model_card = ModelCard(
    name="Harmonic / Percussive Separation",
    description=_DESCRIPTION,
    author="Hugo Flores Garcia",
    tags=["example", "separator", "hpss"],
)


# Model Logic
def hpss(signal: AudioSignal, **kwargs):
    """Split a signal into harmonic and percussive components via librosa HPSS.

    Args:
        signal: Input audio; its tensor is squeezed before separation, so a
            batch size of 1 is assumed — TODO confirm against callers.
        **kwargs: Forwarded to ``librosa.effects.hpss``
            (e.g. ``kernel_size``, ``margin``).

    Returns:
        Tuple ``(harmonic_signal, percussive_signal)``: clones of ``signal``
        with their audio data replaced by the corresponding component.

    Raises:
        ValueError: If the squeezed audio array is neither 1-D (mono) nor
            2-D (multichannel).
    """
    squeezed = signal.audio_data.squeeze().numpy()
    harmonic_np, percussive_np = librosa.effects.hpss(squeezed, **kwargs)

    # Restore the (batch, channels, samples) layout the signal object uses.
    if harmonic_np.ndim == 1:
        harmonic_np = harmonic_np[None, None, :]
        percussive_np = percussive_np[None, None, :]
    elif harmonic_np.ndim == 2:
        harmonic_np = harmonic_np[None, :, :]
        percussive_np = percussive_np[None, :, :]
    else:
        raise ValueError("Unexpected audio shape")

    out_harmonic = signal.clone()
    out_harmonic.audio_data = torch.from_numpy(harmonic_np)

    out_percussive = signal.clone()
    out_percussive.audio_data = torch.from_numpy(percussive_np)

    return out_harmonic, out_percussive

# Slider floor that is treated as "mute" (mapped to -inf dB when mixing).
MIN_DB = -120

def process_fn(audio_file_path,
               harmonic_db: float,
               percussive_db: float,
               kernel_size: int = 31,
               margin: float = 1.0):
    """Separate a track with HPSS, re-weight the parts, and write the remix.

    Args:
        audio_file_path: Path to the input audio file.
        harmonic_db: Gain (dB) applied to the harmonic component;
            ``MIN_DB`` mutes it entirely.
        percussive_db: Gain (dB) applied to the percussive component;
            ``MIN_DB`` mutes it entirely.
        kernel_size: Median-filter kernel size passed to HPSS.
        margin: Separation margin passed to HPSS.

    Returns:
        Tuple of (output audio file path, empty ``LabelList``).
    """
    signal = load_audio(audio_file_path)

    harmonic, percussive = hpss(signal, kernel_size=int(kernel_size), margin=margin)

    def as_gain_db(level_db):
        # The slider floor doubles as a mute: MIN_DB becomes -inf dB (zero gain).
        return float("-inf") if level_db == MIN_DB else level_db

    # Apply the per-component gains and sum the two signals back together.
    remixed = (
        harmonic.volume_change(as_gain_db(harmonic_db))
        + percussive.volume_change(as_gain_db(percussive_db))
    )

    # Name the output after the input stem, e.g. "song.mp3" -> "song_hpss.wav".
    out_name = f"{Path(audio_file_path).stem}_hpss.wav"
    output_audio_path = save_audio(remixed, out_name)

    return output_audio_path, LabelList()  # no labels produced


# Gradio Components
# Required audio input for the HARP endpoint.
input_audio = gr.Audio(type="filepath", label="Input Audio").harp_required(True)

# Gain controls for the two separated components.
harmonic_slider = gr.Slider(
    label="Harmonic Level (dB)",
    info="Boost or reduce tonal parts like chords and pads.",
    minimum=MIN_DB,
    maximum=24,
    step=1,
    value=0,
)

percussive_slider = gr.Slider(
    label="Percussive Level (dB)",
    info="Control the loudness of drums and other rhythmic hits.",
    minimum=MIN_DB,
    maximum=24,
    step=1,
    value=0,
)

# HPSS algorithm parameters.
kernel_slider = gr.Slider(
    label="Time Resolution",
    info="How much time/spectral context is used to classify energy as harmonic or percussive. \n Lower = more detailed, reacts faster to rhythm, higher = smoother and slower",
    minimum=1,
    maximum=101,
    step=1,
    value=31,
)

margin_slider = gr.Slider(
    label="Separation Strength",
    info="How strongly harmonic & percussive components must differ to be separated. \n Higher = cleaner isolation, lower = more natural blend.",
    minimum=0.5,
    maximum=5.0,
    step=0.1,
    value=1.0,
)

# Outputs: the remixed file plus a (currently empty) label list.
output_audio = gr.Audio(type="filepath", label="Output Audio")
output_labels = gr.JSON(label="Labels")


# Launch the App 
with gr.Blocks() as demo:
    # build_endpoint must run inside the Blocks context so the HARP
    # control components get registered with this app.
    endpoint = build_endpoint(
        model_card=model_card,
        input_components=[
            input_audio,
            harmonic_slider,
            percussive_slider,
            kernel_slider,
            margin_slider,
        ],
        output_components=[output_audio, output_labels],
        process_fn=process_fn,
    )

    # Layout: reference the hidden control widgets, then render the
    # visible components top-to-bottom in this exact order.
    endpoint["controls_button"]
    endpoint["controls_data"]
    for component in (
        input_audio,
        harmonic_slider,
        percussive_slider,
        kernel_slider,
        margin_slider,
        output_audio,
        output_labels,
    ):
        component.render()
    endpoint["process_button"]
    endpoint["cancel_button"]

demo.queue()
demo.launch(share=True, show_error=True)