# Source: Hugging Face Space (commit 9b667d9, "desc fix", by saumya-pailwan)
from audiotools import AudioSignal
from pyharp.core import ModelCard, build_endpoint
from pyharp.media.audio import load_audio, save_audio
from pyharp.labels import LabelList
import gradio as gr
import librosa
import torch
from pathlib import Path
import time
# ModelCard
# Endpoint description shown to HARP clients; kept as a named constant so the
# ModelCard construction below stays compact.
_DESCRIPTION = (
    "Remixes a track into its harmonic and percussive components using Harmonic–Percussive Source Separation (HPSS). \n"
    "Input: A mono or stereo music track (WAV or MP3). \n"
    "Output: A single remixed audio file with adjustable harmonic & percussive balance, based on the selected parameters."
)

# Metadata advertised by this endpoint (name, description, author, tags).
model_card = ModelCard(
    name="Harmonic / Percussive Separation",
    description=_DESCRIPTION,
    author="Hugo Flores Garcia",
    tags=["example", "separator", "hpss"],
)
# Model Logic
def hpss(signal: AudioSignal, **kwargs):
    """Split *signal* into harmonic and percussive components via librosa HPSS.

    Args:
        signal: input AudioSignal; its ``audio_data`` tensor is assumed to be
            (batch, channels, samples) with batch == 1 — the squeeze below
            relies on that (TODO confirm against callers).
        **kwargs: forwarded to ``librosa.effects.hpss``
            (e.g. ``kernel_size``, ``margin``).

    Returns:
        Tuple ``(harmonic_signal, percussive_signal)``: two clones of *signal*
        whose ``audio_data`` holds the respective component.

    Raises:
        ValueError: if the squeezed audio is neither 1-D (mono) nor
            2-D (multi-channel).
    """
    # detach().cpu() makes the conversion safe for tensors that live on an
    # accelerator or carry autograd history; plain .numpy() raises there.
    y = signal.audio_data.squeeze().detach().cpu().numpy()
    h, p = librosa.effects.hpss(y, **kwargs)
    # Restore the (batch, channels, samples) layout audiotools expects.
    if h.ndim == 1:        # mono: (n,) -> (1, 1, n)
        h = h[None, None, :]
        p = p[None, None, :]
    elif h.ndim == 2:      # multi-channel: (ch, n) -> (1, ch, n)
        h = h[None, :, :]
        p = p[None, :, :]
    else:
        raise ValueError("Unexpected audio shape")
    harmonic_signal = signal.clone()
    harmonic_signal.audio_data = torch.from_numpy(h)
    percussive_signal = signal.clone()
    percussive_signal.audio_data = torch.from_numpy(p)
    return harmonic_signal, percussive_signal
MIN_DB = -120  # slider floor; interpreted as "fully muted" (-inf dB) below


def process_fn(audio_file_path,
               harmonic_db: float,
               percussive_db: float,
               kernel_size: int = 31,
               margin: float = 1.0):
    """HARP entry point: separate the track with HPSS, re-balance, and remix.

    Args:
        audio_file_path: path to the uploaded audio file.
        harmonic_db: gain (dB) applied to the harmonic component;
            MIN_DB mutes it entirely.
        percussive_db: gain (dB) applied to the percussive component;
            MIN_DB mutes it entirely.
        kernel_size: median-filter kernel size passed to librosa HPSS.
        margin: separation margin passed to librosa HPSS.

    Returns:
        Tuple of (path to the saved remixed WAV, empty LabelList).
    """
    signal = load_audio(audio_file_path)
    harmonic, percussive = hpss(signal,
                                kernel_size=int(kernel_size),
                                margin=margin)

    def as_gain(db):
        # The slider minimum doubles as a mute switch.
        return float("-inf") if db == MIN_DB else db

    # Apply each gain, then sum the two components back into one signal.
    mixed = harmonic.volume_change(as_gain(harmonic_db))
    mixed = mixed + percussive.volume_change(as_gain(percussive_db))

    # Output file is named after the input stem, e.g. song.mp3 -> song_hpss.wav
    out_name = f"{Path(audio_file_path).stem}_hpss.wav"
    return save_audio(mixed, out_name), LabelList()
# Gradio Components
# Input side: the source audio plus the four HPSS controls exposed to HARP.
# .harp_required is a pyharp extension marking the field mandatory.
input_audio = gr.Audio(type="filepath", label="Input Audio").harp_required(True)
# Gain for the harmonic (tonal) component; MIN_DB is treated as mute in process_fn.
harmonic_slider = gr.Slider(
    minimum=MIN_DB, maximum=24,
    step=1, value=0,
    label="Harmonic Level (dB)",
    info="Boost or reduce tonal parts like chords and pads."
)
# Gain for the percussive (rhythmic) component; MIN_DB is treated as mute in process_fn.
percussive_slider = gr.Slider(
    minimum=MIN_DB, maximum=24,
    step=1, value=0,
    label="Percussive Level (dB)",
    info="Control the loudness of drums and other rhythmic hits."
)
# Maps to librosa's HPSS kernel_size (cast to int in process_fn).
kernel_slider = gr.Slider(
    minimum=1, maximum=101,
    step=1, value=31,
    label="Time Resolution",
    info="How much time/spectral context is used to classify energy as harmonic or percussive. \n Lower = more detailed, reacts faster to rhythm, higher = smoother and slower"
)
# Maps to librosa's HPSS margin parameter.
margin_slider = gr.Slider(
    minimum=0.5, maximum=5.0,
    step=0.1, value=1.0,
    label="Separation Strength",
    info="How strongly harmonic & percussive components must differ to be separated. \n Higher = cleaner isolation, lower = more natural blend."
)
# Output side: the remixed file and the (always empty) label list.
output_audio = gr.Audio(type="filepath", label="Output Audio")
output_labels = gr.JSON(label="Labels")
# Launch the App
with gr.Blocks() as demo:
    # Build endpoint inside Blocks context
    # build_endpoint wires process_fn to the declared input/output components
    # and returns a mapping of pyharp-managed controls keyed by name.
    endpoint = build_endpoint(
        model_card=model_card,
        input_components=[
            input_audio,
            harmonic_slider,
            percussive_slider,
            kernel_slider,
            margin_slider
        ],
        output_components=[
            output_audio,
            output_labels
        ],
        process_fn=process_fn
    )
    # Layout
    # NOTE(review): these bare subscript expressions look like no-ops — they
    # neither call .render() nor assign the result. Presumably pyharp renders
    # (or has already placed) these controls as a side effect; confirm against
    # pyharp's layout examples before relying on this.
    endpoint["controls_button"]
    endpoint["controls_data"]
    input_audio.render()
    harmonic_slider.render()
    percussive_slider.render()
    kernel_slider.render()
    margin_slider.render()
    output_audio.render()
    output_labels.render()
    endpoint["process_button"]
    endpoint["cancel_button"]
# Enable request queuing, then serve; share=True exposes a public gradio.live URL.
demo.queue()
demo.launch(share=True, show_error=True)