# Source: Hugging Face Space (commit 9b667d9, "desc fix", by saumya-pailwan)
from audiotools import AudioSignal
from pyharp.core import ModelCard, build_endpoint
from pyharp.media.audio import load_audio, save_audio
from pyharp.labels import LabelList
import gradio as gr
import librosa
import torch
from pathlib import Path
import time
# ModelCard
# Endpoint description shown to HARP clients; kept as a named constant so the
# ModelCard construction below stays compact.
_DESCRIPTION = (
    "Remixes a track into its harmonic and percussive components using Harmonic–Percussive Source Separation (HPSS). \n"
    "Input: A mono or stereo music track (WAV or MP3). \n"
    "Output: A single remixed audio file with adjustable harmonic & percussive balance, based on the selected parameters."
)

# Metadata advertised by this endpoint (name, description, author, tags).
model_card = ModelCard(
    name="Harmonic / Percussive Separation",
    description=_DESCRIPTION,
    author="Hugo Flores Garcia",
    tags=["example", "separator", "hpss"],
)
# Model Logic
def hpss(signal: AudioSignal, **kwargs):
    """Split *signal* into harmonic and percussive components via librosa HPSS.

    Args:
        signal: input AudioSignal; its ``audio_data`` tensor is assumed to be
            (batch, channels, samples) with batch == 1 — the squeeze below
            relies on that (TODO confirm against callers).
        **kwargs: forwarded to ``librosa.effects.hpss``
            (e.g. ``kernel_size``, ``margin``).

    Returns:
        Tuple ``(harmonic_signal, percussive_signal)``: two clones of *signal*
        whose ``audio_data`` holds the respective component.

    Raises:
        ValueError: if the squeezed audio is neither 1-D (mono) nor
            2-D (multi-channel).
    """
    # detach().cpu() makes the conversion safe for tensors that live on an
    # accelerator or carry autograd history; plain .numpy() raises there.
    y = signal.audio_data.squeeze().detach().cpu().numpy()
    h, p = librosa.effects.hpss(y, **kwargs)
    # Restore the (batch, channels, samples) layout audiotools expects.
    if h.ndim == 1:        # mono: (n,) -> (1, 1, n)
        h = h[None, None, :]
        p = p[None, None, :]
    elif h.ndim == 2:      # multi-channel: (ch, n) -> (1, ch, n)
        h = h[None, :, :]
        p = p[None, :, :]
    else:
        raise ValueError("Unexpected audio shape")
    harmonic_signal = signal.clone()
    harmonic_signal.audio_data = torch.from_numpy(h)
    percussive_signal = signal.clone()
    percussive_signal.audio_data = torch.from_numpy(p)
    return harmonic_signal, percussive_signal
MIN_DB = -120  # slider floor; interpreted as "fully muted" (-inf dB) below


def process_fn(audio_file_path,
               harmonic_db: float,
               percussive_db: float,
               kernel_size: int = 31,
               margin: float = 1.0):
    """HARP entry point: separate the track with HPSS, re-balance, and remix.

    Args:
        audio_file_path: path to the uploaded audio file.
        harmonic_db: gain (dB) applied to the harmonic component;
            MIN_DB mutes it entirely.
        percussive_db: gain (dB) applied to the percussive component;
            MIN_DB mutes it entirely.
        kernel_size: median-filter kernel size passed to librosa HPSS.
        margin: separation margin passed to librosa HPSS.

    Returns:
        Tuple of (path to the saved remixed WAV, empty LabelList).
    """
    signal = load_audio(audio_file_path)
    harmonic, percussive = hpss(signal,
                                kernel_size=int(kernel_size),
                                margin=margin)

    def as_gain(db):
        # The slider minimum doubles as a mute switch.
        return float("-inf") if db == MIN_DB else db

    # Apply each gain, then sum the two components back into one signal.
    mixed = harmonic.volume_change(as_gain(harmonic_db))
    mixed = mixed + percussive.volume_change(as_gain(percussive_db))

    # Output file is named after the input stem, e.g. song.mp3 -> song_hpss.wav
    out_name = f"{Path(audio_file_path).stem}_hpss.wav"
    return save_audio(mixed, out_name), LabelList()
# Gradio Components
# Input side: the source audio plus the four HPSS controls exposed to HARP.
# .harp_required is a pyharp extension marking the field mandatory.
input_audio = gr.Audio(type="filepath", label="Input Audio").harp_required(True)
# Gain for the harmonic (tonal) component; MIN_DB is treated as mute in process_fn.
harmonic_slider = gr.Slider(
    minimum=MIN_DB, maximum=24,
    step=1, value=0,
    label="Harmonic Level (dB)",
    info="Boost or reduce tonal parts like chords and pads."
)
# Gain for the percussive (rhythmic) component; MIN_DB is treated as mute in process_fn.
percussive_slider = gr.Slider(
    minimum=MIN_DB, maximum=24,
    step=1, value=0,
    label="Percussive Level (dB)",
    info="Control the loudness of drums and other rhythmic hits."
)
# Maps to librosa's HPSS kernel_size (cast to int in process_fn).
kernel_slider = gr.Slider(
    minimum=1, maximum=101,
    step=1, value=31,
    label="Time Resolution",
    info="How much time/spectral context is used to classify energy as harmonic or percussive. \n Lower = more detailed, reacts faster to rhythm, higher = smoother and slower"
)
# Maps to librosa's HPSS margin parameter.
margin_slider = gr.Slider(
    minimum=0.5, maximum=5.0,
    step=0.1, value=1.0,
    label="Separation Strength",
    info="How strongly harmonic & percussive components must differ to be separated. \n Higher = cleaner isolation, lower = more natural blend."
)
# Output side: the remixed file and the (always empty) label list.
output_audio = gr.Audio(type="filepath", label="Output Audio")
output_labels = gr.JSON(label="Labels")
# Launch the App
with gr.Blocks() as demo:
    # Build endpoint inside Blocks context
    # build_endpoint wires process_fn to the declared input/output components
    # and returns a mapping of pyharp-managed controls keyed by name.
    endpoint = build_endpoint(
        model_card=model_card,
        input_components=[
            input_audio,
            harmonic_slider,
            percussive_slider,
            kernel_slider,
            margin_slider
        ],
        output_components=[
            output_audio,
            output_labels
        ],
        process_fn=process_fn
    )
    # Layout
    # NOTE(review): these bare subscript expressions look like no-ops — they
    # neither call .render() nor assign the result. Presumably pyharp renders
    # (or has already placed) these controls as a side effect; confirm against
    # pyharp's layout examples before relying on this.
    endpoint["controls_button"]
    endpoint["controls_data"]
    input_audio.render()
    harmonic_slider.render()
    percussive_slider.render()
    kernel_slider.render()
    margin_slider.render()
    output_audio.render()
    output_labels.render()
    endpoint["process_button"]
    endpoint["cancel_button"]
# Enable request queuing, then serve; share=True exposes a public gradio.live URL.
demo.queue()
demo.launch(share=True, show_error=True)