from audiotools import AudioSignal
from pyharp.core import ModelCard, build_endpoint
from pyharp.media.audio import load_audio, save_audio
from pyharp.labels import LabelList

import gradio as gr
import librosa
import torch

from pathlib import Path
import time

# ModelCard

# Metadata shown to HARP clients for this processing endpoint.
_DESCRIPTION = (
    "Remixes a track into its harmonic and percussive components using Harmonic–Percussive Source Separation (HPSS). \n"
    "Input: A mono or stereo music track (WAV or MP3). \n"
    "Output: A single remixed audio file with adjustable harmonic & percussive balance, based on the selected parameters."
)

model_card = ModelCard(
    name="Harmonic / Percussive Separation",
    description=_DESCRIPTION,
    author="Hugo Flores Garcia",
    tags=["example", "separator", "hpss"],
)


# Model Logic
def hpss(signal: AudioSignal, **kwargs):
    """Split a signal into harmonic and percussive components via librosa HPSS.

    Args:
        signal: Input audio; its tensor is squeezed before separation, so a
            batch size of 1 is assumed — TODO confirm against callers.
        **kwargs: Forwarded to ``librosa.effects.hpss``
            (e.g. ``kernel_size``, ``margin``).

    Returns:
        Tuple ``(harmonic_signal, percussive_signal)``: clones of ``signal``
        with their audio data replaced by the corresponding component.

    Raises:
        ValueError: If the squeezed audio array is neither 1-D (mono) nor
            2-D (multichannel).
    """
    squeezed = signal.audio_data.squeeze().numpy()
    harmonic_np, percussive_np = librosa.effects.hpss(squeezed, **kwargs)

    # Restore the (batch, channels, samples) layout the signal object uses.
    if harmonic_np.ndim == 1:
        harmonic_np = harmonic_np[None, None, :]
        percussive_np = percussive_np[None, None, :]
    elif harmonic_np.ndim == 2:
        harmonic_np = harmonic_np[None, :, :]
        percussive_np = percussive_np[None, :, :]
    else:
        raise ValueError("Unexpected audio shape")

    out_harmonic = signal.clone()
    out_harmonic.audio_data = torch.from_numpy(harmonic_np)

    out_percussive = signal.clone()
    out_percussive.audio_data = torch.from_numpy(percussive_np)

    return out_harmonic, out_percussive

# Slider floor that is treated as "mute" (mapped to -inf dB when mixing).
MIN_DB = -120

def process_fn(audio_file_path,
               harmonic_db: float,
               percussive_db: float,
               kernel_size: int = 31,
               margin: float = 1.0):
    """Separate a track with HPSS, re-weight the parts, and write the remix.

    Args:
        audio_file_path: Path to the input audio file.
        harmonic_db: Gain (dB) applied to the harmonic component;
            ``MIN_DB`` mutes it entirely.
        percussive_db: Gain (dB) applied to the percussive component;
            ``MIN_DB`` mutes it entirely.
        kernel_size: Median-filter kernel size passed to HPSS.
        margin: Separation margin passed to HPSS.

    Returns:
        Tuple of (output audio file path, empty ``LabelList``).
    """
    signal = load_audio(audio_file_path)

    harmonic, percussive = hpss(signal, kernel_size=int(kernel_size), margin=margin)

    def as_gain_db(level_db):
        # The slider floor doubles as a mute: MIN_DB becomes -inf dB (zero gain).
        return float("-inf") if level_db == MIN_DB else level_db

    # Apply the per-component gains and sum the two signals back together.
    remixed = (
        harmonic.volume_change(as_gain_db(harmonic_db))
        + percussive.volume_change(as_gain_db(percussive_db))
    )

    # Name the output after the input stem, e.g. "song.mp3" -> "song_hpss.wav".
    out_name = f"{Path(audio_file_path).stem}_hpss.wav"
    output_audio_path = save_audio(remixed, out_name)

    return output_audio_path, LabelList()  # no labels produced


# Gradio Components
# Required audio input for the HARP endpoint.
input_audio = gr.Audio(type="filepath", label="Input Audio").harp_required(True)

# Gain controls for the two separated components.
harmonic_slider = gr.Slider(
    label="Harmonic Level (dB)",
    info="Boost or reduce tonal parts like chords and pads.",
    minimum=MIN_DB,
    maximum=24,
    step=1,
    value=0,
)

percussive_slider = gr.Slider(
    label="Percussive Level (dB)",
    info="Control the loudness of drums and other rhythmic hits.",
    minimum=MIN_DB,
    maximum=24,
    step=1,
    value=0,
)

# HPSS algorithm parameters.
kernel_slider = gr.Slider(
    label="Time Resolution",
    info="How much time/spectral context is used to classify energy as harmonic or percussive. \n Lower = more detailed, reacts faster to rhythm, higher = smoother and slower",
    minimum=1,
    maximum=101,
    step=1,
    value=31,
)

margin_slider = gr.Slider(
    label="Separation Strength",
    info="How strongly harmonic & percussive components must differ to be separated. \n Higher = cleaner isolation, lower = more natural blend.",
    minimum=0.5,
    maximum=5.0,
    step=0.1,
    value=1.0,
)

# Outputs: the remixed file plus a (currently empty) label list.
output_audio = gr.Audio(type="filepath", label="Output Audio")
output_labels = gr.JSON(label="Labels")


# Launch the App 
with gr.Blocks() as demo:
    # build_endpoint must run inside the Blocks context so the HARP
    # control components get registered with this app.
    endpoint = build_endpoint(
        model_card=model_card,
        input_components=[
            input_audio,
            harmonic_slider,
            percussive_slider,
            kernel_slider,
            margin_slider,
        ],
        output_components=[output_audio, output_labels],
        process_fn=process_fn,
    )

    # Layout: reference the hidden control widgets, then render the
    # visible components top-to-bottom in this exact order.
    endpoint["controls_button"]
    endpoint["controls_data"]
    for component in (
        input_audio,
        harmonic_slider,
        percussive_slider,
        kernel_slider,
        margin_slider,
        output_audio,
        output_labels,
    ):
        component.render()
    endpoint["process_button"]
    endpoint["cancel_button"]

demo.queue()
demo.launch(share=True, show_error=True)