File size: 1,907 Bytes
6a30923
 
 
 
6fa576f
6a30923
 
6fa576f
6a30923
 
 
 
 
 
 
 
 
 
 
 
 
6fa576f
 
6a30923
 
 
 
6fa576f
 
 
 
 
 
6a30923
 
 
 
af9fe9b
6a30923
 
 
 
 
 
23ab25b
6a30923
6fa576f
6a30923
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import gradio as gr
import numpy as np
from spleeter.separator import Separator
import soundfile as sf
import base64

# Spleeter separation
def spleeter_separate(audio):
    """Split a waveform into five stems with Spleeter's ``5stems`` model.

    Args:
        audio: Waveform samples as an array-like. A 1-D (mono) input is
            reshaped to a single-channel column before separation, because
            Spleeter indexes the channel axis of the waveform.

    Returns:
        A 5-tuple of stem waveforms in the order
        ``(vocals, piano, bass, drums, other)``. The second slot is the
        model's ``piano`` stem: the ``5stems`` model does not produce an
        ``accompaniment`` stem (that key only exists for ``2stems``), so the
        slot callers label "accompaniment" is filled with the piano stem.
    """
    waveform = np.asarray(audio)
    if waveform.ndim == 1:
        # Spleeter's stereo up-mix reads waveform.shape[1]; give mono input
        # an explicit channel axis so that lookup does not fail.
        waveform = waveform[:, np.newaxis]

    # NOTE(review): the model is rebuilt on every call; reusing a single
    # Separator would be cheaper, but confirm it is safe to share across
    # concurrent requests before caching it.
    separator = Separator('spleeter:5stems')
    prediction = separator.separate(waveform)
    return (
        prediction['vocals'],
        prediction['piano'],
        prediction['bass'],
        prediction['drums'],
        prediction['other'],
    )

# Function to control the volume of each stem
def adjust_volume(stems, volumes):
    """Scale every stem by its matching volume factor.

    Stems and volumes are paired positionally; if the two sequences differ
    in length, the surplus entries of the longer one are ignored.

    Args:
        stems: Sequence of stem values supporting ``*`` with a volume.
        volumes: Sequence of multipliers, one per stem.

    Returns:
        A new list holding ``stem * volume`` for each pair, in input order.
    """
    return [track * gain for track, gain in zip(stems, volumes)]

# Function to handle the separation and volume adjustment
def process_audio(audio, volumes):
    """Separate ``audio`` into stems, rescale each one, and remix them.

    Args:
        audio: Waveform handed to ``spleeter_separate``.
        volumes: Per-stem multipliers handed to ``adjust_volume``, in the
            same order as the stems returned by ``spleeter_separate``.

    Returns:
        The sum of the volume-adjusted stems, cast to ``float32``.
    """
    scaled_stems = adjust_volume(spleeter_separate(audio), volumes)

    # Accumulate from 0 so the additions happen in the same order as the
    # built-in sum() would perform them.
    mix = 0
    for scaled in scaled_stems:
        mix = mix + scaled
    return mix.astype(np.float32)

# Gradio interface
def separate_audio(audio, vocals, accompaniment, bass, drums, other):
    """Remix an uploaded track with a separate volume for each stem.

    Two input forms are accepted:

    * A ``(sample_rate, samples)`` tuple, which is what a Gradio audio input
      of type ``"numpy"`` passes to its callback. Integer PCM samples are
      normalised to ``float32`` before separation, and the remix is returned
      as ``(sample_rate, int16_samples)`` so a numpy audio output can play it.
    * A base64 string/bytes object of raw ``float32`` samples (the original
      calling convention). The remix is returned as a base64 string of raw
      ``float32`` samples.

    Args:
        audio: The input audio in one of the two forms above.
        vocals: Volume multiplier for the vocals stem.
        accompaniment: Volume multiplier for the second stem.
        bass: Volume multiplier for the bass stem.
        drums: Volume multiplier for the drums stem.
        other: Volume multiplier for the remaining stem.

    Returns:
        ``(sample_rate, samples)`` for tuple input, or a base64 string for
        base64 input.

    Raises:
        ValueError: If ``audio`` is ``None`` (nothing was uploaded).
    """
    volumes = [vocals, accompaniment, bass, drums, other]

    # Legacy convention: base64-encoded raw float32 samples in and out.
    if isinstance(audio, (str, bytes, bytearray)):
        samples = np.frombuffer(base64.b64decode(audio), dtype=np.float32)
        reconstructed_audio = process_audio(samples, volumes)
        return base64.b64encode(reconstructed_audio.tobytes()).decode()

    if audio is None:
        raise ValueError("No audio was provided.")

    sample_rate, samples = audio
    samples = np.asarray(samples)
    if np.issubdtype(samples.dtype, np.integer):
        # Scale integer PCM into [-1, 1] floats.
        # NOTE(review): assumes signed PCM (e.g. int16) - confirm for other
        # integer sample formats.
        samples = samples.astype(np.float32) / np.iinfo(samples.dtype).max
    else:
        samples = samples.astype(np.float32)
    if samples.ndim == 1:
        # Give mono audio an explicit channel axis for the separator.
        samples = samples[:, np.newaxis]

    # NOTE(review): no resampling is done here; confirm the separator's
    # expected sample rate matches the uploaded audio.
    reconstructed_audio = process_audio(samples, volumes)

    # Clip before converting so summed stems cannot wrap around in int16.
    pcm = (np.clip(reconstructed_audio, -1.0, 1.0) * 32767.0).astype(np.int16)
    return sample_rate, pcm

# Web UI: one audio upload plus one volume slider per stem, wired to
# separate_audio. Each slider is given an explicit default of 1.0 (full
# volume); without one the legacy Slider starts at its minimum, which would
# mute every stem and produce silent output until the user moves them.
iface = gr.Interface(
    fn=separate_audio,
    inputs=[
        gr.inputs.Audio(label="Audio file"),
        gr.inputs.Slider(0.0, 1.0, step=0.1, default=1.0, label="Vocals"),
        gr.inputs.Slider(0.0, 1.0, step=0.1, default=1.0, label="Accompaniment"),
        gr.inputs.Slider(0.0, 1.0, step=0.1, default=1.0, label="Bass"),
        gr.inputs.Slider(0.0, 1.0, step=0.1, default=1.0, label="Drums"),
        gr.inputs.Slider(0.0, 1.0, step=0.1, default=1.0, label="Other")
    ],
    outputs=gr.outputs.Audio(label="Separated Audio", type="numpy"),
    title="Song Stem Separation",
    description="Isolate vocals, accompaniment, bass, and drums of any song using the Spleeter model."
)

# Start the Gradio server as soon as the script is run.
iface.launch()