import gradio as gr
import librosa
import soundfile as sf
import os
import tempfile
import shutil
import torch
from demucs.pretrained import get_model as get_demucs_model
from demucs.apply import apply_model
from spleeter.separator import Separator
from matchering import match
from so_vits_svc_fork.inference.core import Svc
import whisper
import madmom
# --- 1. Audio Separation (Demucs/Spleeter) ---
def separate_audio(audio):
    """Split a song into stems (vocals, drums, bass, other) with Demucs.

    Parameters
    ----------
    audio : file-like object whose ``.read()`` yields WAV bytes
        (shape of the Gradio ``type="file"`` upload object).

    Returns
    -------
    dict
        Stem name -> path of the WAV file written for that stem.
    """
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        tmp.write(audio.read())
        tmp_path = tmp.name
    try:
        model = get_demucs_model('htdemucs')
        wav, sr = librosa.load(tmp_path, sr=44100, mono=False)
        # librosa returns a 1-D array for mono files; apply_model expects
        # (batch, channels, samples), so promote mono to a single channel
        # before adding the batch dimension.
        if wav.ndim == 1:
            wav = wav[None, :]
        mix = torch.tensor(wav, dtype=torch.float32).unsqueeze(0)
        sources = apply_model(model, mix, device='cpu', split=True)
        out_dir = tempfile.mkdtemp()
        stems = {}
        for i, name in enumerate(model.sources):
            out_path = os.path.join(out_dir, f"{name}.wav")
            # sources is (batch, stem, channels, samples); soundfile wants
            # (samples, channels), hence the transpose.
            sf.write(out_path, sources[0, i].cpu().numpy().T, sr)
            stems[name] = out_path
        return stems
    finally:
        # delete=False above means nothing removes the upload copy for us;
        # clean it up so repeated calls don't accumulate temp files.
        os.unlink(tmp_path)
# --- 2. Pattern Extraction & Genre Detection ---
def extract_pattern(audio):
    """Extract tempo, beat frames, onset frames and a naive genre guess.

    Parameters
    ----------
    audio : file-like object whose ``.read()`` yields WAV bytes.

    Returns
    -------
    dict with keys ``tempo`` (float, BPM), ``beats`` (list of beat frame
    indices), ``onsets`` (list of onset frame indices) and ``genre`` (str).
    """
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        tmp.write(audio.read())
        tmp_path = tmp.name
    try:
        y, sr = librosa.load(tmp_path, sr=None)
        tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
        # librosa >= 0.10 may return tempo as a 1-element ndarray; convert
        # to a plain float BEFORE the threshold comparison below, which is
        # ambiguous/deprecated on arrays.
        tempo = float(tempo)
        onset_env = librosa.onset.onset_strength(y=y, sr=sr)
        onsets = librosa.onset.onset_detect(onset_envelope=onset_env, sr=sr)
        # Crude tempo-threshold heuristic; replace with an ML classifier
        # for real genre detection.
        genre = "dj bantengan" if tempo > 120 else "pop"
        return {
            "tempo": tempo,
            "beats": beats.tolist(),
            "onsets": onsets.tolist(),
            "genre": genre,
        }
    finally:
        # Remove the delete=False temp copy of the upload.
        os.unlink(tmp_path)
# --- 3. Genre-Aware Pattern Generator (Magenta/MusicGen style transfer) ---
def generate_pattern(reference_audio, creativity=0.2):
    """Generate a rhythmic pattern from a reference track.

    Currently a placeholder that only analyses the reference audio;
    ``creativity`` is accepted but unused until a generative backend
    (MusicGen / Magenta style transfer) is integrated.
    """
    # TODO: swap this analysis passthrough for real pattern generation.
    analysed = extract_pattern(reference_audio)
    return analysed
# --- 4. Mixing/Mastering (Matchering) ---
def mix_and_master(input_audio, reference_audio):
    """Master *input_audio* so it tonally matches *reference_audio*.

    Parameters
    ----------
    input_audio, reference_audio : file-like objects whose ``.read()``
        yields WAV bytes.

    Returns
    -------
    str
        Path of the mastered WAV file.
    """
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_in, \
         tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_ref:
        tmp_in.write(input_audio.read())
        tmp_ref.write(reference_audio.read())
        in_path = tmp_in.name
        ref_path = tmp_ref.name
    out_path = in_path.replace(".wav", "_mastered.wav")
    try:
        # NOTE(review): matchering's documented entry point is
        # matchering.process(target=..., reference=..., results=[...]);
        # confirm this positional `match(in, ref, out)` call against the
        # installed matchering version.
        match(in_path, ref_path, out_path)
        return out_path
    finally:
        # Only the mastered result is returned; drop both temp inputs so
        # repeated calls don't accumulate files.
        os.unlink(in_path)
        os.unlink(ref_path)
# --- 5. Vocal Processing (so-vits-svc, Spleeter) ---
def change_vocal(audio, model_path):
    """Convert a vocal recording to another voice with so-vits-svc.

    Parameters
    ----------
    audio : file-like object whose ``.read()`` yields WAV bytes.
    model_path : str
        Path to the so-vits-svc voice model checkpoint.

    Returns
    -------
    Path of the converted vocal audio produced by ``Svc.infer``.
    """
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        tmp.write(audio.read())
        tmp_path = tmp.name
    try:
        # NOTE(review): model is reloaded on every request — cache the Svc
        # instance per model_path if this becomes a hot path.
        svc = Svc(model_path)
        out_wav_path = svc.infer(tmp_path)
        return out_wav_path
    finally:
        # Remove the delete=False temp copy once inference has finished.
        os.unlink(tmp_path)
# --- 6. Denoising (RNNoise, Demucs) ---
def denoise_audio(audio):
    """Placeholder denoiser: copies the input audio to a temp WAV file.

    TODO: wire up RNNoise or Demucs for actual noise reduction; for now
    the audio passes through unchanged.

    Parameters
    ----------
    audio : file-like object whose ``.read()`` yields WAV bytes.

    Returns
    -------
    str
        Path of the written (unmodified) WAV file.
    """
    payload = audio.read()
    handle = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    with handle:
        handle.write(payload)
    return handle.name
# --- 7. Multi-vocal Lyric Detection (Whisper) ---
def detect_lyrics(audio):
    """Transcribe lyrics from an audio file with OpenAI Whisper.

    Parameters
    ----------
    audio : file-like object whose ``.read()`` yields WAV bytes.

    Returns
    -------
    dict with a single ``"lyrics"`` key holding the transcription text.
    """
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        tmp.write(audio.read())
        tmp_path = tmp.name
    try:
        # NOTE(review): the model is reloaded on every call; cache it at
        # module level if latency matters.
        model = whisper.load_model("base")
        result = model.transcribe(tmp_path)
        # For multi-vocal support, split vocals first (Spleeter/Demucs)
        # and transcribe each stem separately.
        return {"lyrics": result["text"]}
    finally:
        # Remove the delete=False temp copy of the upload.
        os.unlink(tmp_path)
# --- Gradio UI ---
# Gradio front-end: one tab per pipeline stage, each wiring an upload (and
# optional extra inputs) through the matching function above.
# NOTE(review): gr.Audio(type="file") is the legacy Gradio 3 API (it hands
# the callback a file object with .read(), which the functions above rely
# on); Gradio 4 only accepts type="filepath"/"numpy" — migrating would also
# require changing every callback to take a path string.
with gr.Blocks() as demo:
    gr.Markdown("# DAW AI Ultra-Premium Pipeline (All-in-One, Real Pipeline)")
    with gr.Tab("Separate Audio"):
        audio_in = gr.Audio(type="file", label="Input Audio")
        out = gr.JSON(label="Separated Stems (vocals, drums, bass, other)")
        btn = gr.Button("Separate")
        btn.click(separate_audio, inputs=audio_in, outputs=out)
    with gr.Tab("Extract Pattern"):
        audio_in2 = gr.Audio(type="file", label="Input Audio")
        out2 = gr.JSON(label="Pattern Info")
        btn2 = gr.Button("Extract")
        btn2.click(extract_pattern, inputs=audio_in2, outputs=out2)
    with gr.Tab("Generate Pattern"):
        ref_audio = gr.Audio(type="file", label="Reference Audio")
        creativity = gr.Slider(0, 1, value=0.2, label="Creativity")
        out3 = gr.JSON(label="Generated Pattern")
        btn3 = gr.Button("Generate")
        btn3.click(generate_pattern, inputs=[ref_audio, creativity], outputs=out3)
    with gr.Tab("Mix/Master"):
        audio_in3 = gr.Audio(type="file", label="Input Audio")
        ref_audio2 = gr.Audio(type="file", label="Reference Audio")
        out4 = gr.Audio(label="Mastered Output")
        btn4 = gr.Button("Master")
        btn4.click(mix_and_master, inputs=[audio_in3, ref_audio2], outputs=out4)
    with gr.Tab("Vocal Change"):
        audio_in4 = gr.Audio(type="file", label="Input Vocal Audio")
        model_path = gr.Textbox(label="Voice Model Path")
        out5 = gr.Audio(label="Changed Vocal Output")
        btn5 = gr.Button("Change Vocal")
        btn5.click(change_vocal, inputs=[audio_in4, model_path], outputs=out5)
    with gr.Tab("Denoise"):
        audio_in5 = gr.Audio(type="file", label="Input Audio")
        out6 = gr.Audio(label="Denoised Output")
        btn6 = gr.Button("Denoise")
        btn6.click(denoise_audio, inputs=audio_in5, outputs=out6)
    with gr.Tab("Detect Lyrics (Multi-Vocal)"):
        audio_in6 = gr.Audio(type="file", label="Input Audio")
        out7 = gr.JSON(label="Detected Lyrics per Vocal")
        btn7 = gr.Button("Detect Lyrics")
        btn7.click(detect_lyrics, inputs=audio_in6, outputs=out7)

# Stray " |" gutter residue removed from the original last line; launch the
# app (blocking call).
demo.launch()