# AudioMobilePro / app.py
import os
import tempfile

import gradio as gr
import librosa
import matchering as mg
import numpy as np
import soundfile as sf
import torch
import whisper
from demucs.apply import apply_model
from demucs.pretrained import get_model as get_demucs_model
from so_vits_svc_fork.inference.core import Svc
# --- 1. Audio Separation (Demucs) ---
def separate_audio(audio_path):
    # htdemucs works on 44.1 kHz stereo; load accordingly.
    wav, sr = librosa.load(audio_path, sr=44100, mono=False)
    if wav.ndim == 1:
        wav = np.stack([wav, wav])  # duplicate a mono signal into two channels
    model = get_demucs_model('htdemucs')
    mix = torch.tensor(wav, dtype=torch.float32).unsqueeze(0)  # (batch, channels, time)
    sources = apply_model(model, mix, device='cpu', split=True)
    out_dir = tempfile.mkdtemp()
    stems = {}
    for i, name in enumerate(model.sources):
        out_path = os.path.join(out_dir, f"{name}.wav")
        sf.write(out_path, sources[0, i].cpu().numpy().T, sr)
        stems[name] = out_path
    return stems
# --- 2. Pattern Extraction & Genre Detection ---
def extract_pattern(audio_path):
    y, sr = librosa.load(audio_path, sr=None)
    tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
    tempo = float(np.atleast_1d(tempo)[0])  # recent librosa returns a 1-element array
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    onsets = librosa.onset.onset_detect(onset_envelope=onset_env, sr=sr)
    # Crude tempo-threshold genre heuristic; an ML-based sketch follows this function.
    genre = "dj bantengan" if tempo > 120 else "pop"
    return {
        "tempo": tempo,
        "beats": beats.tolist(),
        "onsets": onsets.tolist(),
        "genre": genre
    }
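# Sketch of an ML replacement for the tempo heuristic above, assuming a
# scikit-learn classifier trained elsewhere on mean-MFCC features and saved
# with joblib ("genre_clf.joblib" is a hypothetical path, not shipped here).
def classify_genre_ml(audio_path, clf_path="genre_clf.joblib"):
    import joblib
    y, sr = librosa.load(audio_path, sr=22050, mono=True)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
    features = mfcc.mean(axis=1).reshape(1, -1)  # one fixed-size vector per clip
    return joblib.load(clf_path).predict(features)[0]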
# --- 3. Genre-Aware Pattern Generator (Magenta/MusicGen style transfer) ---
def generate_pattern(reference_audio_path, creativity=0.2):
    # TODO: integrate MusicGen/Magenta for real pattern generation
    # (a melody-conditioned MusicGen sketch follows this function).
    # For now, return the extracted pattern as a placeholder.
    return extract_pattern(reference_audio_path)
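# Sketch of melody-conditioned generation with MusicGen, assuming the optional
# `audiocraft` package is installed. The prompt text, duration, and the mapping
# of the creativity slider onto sampling temperature are illustrative choices,
# not part of this Space.
def generate_pattern_musicgen(reference_audio_path, creativity=0.2):
    import torchaudio
    from audiocraft.models import MusicGen
    model = MusicGen.get_pretrained("facebook/musicgen-melody")
    model.set_generation_params(duration=10, temperature=0.7 + creativity)
    melody, melody_sr = torchaudio.load(reference_audio_path)
    # generate_with_chroma() conditions the output on the reference's melody.
    wav = model.generate_with_chroma(
        ["instrumental pattern in the style of the reference"], melody[None], melody_sr
    )
    out_path = os.path.join(tempfile.mkdtemp(), "generated.wav")
    sf.write(out_path, wav[0].cpu().numpy().T, model.sample_rate)
    return out_path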
# --- 4. Mixing/Mastering (Matchering) ---
def mix_and_master(input_audio_path, reference_audio_path):
    out_path = os.path.join(tempfile.mkdtemp(), "mastered.wav")
    # Matchering's public entry point is mg.process(); it writes every
    # result listed in `results` to disk.
    mg.process(
        target=input_audio_path,
        reference=reference_audio_path,
        results=[mg.pcm16(out_path)],
    )
    return out_path
# --- 5. Vocal Processing (so-vits-svc, Spleeter) ---
def change_vocal(audio_path, model_path):
    # NOTE: so-vits-svc-fork's Svc needs a model config alongside the
    # checkpoint; a config.json next to it is assumed here. infer() also
    # takes audio plus conversion parameters rather than a bare path, and its
    # exact signature varies between fork versions, so check the installed
    # release before wiring this up.
    config_path = os.path.join(os.path.dirname(model_path), "config.json")
    svc = Svc(net_g_path=model_path, config_path=config_path)
    out_wav_path = svc.infer(audio_path)  # placeholder call; see note above
    return out_wav_path
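# Sketch of the Spleeter half of this section: isolate the vocal before
# conversion. Separator("spleeter:2stems") and separate_to_file() are
# Spleeter's documented API; it is imported lazily here because it pulls in
# TensorFlow.
def isolate_vocals_spleeter(audio_path):
    from spleeter.separator import Separator
    out_dir = tempfile.mkdtemp()
    Separator("spleeter:2stems").separate_to_file(audio_path, out_dir)
    # separate_to_file() writes <out_dir>/<clip name>/vocals.wav
    base = os.path.splitext(os.path.basename(audio_path))[0]
    return os.path.join(out_dir, base, "vocals.wav")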
# --- 6. Denoising (RNNoise, Demucs) ---
def denoise_audio(audio_path):
    # TODO: integrate RNNoise or a Demucs-based denoiser for real denoising;
    # a working spectral-gating sketch follows this function.
    # For now, pass the input through unchanged.
    return audio_path
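# Sketch of a working denoiser using the `noisereduce` package (spectral
# gating) in place of RNNoise; noisereduce is an extra dependency this file
# does not otherwise declare.
def denoise_audio_noisereduce(audio_path):
    import noisereduce as nr
    y, sr = librosa.load(audio_path, sr=None)
    reduced = nr.reduce_noise(y=y, sr=sr)  # noise profile is estimated from the clip itself
    out_path = os.path.join(tempfile.mkdtemp(), "denoised.wav")
    sf.write(out_path, reduced, sr)
    return out_path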
# --- 7. Multi-vocal Lyric Detection (Whisper) ---
def detect_lyrics(audio_path):
    model = whisper.load_model("base")  # loading per call is slow; cache at module level if memory allows
    result = model.transcribe(audio_path)
    # For multiple vocalists, split off the vocal stem first and transcribe
    # it separately; see the sketch after this function.
    return {"lyrics": result["text"]}
# --- Gradio UI ---
with gr.Blocks() as demo:
    gr.Markdown("# DAW AI Ultra-Premium Pipeline (All-in-One, Real Pipeline)")
    with gr.Tab("Separate Audio"):
        audio_in = gr.Audio(type="filepath", label="Input Audio")
        out = gr.JSON(label="Separated Stems (vocals, drums, bass, other)")
        btn = gr.Button("Separate")
        btn.click(separate_audio, inputs=audio_in, outputs=out)
    with gr.Tab("Extract Pattern"):
        audio_in2 = gr.Audio(type="filepath", label="Input Audio")
        out2 = gr.JSON(label="Pattern Info")
        btn2 = gr.Button("Extract")
        btn2.click(extract_pattern, inputs=audio_in2, outputs=out2)
    with gr.Tab("Generate Pattern"):
        ref_audio = gr.Audio(type="filepath", label="Reference Audio")
        creativity = gr.Slider(0, 1, value=0.2, label="Creativity")
        out3 = gr.JSON(label="Generated Pattern")
        btn3 = gr.Button("Generate")
        btn3.click(generate_pattern, inputs=[ref_audio, creativity], outputs=out3)
    with gr.Tab("Mix/Master"):
        audio_in3 = gr.Audio(type="filepath", label="Input Audio")
        ref_audio2 = gr.Audio(type="filepath", label="Reference Audio")
        out4 = gr.Audio(label="Mastered Output")
        btn4 = gr.Button("Master")
        btn4.click(mix_and_master, inputs=[audio_in3, ref_audio2], outputs=out4)
    with gr.Tab("Vocal Change"):
        audio_in4 = gr.Audio(type="filepath", label="Input Vocal Audio")
        model_path = gr.Textbox(label="Voice Model Path")
        out5 = gr.Audio(label="Changed Vocal Output")
        btn5 = gr.Button("Change Vocal")
        btn5.click(change_vocal, inputs=[audio_in4, model_path], outputs=out5)
    with gr.Tab("Denoise"):
        audio_in5 = gr.Audio(type="filepath", label="Input Audio")
        out6 = gr.Audio(label="Denoised Output")
        btn6 = gr.Button("Denoise")
        btn6.click(denoise_audio, inputs=audio_in5, outputs=out6)
    with gr.Tab("Detect Lyrics (Multi-Vocal)"):
        audio_in6 = gr.Audio(type="filepath", label="Input Audio")
        out7 = gr.JSON(label="Detected Lyrics per Vocal")
        btn7 = gr.Button("Detect Lyrics")
        btn7.click(detect_lyrics, inputs=audio_in6, outputs=out7)
demo.launch()