# Source provenance (web-page residue converted to comments so the file parses):
# SaltProphet's picture
# Update app.py
# b1477ac verified
# raw
# history blame
# 12.4 kB
import gradio as gr
import librosa
import numpy as np
import os
import shutil
import zipfile
import tempfile
import soundfile as sf
import traceback
import subprocess # Necessary for running Spleeter
from typing import Tuple, List
# --- Configuration ---

# Name of the root folder placed inside the generated ZIP pack.
OUTPUT_FOLDER_NAME = "PRO_LOOP_PACK"

# Mapping of model selection to Spleeter config and resulting stem types.
# 'spleeter_config' is passed to Spleeter's `-p spleeter:<config>` flag;
# 'stems' are the WAV filenames Spleeter writes; 'display_stems' are the
# user-facing folder/file names (kept index-aligned with 'stems').
STEM_MODELS = {
    '2-Stems (Vocals/Inst)': {
        'spleeter_config': '2stems',
        'stems': ['vocals', 'accompaniment'],  # Spleeter output names
        'display_stems': ['Vocals', 'Instrumental']  # User-facing names
    },
    '4-Stems (Drums, Bass, Vocals, Other)': {
        'spleeter_config': '4stems',
        'stems': ['vocals', 'drums', 'bass', 'other'],
        'display_stems': ['Vocals', 'Drums', 'Bass', 'Other']
    },
    '5-Stems (Drums, Bass, Vocals, Piano, Other)': {
        'spleeter_config': '5stems',
        'stems': ['vocals', 'drums', 'bass', 'piano', 'other'],
        'display_stems': ['Vocals', 'Drums', 'Bass', 'Piano', 'Other']
    },
}

# Loop lengths (in bars, assuming 4/4) generated for every stem.
LOOP_BAR_LENGTHS = [4, 6, 8]

# Key Detection Templates (as defined previously).
# Krumhansl-style major/minor key profiles: one weight per pitch class,
# rotated against the mean chroma vector during key estimation.
KEY_TEMPLATES = {
    'major': [6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.16, 3.61, 3.28, 2.91],
    'minor': [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.91, 3.03, 3.34]
}

# Pitch-class names, index-aligned with chroma bins (0 = C).
NOTES = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
# --- Utility Functions ---
def save_segment(filepath: str, audio_data: np.ndarray, sr: int):
    """Persist an audio buffer to disk as a 16-bit PCM WAV file.

    Args:
        filepath: Destination path for the WAV file.
        audio_data: Audio samples to write.
        sr: Sample rate in Hz (Spleeter emits 44100 Hz audio, so the rate
            is passed through explicitly rather than assumed).
    """
    # Explicit keyword arguments make the soundfile call self-describing.
    sf.write(file=filepath, data=audio_data, samplerate=sr,
             format='WAV', subtype='PCM_16')
def detect_key_and_mode(y: np.ndarray, sr: int) -> str:
    """Estimate the musical key of a signal, e.g. 'CMajor' or 'AMinor'.

    Uses a Krumhansl-style template match: the mean chroma (CQT) vector is
    normalized and compared via dot product against the major and minor
    key profiles rotated to each of the 12 pitch classes.

    Args:
        y: Mono audio samples.
        sr: Sample rate of `y` in Hz.

    Returns:
        The best-matching key with spaces stripped (e.g. 'CMajor'),
        'KeyDetectionAmbiguous' when no profile correlates convincingly
        (or the input is silent), or 'KeyDetectionFailed' on any error.
    """
    try:
        chroma = librosa.feature.chroma_cqt(y=y, sr=sr)
        chroma_mean = np.mean(chroma, axis=1)

        # Guard against silent/degenerate input: a zero (or non-finite)
        # chroma sum cannot be normalized and carries no key information.
        total = chroma_mean.sum()
        if not np.isfinite(total) or total <= 0:
            return "KeyDetectionAmbiguous"
        chroma_mean /= total

        best_key = "Unknown"
        max_correlation = -1.0
        for i, note in enumerate(NOTES):
            # Rotate each profile so its tonic aligns with pitch class i,
            # then score both modes with a single shared code path.
            for mode in ('major', 'minor'):
                template = np.roll(KEY_TEMPLATES[mode], i)
                corr = np.dot(chroma_mean, template)
                if corr > max_correlation:
                    max_correlation = corr
                    best_key = f"{note} {mode.capitalize()}"

        # A weak best correlation means the tonal content is inconclusive.
        if max_correlation < 0.2:
            return "KeyDetectionAmbiguous"
        return best_key.replace(' ', '')
    except Exception as e:
        print(f"Key Detection Failed: {e}")
        return "KeyDetectionFailed"
def separate_stems(audio_path: str, model_name: str, output_dir: str) -> str:
    """Execute Spleeter source separation via subprocess.

    Requires the 'spleeter' package (and its CLI entry point) to be
    installed in the environment.

    Args:
        audio_path: Path to the input audio file.
        model_name: A key of STEM_MODELS selecting the Spleeter config.
        output_dir: Directory Spleeter writes into; Spleeter creates a
            subdirectory named after the input file (without extension).

    Returns:
        Path to the subdirectory containing the separated stem WAVs.

    Raises:
        RuntimeError: If the Spleeter command fails or its expected
            output directory is missing.
    """
    spleeter_config = STEM_MODELS[model_name]['spleeter_config']

    # Spleeter command: spleeter separate -o {output_dir} -p {config} {input_file}
    # List form (shell=False) avoids any shell-injection risk from the path.
    command = [
        "spleeter", "separate",
        "-o", output_dir,
        "-p", f"spleeter:{spleeter_config}",
        audio_path
    ]
    try:
        # check=True turns a non-zero exit code into CalledProcessError.
        result = subprocess.run(command, check=True, capture_output=True, text=True)
        print("Spleeter Output:", result.stdout)
        print("Spleeter Errors:", result.stderr)

        # Spleeter creates a sub-directory based on the input filename;
        # locate it so the caller can read the stem WAVs.
        base_filename = os.path.splitext(os.path.basename(audio_path))[0]
        spleeter_output_path = os.path.join(output_dir, base_filename)
        if not os.path.isdir(spleeter_output_path):
            raise FileNotFoundError(f"Spleeter output directory not found at: {spleeter_output_path}")
        return spleeter_output_path
    except subprocess.CalledProcessError as e:
        # Chain the cause so the original traceback survives for debugging.
        raise RuntimeError(f"Spleeter command failed. Check if 'spleeter' is installed. Output: {e.stdout}, Error: {e.stderr}") from e
    except Exception as e:
        raise RuntimeError(f"Error during Spleeter execution: {e}") from e
# --- Main Processing Function ---
def create_market_ready_pack(
audio_file_path: str,
one_shot_sensitivity: float,
stem_model_selection: str,
progress=gr.Progress()
) -> Tuple[str | None, str]:
"""
Processes the input audio file, generates loops and one-shots,
and packages them into a market-ready ZIP file.
"""
temp_dir = None
if not audio_file_path:
return None, "Error: Please upload an audio file before proceeding."
try:
# 1. Setup Temporary Directories
temp_dir = tempfile.mkdtemp()
output_root = os.path.join(temp_dir, OUTPUT_FOLDER_NAME)
os.makedirs(output_root, exist_ok=True)
progress(0.05, desc="Loading and Verifying Audio...")
# Robust Audio Loading (Load full mix for analysis)
y_full, sr = librosa.load(audio_file_path, sr=None, mono=True)
if y_full.size == 0:
raise ValueError("Loaded audio is empty.")
# 2. Advanced Audio Analysis (Tempo and Key)
progress(0.15, desc="Analyzing Tempo and Musical Key...")
tempo = 120.0
start_sample = 0
key_mode_name = "120BPM_UnknownKey"
try:
tempo, beat_frames = librosa.beat.beat_track(y=y_full, sr=sr, trim=True)
key_mode_name = detect_key_and_mode(y_full, sr)
samples_per_beat = int((60 / tempo) * sr)
start_sample = librosa.frames_to_samples(beat_frames[0]) if beat_frames.size > 0 else 0
gr.Info(f"Analysis Complete: {int(tempo)} BPM, {key_mode_name}.")
key_mode_name = f"{int(tempo)}BPM_{key_mode_name}"
except Exception as e:
gr.Warning(f"Warning: Tempo or Key detection failed ({e}). Using default 120 BPM and 'Unknown Key'.")
samples_per_beat = int((60 / 120.0) * sr) # Fallback beat timing
# 3. REAL STEM SEPARATION using Spleeter
progress(0.25, desc=f"Separating Stems using {stem_model_selection} model...")
spleeter_output_path = separate_stems(audio_file_path, stem_model_selection, output_root)
spleeter_stems = STEM_MODELS[stem_model_selection]['stems']
display_stems = STEM_MODELS[stem_model_selection]['display_stems']
# Dictionary to hold the audio data for each stem from Spleeter's output
stem_audio_data = {}
for spleeter_name, display_name in zip(spleeter_stems, display_stems):
stem_filepath = os.path.join(spleeter_output_path, f"{spleeter_name}.wav")
if not os.path.exists(stem_filepath):
gr.Warning(f"Stem file not found for {display_name}. Skipping this stem.")
continue
# Load the separated stem audio (it will be aligned and resampled by Spleeter)
# We enforce mono loading for consistent processing later
y_stem, sr_stem = librosa.load(stem_filepath, sr=sr, mono=True)
# Align the start of the stem using the previously detected global beat
y_stem_aligned = y_stem[start_sample:]
stem_audio_data[display_name] = y_stem_aligned
# Clean up Spleeter's intermediate directory
shutil.rmtree(spleeter_output_path)
if not stem_audio_data:
raise RuntimeError("No separated stems were successfully processed. Check Spleeter output.")
# 4. Generate Loops (4, 6, 8 Bars)
progress(0.45, desc="Generating Time-Aligned Loops...")
for stem_name, y_stem in stem_audio_data.items():
loops_dir = os.path.join(output_root, 'LOOPS', stem_name)
os.makedirs(loops_dir, exist_ok=True)
samples_per_bar = samples_per_beat * 4 # Assuming 4/4 time signature
for num_bars in LOOP_BAR_LENGTHS:
samples_per_loop = samples_per_bar * num_bars
for i in range(0, len(y_stem) - samples_per_loop + 1, samples_per_loop):
try:
loop_segment = y_stem[i:i + samples_per_loop]
if len(loop_segment) < samples_per_loop * 0.9:
continue
index = i // samples_per_loop + 1
# Naming convention: {BPM_Key}_{Stem}_{Bars}Bar_{Index}.wav
filename = f"{key_mode_name}_{stem_name}_{num_bars}Bar_{index:02d}.wav"
save_segment(os.path.join(loops_dir, filename), loop_segment, sr)
except Exception as e:
gr.Warning(f"Error slicing {num_bars}-bar loop for {stem_name}: {e}")
continue
# 5. Generate One-Shots (Transient Detection)
progress(0.70, desc="Generating One-Shots (Transient Detection)...")
# Sensitivity mapping: 1=Few/Loud (large pre_max), 10=Many/Quiet (small pre_max)
pre_max_frames = int(12 - one_shot_sensitivity)
if pre_max_frames < 2: pre_max_frames = 2
pre_slice_samples = int(sr * 0.05)
post_slice_samples = int(sr * 0.25)
for stem_name, y_stem in stem_audio_data.items():
shots_dir = os.path.join(output_root, 'ONESHOTS', stem_name)
os.makedirs(shots_dir, exist_ok=True)
try:
o_env = librosa.onset.onset_strength(y=y_stem, sr=sr, aggregate=np.median)
onset_frames = librosa.onset.onset_detect(
onset_envelope=o_env,
sr=sr,
units='frames',
pre_max=pre_max_frames,
post_max=pre_max_frames // 2,
wait=10
)
onset_samples = librosa.frames_to_samples(onset_frames)
for i, sample_index in enumerate(onset_samples):
start = max(0, sample_index - pre_slice_samples)
end = min(len(y_stem), sample_index + post_slice_samples)
shot_segment = y_stem[start:end]
if len(shot_segment) > int(sr * 0.05):
filename = f"{key_mode_name}_{stem_name}_OneShot_{i+1:03d}.wav"
save_segment(os.path.join(shots_dir, filename), shot_segment, sr)
except Exception as e:
gr.Warning(f"Error during One-Shot detection for {stem_name}. Skipping. Details: {e}")
continue
# 6. Packaging (License and ZIP)
progress(0.90, desc="Creating License and Packaging Files...")
# Create the License.txt file
license_content = f"""
-- PROFESSIONAL LOOP PACK LICENSE AGREEMENT --
Product: {OUTPUT_FOLDER_NAME}
BPM/Key Reference: {key_mode_name}
Separation Model Used: {stem_model_selection}
1. Royalty-Free Use: All sounds, loops, and one-shots within this pack are
100% royalty-free for commercial use in musical compositions, sound design,
and public performances. You may use them in your own tracks and sell those
tracks without owing any additional royalties to the creator.
2. Restrictions: Redistribution, repackaging, or re-selling of the individual
sounds or loops as part of another sound library or sample pack is strictly
prohibited.
3. Generated: {os.uname().nodename}
"""
license_filepath = os.path.join(output_root, 'License.txt')
with open(license_filepath, 'w') as f:
f.write(license_content.strip())
# Create the final ZIP file