# Hugging Face Space status banner (scraped page residue): Spaces — Sleeping
| import gradio as gr | |
| import librosa | |
| import numpy as np | |
| import os | |
| import shutil | |
| import zipfile | |
| import tempfile | |
| import soundfile as sf | |
| import traceback | |
| import subprocess # Necessary for running Spleeter | |
| from typing import Tuple, List | |
# --- Configuration ---

# Name of the root folder the generated sample pack is assembled into.
OUTPUT_FOLDER_NAME = "PRO_LOOP_PACK"

# Mapping of model selection to Spleeter config and resulting stem types.
#   'spleeter_config': value passed to the CLI as "spleeter:<config>"
#   'stems':           WAV basenames Spleeter writes for this config
#   'display_stems':   user-facing names, index-aligned with 'stems'
STEM_MODELS = {
    '2-Stems (Vocals/Inst)': {
        'spleeter_config': '2stems',
        'stems': ['vocals', 'accompaniment'],  # Spleeter output names
        'display_stems': ['Vocals', 'Instrumental']  # User-facing names
    },
    '4-Stems (Drums, Bass, Vocals, Other)': {
        'spleeter_config': '4stems',
        'stems': ['vocals', 'drums', 'bass', 'other'],
        'display_stems': ['Vocals', 'Drums', 'Bass', 'Other']
    },
    '5-Stems (Drums, Bass, Vocals, Piano, Other)': {
        'spleeter_config': '5stems',
        'stems': ['vocals', 'drums', 'bass', 'piano', 'other'],
        'display_stems': ['Vocals', 'Drums', 'Bass', 'Piano', 'Other']
    },
}

# Loop lengths, in bars, sliced out of every separated stem.
LOOP_BAR_LENGTHS = [4, 6, 8]

# Key Detection Templates (as defined previously): Krumhansl-style
# pitch-class weight profiles rooted at C; rotated via np.roll to score
# all 12 major and 12 minor keys against the mean chroma vector.
KEY_TEMPLATES = {
    'major': [6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.16, 3.61, 3.28, 2.91],
    'minor': [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.91, 3.03, 3.34]
}

# Pitch-class names, index-aligned with chroma bins (index 0 = C).
NOTES = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
| # --- Utility Functions --- | |
def save_segment(filepath: str, audio_data: np.ndarray, sr: int):
    """Persist a NumPy audio buffer to *filepath* as a 16-bit PCM WAV file.

    The sample rate is passed through explicitly because Spleeter emits
    44100 Hz audio regardless of the source material.
    """
    sf.write(filepath, audio_data, sr, format='WAV', subtype='PCM_16')
def detect_key_and_mode(y: np.ndarray, sr: int) -> str:
    """Estimate the musical key of a mono audio signal.

    Correlates the normalized mean chroma vector against all 24 rotated
    major/minor pitch-class templates and reports the best match.

    Args:
        y: Mono audio samples.
        sr: Sample rate of ``y`` in Hz.

    Returns:
        The detected key with spaces removed (e.g. 'CMajor', 'AMinor'),
        'KeyDetectionAmbiguous' when no template scores at least 0.2, or
        'KeyDetectionFailed' on error or silent input.
    """
    try:
        chroma = librosa.feature.chroma_cqt(y=y, sr=sr)
        chroma_mean = np.mean(chroma, axis=1)

        # Guard: silent input yields an all-zero chroma vector. Dividing by
        # zero would propagate NaNs, every comparison below would be False,
        # and the failure would be masked as "ambiguous". Fail explicitly.
        total_energy = chroma_mean.sum()
        if total_energy <= 0:
            return "KeyDetectionFailed"
        chroma_mean /= total_energy

        # Hoist the C-rooted profiles out of the loop as NumPy arrays.
        major_profile = np.asarray(KEY_TEMPLATES['major'])
        minor_profile = np.asarray(KEY_TEMPLATES['minor'])

        best_key = "Unknown"
        max_correlation = -1.0
        for i, note in enumerate(NOTES):
            # Rotate the profiles so the tonic lines up with chroma bin i.
            corr_major = np.dot(chroma_mean, np.roll(major_profile, i))
            if corr_major > max_correlation:
                max_correlation = corr_major
                best_key = f"{note} Major"
            corr_minor = np.dot(chroma_mean, np.roll(minor_profile, i))
            if corr_minor > max_correlation:
                max_correlation = corr_minor
                best_key = f"{note} Minor"

        # A weak best score means the chroma resembled no key profile.
        if max_correlation < 0.2:
            return "KeyDetectionAmbiguous"
        return best_key.replace(' ', '')
    except Exception as e:
        print(f"Key Detection Failed: {e}")
        return "KeyDetectionFailed"
def separate_stems(audio_path: str, model_name: str, output_dir: str) -> str:
    """Execute Spleeter source separation via subprocess.

    Requires the 'spleeter' package (and its CLI entry point) to be
    installed in the environment.

    Args:
        audio_path: Path to the mixed input audio file.
        model_name: Key into STEM_MODELS selecting the separation config.
        output_dir: Directory Spleeter writes its output into.

    Returns:
        Path to the subdirectory holding the separated stem WAV files
        (Spleeter names it after the input file, without its extension).

    Raises:
        RuntimeError: If the Spleeter process fails or its expected
            output directory is missing.
    """
    spleeter_config = STEM_MODELS[model_name]['spleeter_config']
    # Spleeter command: spleeter separate -o {output_dir} -p {config} {input_file}
    # Spleeter creates a subfolder named after the input file (without
    # extension); that intermediate folder is cleaned up by the caller.
    command = [
        "spleeter", "separate",
        "-o", output_dir,
        "-p", f"spleeter:{spleeter_config}",
        audio_path
    ]
    try:
        # Run Spleeter; check=True raises CalledProcessError on non-zero exit.
        result = subprocess.run(command, check=True, capture_output=True, text=True)
        print("Spleeter Output:", result.stdout)
        print("Spleeter Errors:", result.stderr)
        # Locate the per-file subdirectory Spleeter created.
        base_filename = os.path.splitext(os.path.basename(audio_path))[0]
        spleeter_output_path = os.path.join(output_dir, base_filename)
        if not os.path.isdir(spleeter_output_path):
            raise FileNotFoundError(f"Spleeter output directory not found at: {spleeter_output_path}")
        return spleeter_output_path
    except subprocess.CalledProcessError as e:
        # Chain with `from e` so the original subprocess failure (exit code,
        # captured output) stays attached to the traceback instead of being lost.
        raise RuntimeError(f"Spleeter command failed. Check if 'spleeter' is installed. Output: {e.stdout}, Error: {e.stderr}") from e
    except Exception as e:
        raise RuntimeError(f"Error during Spleeter execution: {e}") from e
| # --- Main Processing Function --- | |
| def create_market_ready_pack( | |
| audio_file_path: str, | |
| one_shot_sensitivity: float, | |
| stem_model_selection: str, | |
| progress=gr.Progress() | |
| ) -> Tuple[str | None, str]: | |
| """ | |
| Processes the input audio file, generates loops and one-shots, | |
| and packages them into a market-ready ZIP file. | |
| """ | |
| temp_dir = None | |
| if not audio_file_path: | |
| return None, "Error: Please upload an audio file before proceeding." | |
| try: | |
| # 1. Setup Temporary Directories | |
| temp_dir = tempfile.mkdtemp() | |
| output_root = os.path.join(temp_dir, OUTPUT_FOLDER_NAME) | |
| os.makedirs(output_root, exist_ok=True) | |
| progress(0.05, desc="Loading and Verifying Audio...") | |
| # Robust Audio Loading (Load full mix for analysis) | |
| y_full, sr = librosa.load(audio_file_path, sr=None, mono=True) | |
| if y_full.size == 0: | |
| raise ValueError("Loaded audio is empty.") | |
| # 2. Advanced Audio Analysis (Tempo and Key) | |
| progress(0.15, desc="Analyzing Tempo and Musical Key...") | |
| tempo = 120.0 | |
| start_sample = 0 | |
| key_mode_name = "120BPM_UnknownKey" | |
| try: | |
| tempo, beat_frames = librosa.beat.beat_track(y=y_full, sr=sr, trim=True) | |
| key_mode_name = detect_key_and_mode(y_full, sr) | |
| samples_per_beat = int((60 / tempo) * sr) | |
| start_sample = librosa.frames_to_samples(beat_frames[0]) if beat_frames.size > 0 else 0 | |
| gr.Info(f"Analysis Complete: {int(tempo)} BPM, {key_mode_name}.") | |
| key_mode_name = f"{int(tempo)}BPM_{key_mode_name}" | |
| except Exception as e: | |
| gr.Warning(f"Warning: Tempo or Key detection failed ({e}). Using default 120 BPM and 'Unknown Key'.") | |
| samples_per_beat = int((60 / 120.0) * sr) # Fallback beat timing | |
| # 3. REAL STEM SEPARATION using Spleeter | |
| progress(0.25, desc=f"Separating Stems using {stem_model_selection} model...") | |
| spleeter_output_path = separate_stems(audio_file_path, stem_model_selection, output_root) | |
| spleeter_stems = STEM_MODELS[stem_model_selection]['stems'] | |
| display_stems = STEM_MODELS[stem_model_selection]['display_stems'] | |
| # Dictionary to hold the audio data for each stem from Spleeter's output | |
| stem_audio_data = {} | |
| for spleeter_name, display_name in zip(spleeter_stems, display_stems): | |
| stem_filepath = os.path.join(spleeter_output_path, f"{spleeter_name}.wav") | |
| if not os.path.exists(stem_filepath): | |
| gr.Warning(f"Stem file not found for {display_name}. Skipping this stem.") | |
| continue | |
| # Load the separated stem audio (it will be aligned and resampled by Spleeter) | |
| # We enforce mono loading for consistent processing later | |
| y_stem, sr_stem = librosa.load(stem_filepath, sr=sr, mono=True) | |
| # Align the start of the stem using the previously detected global beat | |
| y_stem_aligned = y_stem[start_sample:] | |
| stem_audio_data[display_name] = y_stem_aligned | |
| # Clean up Spleeter's intermediate directory | |
| shutil.rmtree(spleeter_output_path) | |
| if not stem_audio_data: | |
| raise RuntimeError("No separated stems were successfully processed. Check Spleeter output.") | |
| # 4. Generate Loops (4, 6, 8 Bars) | |
| progress(0.45, desc="Generating Time-Aligned Loops...") | |
| for stem_name, y_stem in stem_audio_data.items(): | |
| loops_dir = os.path.join(output_root, 'LOOPS', stem_name) | |
| os.makedirs(loops_dir, exist_ok=True) | |
| samples_per_bar = samples_per_beat * 4 # Assuming 4/4 time signature | |
| for num_bars in LOOP_BAR_LENGTHS: | |
| samples_per_loop = samples_per_bar * num_bars | |
| for i in range(0, len(y_stem) - samples_per_loop + 1, samples_per_loop): | |
| try: | |
| loop_segment = y_stem[i:i + samples_per_loop] | |
| if len(loop_segment) < samples_per_loop * 0.9: | |
| continue | |
| index = i // samples_per_loop + 1 | |
| # Naming convention: {BPM_Key}_{Stem}_{Bars}Bar_{Index}.wav | |
| filename = f"{key_mode_name}_{stem_name}_{num_bars}Bar_{index:02d}.wav" | |
| save_segment(os.path.join(loops_dir, filename), loop_segment, sr) | |
| except Exception as e: | |
| gr.Warning(f"Error slicing {num_bars}-bar loop for {stem_name}: {e}") | |
| continue | |
| # 5. Generate One-Shots (Transient Detection) | |
| progress(0.70, desc="Generating One-Shots (Transient Detection)...") | |
| # Sensitivity mapping: 1=Few/Loud (large pre_max), 10=Many/Quiet (small pre_max) | |
| pre_max_frames = int(12 - one_shot_sensitivity) | |
| if pre_max_frames < 2: pre_max_frames = 2 | |
| pre_slice_samples = int(sr * 0.05) | |
| post_slice_samples = int(sr * 0.25) | |
| for stem_name, y_stem in stem_audio_data.items(): | |
| shots_dir = os.path.join(output_root, 'ONESHOTS', stem_name) | |
| os.makedirs(shots_dir, exist_ok=True) | |
| try: | |
| o_env = librosa.onset.onset_strength(y=y_stem, sr=sr, aggregate=np.median) | |
| onset_frames = librosa.onset.onset_detect( | |
| onset_envelope=o_env, | |
| sr=sr, | |
| units='frames', | |
| pre_max=pre_max_frames, | |
| post_max=pre_max_frames // 2, | |
| wait=10 | |
| ) | |
| onset_samples = librosa.frames_to_samples(onset_frames) | |
| for i, sample_index in enumerate(onset_samples): | |
| start = max(0, sample_index - pre_slice_samples) | |
| end = min(len(y_stem), sample_index + post_slice_samples) | |
| shot_segment = y_stem[start:end] | |
| if len(shot_segment) > int(sr * 0.05): | |
| filename = f"{key_mode_name}_{stem_name}_OneShot_{i+1:03d}.wav" | |
| save_segment(os.path.join(shots_dir, filename), shot_segment, sr) | |
| except Exception as e: | |
| gr.Warning(f"Error during One-Shot detection for {stem_name}. Skipping. Details: {e}") | |
| continue | |
| # 6. Packaging (License and ZIP) | |
| progress(0.90, desc="Creating License and Packaging Files...") | |
| # Create the License.txt file | |
| license_content = f""" | |
| -- PROFESSIONAL LOOP PACK LICENSE AGREEMENT -- | |
| Product: {OUTPUT_FOLDER_NAME} | |
| BPM/Key Reference: {key_mode_name} | |
| Separation Model Used: {stem_model_selection} | |
| 1. Royalty-Free Use: All sounds, loops, and one-shots within this pack are | |
| 100% royalty-free for commercial use in musical compositions, sound design, | |
| and public performances. You may use them in your own tracks and sell those | |
| tracks without owing any additional royalties to the creator. | |
| 2. Restrictions: Redistribution, repackaging, or re-selling of the individual | |
| sounds or loops as part of another sound library or sample pack is strictly | |
| prohibited. | |
| 3. Generated: {os.uname().nodename} | |
| """ | |
| license_filepath = os.path.join(output_root, 'License.txt') | |
| with open(license_filepath, 'w') as f: | |
| f.write(license_content.strip()) | |
| # Create the final ZIP file |