Alpha123B's picture
πŸŽ›οΈ Add SoundForge Studio - complete audio processing app
f7b34c3 verified
Raw
History Blame Contribute Delete
77.1 kB
"""
πŸŽ›οΈ SoundForge Studio β€” Professional Audio Remixing & Copyright-Free Processing
A comprehensive web application for making songs copyright-free, creating remixes, and mashups.
"""
import gradio as gr
import numpy as np
import os
import tempfile
import json
import time
import warnings
import shutil
from pathlib import Path
warnings.filterwarnings("ignore")
# ─── Audio Processing Imports ─────────────────────────────────────────────────
import librosa
import soundfile as sf
from scipy import signal
from scipy.io import wavfile
try:
from pedalboard import (
Pedalboard, Reverb, Chorus, Compressor, Delay,
HighpassFilter, LowShelfFilter, HighShelfFilter,
PeakFilter, Phaser, Gain, Limiter, Distortion
)
HAS_PEDALBOARD = True
except ImportError:
HAS_PEDALBOARD = False
print("⚠️ pedalboard not installed β€” using fallback effects")
try:
from pydub import AudioSegment
from pydub.effects import normalize as pydub_normalize
HAS_PYDUB = True
except ImportError:
HAS_PYDUB = False
print("⚠️ pydub not installed β€” limited format support")
# ─── Temp Directory Setup ─────────────────────────────────────────────────────
# Per-process scratch directory. The engines visible in this file write their
# stems and rendered outputs underneath it.
TEMP_DIR = Path(tempfile.mkdtemp(prefix="soundforge_"))
# mkdtemp() has already created the directory; exist_ok=True makes this a no-op.
os.makedirs(TEMP_DIR, exist_ok=True)
# ═══════════════════════════════════════════════════════════════════════════════
# CORE AUDIO ENGINE
# ═══════════════════════════════════════════════════════════════════════════════
class AudioAnalyzer:
    """Analyze audio properties: BPM, key, duration, spectral features.

    Every method is a ``@staticmethod``; the class only groups the analysis
    helpers together with the lookup tables used for key detection.
    """

    # Pitch-class names. The list index is the number of chroma bins the
    # averaged chroma vector is rotated by before it is compared to a profile.
    KEYS = ['C', 'C#', 'D', 'Eb', 'E', 'F', 'F#', 'G', 'Ab', 'A', 'Bb', 'B']
    # Twelve-element tonal profiles that the rotated chroma vector is
    # correlated against (Krumhansl-Schmuckler key finding, see detect_key).
    KEY_PROFILES = {
        'major': [6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88],
        'minor': [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17]
    }

    @staticmethod
    def load_audio(filepath, sr=44100):
        """Load an audio file and return ``(y, sr)`` with ``y`` always 2-D.

        The file is resampled to ``sr``. A mono file (1-D result from librosa)
        is duplicated into two identical channels so callers can rely on a
        ``(channels, samples)`` layout.
        """
        y, sr = librosa.load(filepath, sr=sr, mono=False)
        if y.ndim == 1:
            y = np.stack([y, y])  # mono to stereo
        return y, sr

    @staticmethod
    def load_audio_mono(filepath, sr=44100):
        """Load an audio file downmixed to mono and return ``(y, sr)``."""
        return librosa.load(filepath, sr=sr, mono=True)

    @staticmethod
    def detect_bpm(y_mono, sr):
        """Estimate the tempo in BPM, rounded to one decimal place.

        This is a best-effort estimate: if beat tracking raises for any
        reason, the conventional default of 120.0 is returned instead.
        """
        try:
            tempo, _ = librosa.beat.beat_track(y=y_mono, sr=sr)
            # The tempo may arrive as a plain number, a 0-d array or a 1-element
            # array; atleast_1d handles all three (indexing a 0-d array with
            # [0] used to raise and silently trigger the 120.0 fallback).
            return round(float(np.atleast_1d(tempo).ravel()[0]), 1)
        except Exception:
            return 120.0

    @staticmethod
    def _match_key(chroma_avg):
        """Return the ``(key, mode)`` whose profile best matches ``chroma_avg``.

        ``chroma_avg`` is a 12-element vector of average chroma energy. Each of
        the 12 rotations is correlated with the major and minor profiles and
        the highest Pearson correlation wins (first match wins ties, iterating
        keys in ``KEYS`` order and 'major' before 'minor').

        Returns ``None`` when no meaningful correlation exists: wrong length,
        non-finite values, or a perfectly flat vector (e.g. silence), for which
        every correlation would be NaN.
        """
        chroma_avg = np.asarray(chroma_avg, dtype=float)
        if chroma_avg.shape != (12,) or not np.all(np.isfinite(chroma_avg)):
            return None
        if chroma_avg.max() == chroma_avg.min():
            return None

        profiles = {
            mode: np.array(values)
            for mode, values in AudioAnalyzer.KEY_PROFILES.items()
        }
        best_corr = -2  # below any valid correlation, so the first one wins
        best_match = None
        for i, key in enumerate(AudioAnalyzer.KEYS):
            shifted = np.roll(chroma_avg, -i)
            for mode in ('major', 'minor'):
                corr = np.corrcoef(shifted, profiles[mode])[0, 1]
                if corr > best_corr:
                    best_corr = corr
                    best_match = (key, mode)
        return best_match

    @staticmethod
    def detect_key(y_mono, sr):
        """Detect the musical key and return it as e.g. ``"A minor"``.

        Uses time-averaged CQT chroma and Krumhansl-Schmuckler profile
        matching. Returns ``"Unknown"`` when the chroma computation fails or
        when the chroma vector carries no tonal information (flat/silent
        input), instead of reporting an arbitrary key.
        """
        try:
            chroma = librosa.feature.chroma_cqt(y=y_mono, sr=sr)
            match = AudioAnalyzer._match_key(chroma.mean(axis=1))
        except Exception:
            return "Unknown"
        if match is None:
            return "Unknown"
        key, mode = match
        return f"{key} {mode}"

    @staticmethod
    def get_spectral_features(y_mono, sr):
        """Return mean spectral centroid, bandwidth and rolloff.

        The result is a dict with keys ``'centroid'``, ``'bandwidth'`` and
        ``'rolloff'``, each rounded to one decimal place. If any feature
        computation raises, all three values are reported as 0.
        """
        try:
            centroid = librosa.feature.spectral_centroid(y=y_mono, sr=sr)[0].mean()
            bandwidth = librosa.feature.spectral_bandwidth(y=y_mono, sr=sr)[0].mean()
            rolloff = librosa.feature.spectral_rolloff(y=y_mono, sr=sr)[0].mean()
            return {
                'centroid': round(float(centroid), 1),
                'bandwidth': round(float(bandwidth), 1),
                'rolloff': round(float(rolloff), 1)
            }
        except Exception:
            return {'centroid': 0, 'bandwidth': 0, 'rolloff': 0}

    @staticmethod
    def get_loudness_profile(y_mono, sr, hop_length=512):
        """Return the frame-wise RMS energy of ``y_mono``.

        ``sr`` is accepted for signature symmetry with the other analyzers but
        is not used by the RMS computation.
        """
        return librosa.feature.rms(y=y_mono, hop_length=hop_length)[0]
class AudioProcessor:
    """Core audio processing operations.

    Audio arrays are either 1-D ``(samples,)`` or 2-D ``(channels, samples)``.
    Unless a method says otherwise, it returns the same layout it was given.
    """

    @staticmethod
    def _to_mono(y):
        """Average a ``(channels, samples)`` array to 1-D; pass 1-D through."""
        return y.mean(axis=0) if y.ndim > 1 else y

    @staticmethod
    def pitch_shift(y, sr, n_steps):
        """Pitch shift audio by ``n_steps`` semitones.

        Shifts smaller than 0.01 semitones return ``y`` unchanged. Multi-channel
        input is shifted channel by channel.
        """
        if abs(n_steps) < 0.01:
            return y
        if y.ndim == 1:
            return librosa.effects.pitch_shift(y=y, sr=sr, n_steps=n_steps)
        return np.stack([
            librosa.effects.pitch_shift(y=channel, sr=sr, n_steps=n_steps)
            for channel in y
        ])

    @staticmethod
    def time_stretch(y, sr, rate):
        """Time stretch audio by ``rate`` (>1 is faster, <1 is slower).

        Rates within 0.001 of 1.0 return ``y`` unchanged. Multi-channel input
        is stretched per channel and trimmed to the shortest result so the
        channels can be stacked again.
        """
        if abs(rate - 1.0) < 0.001:
            return y
        if y.ndim == 1:
            return librosa.effects.time_stretch(y=y, rate=rate)
        stretched = [librosa.effects.time_stretch(y=channel, rate=rate) for channel in y]
        min_len = min(channel.shape[0] for channel in stretched)
        return np.stack([channel[:min_len] for channel in stretched])

    @staticmethod
    def apply_eq(y, sr, low_gain_db=0, mid_gain_db=0, high_gain_db=0):
        """Apply a 3-band EQ (below 300 Hz, 300-4000 Hz, above 4000 Hz).

        The signal is split with 4th-order Butterworth filters, each band is
        scaled by its gain in dB, and the bands are summed. Filtering runs
        along the last axis, so every channel of a multi-channel input is
        processed independently and the stereo image is preserved.

        Raises:
            ValueError: from ``scipy.signal.butter`` if ``sr`` is too low for
                the 4000 Hz band edge (``sr / 2`` must exceed 4000).
        """
        y = np.asarray(y)
        if not np.issubdtype(y.dtype, np.floating):
            y = y.astype(np.float64)
        bands = (
            (signal.butter(4, 300, btype='low', fs=sr, output='sos'), low_gain_db),
            (signal.butter(4, [300, 4000], btype='band', fs=sr, output='sos'), mid_gain_db),
            (signal.butter(4, 4000, btype='high', fs=sr, output='sos'), high_gain_db),
        )
        result = np.zeros_like(y)
        for sos, gain_db in bands:
            result += signal.sosfilt(sos, y, axis=-1) * (10 ** (gain_db / 20))
        return result

    @staticmethod
    def apply_reverb_simple(y, sr, room_size=0.3, damping=0.5, wet_level=0.2):
        """Apply a simple reverb by convolving with a synthetic impulse response.

        The impulse response is exponentially decaying random noise of
        ``2 * room_size`` seconds, so the wet signal differs from call to call.
        One response is shared by all channels and the wet signal is scaled by
        its overall peak, which keeps the balance between channels intact.

        If the impulse response would be empty (``room_size`` too small) or the
        input has no samples, an unprocessed copy of ``y`` is returned.
        """
        y = np.asarray(y)
        n_samples = y.shape[-1]
        ir_length = int(sr * room_size * 2)
        if ir_length < 1 or n_samples == 0:
            return y.copy()

        # Generate synthetic impulse response
        ir = np.random.randn(ir_length) * np.exp(-np.linspace(0, damping * 10, ir_length))
        ir = ir / np.max(np.abs(ir))

        # Convolve, keeping only the part that lines up with the dry signal
        if y.ndim == 1:
            wet = signal.fftconvolve(y, ir, mode='full')[:n_samples]
        else:
            wet = np.stack([
                signal.fftconvolve(channel, ir, mode='full')[:n_samples]
                for channel in y
            ])
        wet = wet / (np.max(np.abs(wet)) + 1e-8)
        return y * (1 - wet_level) + wet * wet_level

    @staticmethod
    def apply_chorus_simple(y, sr, rate_hz=1.5, depth=0.003, mix=0.3):
        """Apply a chorus: mix the signal with a copy read through a varying delay.

        The delay in samples is ``depth * sr * (1 + sin(2*pi*rate_hz*t))``
        truncated to an integer. Positions whose delayed source index falls
        outside the signal contribute silence. Every channel uses the same
        delay curve.
        """
        y = np.asarray(y)
        n_samples = y.shape[-1]
        positions = np.arange(n_samples)
        t = positions / sr
        delay_samples = (depth * sr * (1 + np.sin(2 * np.pi * rate_hz * t))).astype(int)

        # Vectorised gather replaces a per-sample Python loop.
        source = positions - delay_samples
        valid = (source >= 0) & (source < n_samples)
        chorus = np.zeros_like(y)
        chorus[..., valid] = y[..., source[valid]]
        return y * (1 - mix) + chorus * mix

    @staticmethod
    def apply_effects_chain(y, sr, effects_config):
        """Apply a chain of effects using pedalboard when installed, else the fallback."""
        if HAS_PEDALBOARD:
            return AudioProcessor._apply_pedalboard(y, sr, effects_config)
        return AudioProcessor._apply_fallback(y, sr, effects_config)

    @staticmethod
    def _apply_pedalboard(y, sr, config):
        """Apply effects using Spotify's pedalboard library.

        ``config`` is a dict of flags and parameters. Effects are added in a
        fixed order: highpass, compressor, low shelf, high shelf, reverb,
        chorus, delay, limiter. If no effect is enabled, ``y`` is returned
        untouched; otherwise the audio is processed as float32.
        """
        effects = []
        if config.get('highpass', False):
            effects.append(HighpassFilter(cutoff_frequency_hz=config.get('highpass_freq', 80)))
        if config.get('compressor', False):
            effects.append(Compressor(
                threshold_db=config.get('comp_threshold', -20),
                ratio=config.get('comp_ratio', 4)
            ))
        if config.get('eq_low', 0) != 0:
            effects.append(LowShelfFilter(
                cutoff_frequency_hz=300,
                gain_db=config.get('eq_low', 0)
            ))
        if config.get('eq_high', 0) != 0:
            effects.append(HighShelfFilter(
                cutoff_frequency_hz=4000,
                gain_db=config.get('eq_high', 0)
            ))
        if config.get('reverb', False):
            effects.append(Reverb(
                room_size=config.get('reverb_size', 0.3),
                wet_level=config.get('reverb_wet', 0.15),
                damping=config.get('reverb_damping', 0.5)
            ))
        if config.get('chorus', False):
            effects.append(Chorus(
                rate_hz=config.get('chorus_rate', 1.5),
                depth=config.get('chorus_depth', 0.25),
                mix=config.get('chorus_mix', 0.3)
            ))
        if config.get('delay', False):
            effects.append(Delay(
                delay_seconds=config.get('delay_time', 0.3),
                mix=config.get('delay_mix', 0.2)
            ))
        if config.get('limiter', False):
            effects.append(Limiter(threshold_db=config.get('limiter_threshold', -1)))
        if not effects:
            return y

        board = Pedalboard(effects)
        # The board is fed a 2-D array; 1-D input is wrapped and unwrapped again.
        is_mono = y.ndim == 1
        y_2d = y[np.newaxis, :] if is_mono else y
        processed = board(y_2d.astype(np.float32), sr)
        return processed[0] if is_mono else processed

    @staticmethod
    def _apply_fallback(y, sr, config):
        """Fallback effects without pedalboard.

        Only reverb, chorus and low/high EQ are implemented here; the
        highpass, compressor, delay and limiter settings are ignored.
        ``chorus_depth`` is deliberately not forwarded: the simple chorus
        measures depth in seconds and keeps its own default.
        """
        result = y.copy()
        if config.get('reverb', False):
            result = AudioProcessor.apply_reverb_simple(
                result, sr,
                room_size=config.get('reverb_size', 0.3),
                damping=config.get('reverb_damping', 0.5),
                wet_level=config.get('reverb_wet', 0.15)
            )
        if config.get('chorus', False):
            result = AudioProcessor.apply_chorus_simple(
                result, sr,
                rate_hz=config.get('chorus_rate', 1.5),
                mix=config.get('chorus_mix', 0.3)
            )
        if config.get('eq_low', 0) != 0 or config.get('eq_high', 0) != 0:
            result = AudioProcessor.apply_eq(
                result, sr,
                low_gain_db=config.get('eq_low', 0),
                high_gain_db=config.get('eq_high', 0)
            )
        return result

    @staticmethod
    def normalize(y, target_db=-3):
        """Scale ``y`` so its absolute peak sits at ``target_db`` dBFS.

        Empty or (near-)silent input, with a peak below 1e-8, is returned
        unchanged to avoid dividing by zero.
        """
        y = np.asarray(y)
        if y.size == 0:
            return y
        peak = np.max(np.abs(y))
        if peak < 1e-8:
            return y
        target_amp = 10 ** (target_db / 20)
        return y * (target_amp / peak)

    @staticmethod
    def crossfade(y1, y2, sr, crossfade_ms=1000):
        """Join ``y1`` and ``y2`` with a linear crossfade and return mono audio.

        Each input is downmixed to mono on its own, so a stereo and a mono
        segment can be combined. The overlap is ``crossfade_ms`` long, capped
        by the shorter segment. With no overlap (``crossfade_ms <= 0`` or an
        empty segment) the two segments are simply concatenated.
        """
        y1_mono = AudioProcessor._to_mono(y1)
        y2_mono = AudioProcessor._to_mono(y2)
        cf_samples = min(int(sr * crossfade_ms / 1000), len(y1_mono), len(y2_mono))
        if cf_samples <= 0:
            # y[-0:] is the whole array, so the slicing below would be wrong.
            return np.concatenate([y1_mono, y2_mono])

        fade_out = np.linspace(1, 0, cf_samples)
        fade_in = np.linspace(0, 1, cf_samples)
        # Overlap region
        overlap = y1_mono[-cf_samples:] * fade_out + y2_mono[:cf_samples] * fade_in
        return np.concatenate([
            y1_mono[:-cf_samples],
            overlap,
            y2_mono[cf_samples:]
        ])

    @staticmethod
    def mix_tracks(tracks, volumes, sr):
        """Mix several tracks into one mono signal.

        Every track is downmixed to mono, zero-padded to the longest track,
        scaled by its volume and summed. If the sum peaks above 0.95 it is
        scaled down to 0.95 to prevent clipping. Tracks and volumes are paired
        positionally; tracks without a matching volume are left out. An empty
        track list yields one second of silence at ``sr``.
        """
        if not tracks:
            return np.zeros(int(sr))

        mono_tracks = [AudioProcessor._to_mono(track) for track in tracks]
        max_len = max(len(track) for track in mono_tracks)
        result = np.zeros(max_len)
        for track, vol in zip(mono_tracks, volumes):
            result[:len(track)] += track * vol

        # Prevent clipping
        peak = np.max(np.abs(result)) if max_len else 0.0
        if peak > 0.95:
            result = result * (0.95 / peak)
        return result

    @staticmethod
    def save_audio(y, sr, filepath, format='wav'):
        """Write ``y`` to ``filepath`` and return ``filepath``.

        ``format`` selects the encoding: ``'wav'`` (24-bit PCM), ``'flac'``,
        or ``'mp3'`` (320 kbit/s through pydub when it is installed). Any other
        value, or ``'mp3'`` without pydub, is handed to soundfile, which picks
        the format from the file extension.
        """
        # soundfile expects (samples, channels)
        y_save = y.T if y.ndim > 1 else y
        if format == 'wav':
            sf.write(filepath, y_save, sr, subtype='PCM_24')
        elif format == 'flac':
            sf.write(filepath, y_save, sr, format='FLAC')
        elif format == 'mp3' and HAS_PYDUB:
            # Save as wav first, then convert
            temp_wav = str(filepath) + '.tmp.wav'
            try:
                sf.write(temp_wav, y_save, sr, subtype='PCM_16')
                audio = AudioSegment.from_wav(temp_wav)
                audio.export(filepath, format='mp3', bitrate='320k')
            finally:
                # Remove the intermediate file even when the export fails.
                if os.path.exists(temp_wav):
                    os.remove(temp_wav)
        else:
            sf.write(filepath, y_save, sr)
        return filepath
class StemSeparator:
    """Separate audio into approximate stems using spectral methods.

    The stems come from harmonic/percussive decomposition plus frequency-band
    filtering. No source-separation model (such as Demucs) is invoked by this
    class, so the stems overlap and are not clean isolations.
    """

    @staticmethod
    def separate_stems(filepath, progress_callback=None):
        """Split ``filepath`` into stem WAV files and return their paths.

        The file is loaded as mono at 44.1 kHz. Five files are written into a
        fresh directory under ``TEMP_DIR``: ``vocals``, ``drums``, ``bass``,
        ``other`` and ``instrumental``.

        Args:
            filepath: Path of the audio file to separate.
            progress_callback: Optional ``callable(fraction, message)`` that is
                invoked as the separation advances.

        Returns:
            dict mapping each stem name to the path of its WAV file.
        """
        def report(fraction, message):
            if progress_callback:
                progress_callback(fraction, message)

        n_fft = 4096
        hop_length = 1024

        y, sr = librosa.load(filepath, sr=44100, mono=True)
        report(0.1, "Computing STFT...")
        D = librosa.stft(y, n_fft=n_fft, hop_length=hop_length)
        freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)

        report(0.3, "Separating frequency bands...")
        # ── Vocals: harmonic component, emphasised in the 250 Hz - 5 kHz band ──
        H, P = librosa.decompose.hpss(D, margin=3.0)
        vocals = librosa.istft(H, hop_length=hop_length)
        sos_vocal = signal.butter(6, [250, 5000], btype='band', fs=sr, output='sos')
        vocals_filtered = signal.sosfilt(sos_vocal, vocals)
        # Mix original harmonic with filtered
        vocals = vocals * 0.4 + vocals_filtered * 0.6

        report(0.5, "Extracting drums...")
        # ── Drums: percussive component ──
        drums = librosa.istft(P, hop_length=hop_length)

        report(0.7, "Extracting bass...")
        # ── Bass: STFT bins below 300 Hz, everything above zeroed ──
        bass_hi = np.searchsorted(freqs, 300)
        bass_D = np.zeros_like(D)
        bass_D[:bass_hi, :] = D[:bass_hi, :]
        bass = librosa.istft(bass_D, hop_length=hop_length)

        # ── Other: original minus half of each of the other three stems ──
        min_len = min(len(y), len(vocals), len(drums), len(bass))
        y_trim = y[:min_len]
        vocals = vocals[:min_len]
        drums = drums[:min_len]
        bass = bass[:min_len]
        other = y_trim - vocals * 0.5 - drums * 0.5 - bass * 0.5
        # Soft clip
        other = np.tanh(other)

        report(0.9, "Saving stems...")
        # mkdtemp guarantees a unique directory. A name derived from the
        # whole-second clock let two separations started within the same second
        # overwrite each other's stem files.
        stem_dir = Path(tempfile.mkdtemp(prefix="stems_", dir=TEMP_DIR))
        stems = {}
        for name, audio in [('vocals', vocals), ('drums', drums), ('bass', bass), ('other', other)]:
            # Normalize each stem to a 0.9 peak (silent stems are left as is)
            peak = np.max(np.abs(audio))
            if peak > 0:
                audio = audio * (0.9 / peak)
            path = stem_dir / f"{name}.wav"
            sf.write(str(path), audio, sr)
            stems[name] = str(path)

        # Also save instrumental (original minus 70% of the vocal estimate)
        instrumental = y_trim - vocals * 0.7
        instrumental = instrumental * (0.9 / (np.max(np.abs(instrumental)) + 1e-8))
        inst_path = stem_dir / "instrumental.wav"
        sf.write(str(inst_path), instrumental, sr)
        stems['instrumental'] = str(inst_path)

        report(1.0, "Done!")
        return stems
class CopyrightFreeEngine:
    """Render an altered version of a track from individually processed stems.

    NOTE(review): the class name states an intent, not a legal outcome. The
    code only modifies the audio signal.
    """

    @staticmethod
    def process(filepath, pitch_shift=0.5, speed_change=1.03,
                eq_shift=True, add_reverb=True, add_texture=True,
                stereo_widen=True, micro_timing=True,
                intensity='medium', progress_callback=None):
        """Render an altered copy of ``filepath`` and return the output WAV path.

        Pipeline, in the order the code runs it:
        1. Split the file into stems via ``StemSeparator.separate_stems``.
        2. Pitch-shift each stem by a stem-specific fraction of ``pitch_shift``
           and time-stretch every stem by the same scaled ``speed_change``.
        3. Optionally (``eq_shift``) apply per-stem low/high EQ offsets.
        4. Optionally (``add_reverb``) run vocals, drums and other through the
           reverb effect chain; bass gets no reverb.
        5. Optionally (``add_texture``) add envelope-shaped random noise, so
           two runs on the same input do not produce identical output.
        6. Optionally (``micro_timing``) delay vocals, bass and other by a few
           milliseconds of leading zero padding; drums are not shifted.
        7. Mix the stems to mono with fixed per-stem volumes.
        8. Optionally (``stereo_widen``) build a two-channel signal with a
           small inter-channel delay and a slightly different EQ per channel.
        9. Peak-normalize to -1 dB, apply a limiter when pedalboard is
           available, and write a WAV file under ``TEMP_DIR``.

        ``intensity`` ('subtle', 'medium', 'strong', 'maximum') scales the
        pitch, speed and effect amounts by 0.5 / 1.0 / 1.5 / 2.0; any other
        value falls back to 'medium'. ``progress_callback``, when given, is
        called as ``progress_callback(fraction, message)``.

        NOTE(review): this changes the signal only. It does not change who
        owns the recording or the composition, so the result should not be
        presented as "copyright-free" without a licence for the source
        material -- confirm the naming and user-facing claims with whoever
        owns the legal side.
        """
        intensity_map = {
            'subtle': {'pitch_mult': 0.5, 'speed_mult': 0.5, 'fx_mult': 0.5},
            'medium': {'pitch_mult': 1.0, 'speed_mult': 1.0, 'fx_mult': 1.0},
            'strong': {'pitch_mult': 1.5, 'speed_mult': 1.5, 'fx_mult': 1.5},
            'maximum': {'pitch_mult': 2.0, 'speed_mult': 2.0, 'fx_mult': 2.0}
        }
        mult = intensity_map.get(intensity, intensity_map['medium'])
        if progress_callback:
            progress_callback(0.05, "Loading audio...")
        # NOTE(review): y and original_length are never read again in this
        # function; only sr from this load is used below.
        y, sr = librosa.load(filepath, sr=44100, mono=True)
        original_length = len(y)
        if progress_callback:
            progress_callback(0.1, "Separating stems for individual processing...")
        # Step 1: Separate stems (the separator's own progress is not forwarded)
        stems = StemSeparator.separate_stems(filepath, progress_callback=None)
        if progress_callback:
            progress_callback(0.3, "Applying pitch modifications...")
        processed_stems = {}
        # Step 2: Apply different micro-pitch shifts per stem.
        # Each stem gets its own fraction of the requested shift, so the stems
        # end up slightly detuned relative to one another.
        stem_pitch_offsets = {
            'vocals': pitch_shift * mult['pitch_mult'],
            'drums': pitch_shift * mult['pitch_mult'] * 0.3,  # Less pitch change on drums
            'bass': pitch_shift * mult['pitch_mult'] * 0.8,
            'other': pitch_shift * mult['pitch_mult'] * 1.1
        }
        for stem_name, stem_path in stems.items():
            # The pre-mixed instrumental is skipped; the four base stems are
            # processed and re-mixed instead.
            if stem_name == 'instrumental':
                continue
            y_stem, sr_stem = librosa.load(stem_path, sr=44100, mono=True)
            # Pitch shift
            ps = stem_pitch_offsets.get(stem_name, pitch_shift)
            if abs(ps) > 0.01:
                y_stem = AudioProcessor.pitch_shift(y_stem, sr_stem, ps)
            # Speed change: scale the deviation from 1.0, not the rate itself
            actual_speed = 1.0 + (speed_change - 1.0) * mult['speed_mult']
            if abs(actual_speed - 1.0) > 0.001:
                y_stem = AudioProcessor.time_stretch(y_stem, sr_stem, actual_speed)
            processed_stems[stem_name] = y_stem
        if progress_callback:
            progress_callback(0.5, "Applying EQ and effects...")
        # Step 3: Apply per-stem EQ shifts (gains in dB, scaled by intensity)
        if eq_shift:
            eq_configs = {
                'vocals': {'eq_low': -1 * mult['fx_mult'], 'eq_high': 1.5 * mult['fx_mult']},
                'drums': {'eq_low': 1 * mult['fx_mult'], 'eq_high': -0.5 * mult['fx_mult']},
                'bass': {'eq_low': 0.5 * mult['fx_mult'], 'eq_high': -1 * mult['fx_mult']},
                'other': {'eq_low': 0, 'eq_high': 1 * mult['fx_mult']}
            }
            # Only values of existing keys are replaced, so updating the dict
            # while iterating over it is safe here.
            for stem_name, y_stem in processed_stems.items():
                eq_cfg = eq_configs.get(stem_name, {})
                if eq_cfg:
                    processed_stems[stem_name] = AudioProcessor.apply_eq(
                        y_stem, sr,
                        low_gain_db=eq_cfg.get('eq_low', 0),
                        high_gain_db=eq_cfg.get('eq_high', 0)
                    )
        if progress_callback:
            progress_callback(0.6, "Adding spatial effects...")
        # Step 4: Add subtle reverb/space changes (no entry for bass)
        if add_reverb:
            reverb_configs = {
                'vocals': {'reverb': True, 'reverb_size': 0.2 * mult['fx_mult'], 'reverb_wet': 0.1 * mult['fx_mult']},
                'drums': {'reverb': True, 'reverb_size': 0.15 * mult['fx_mult'], 'reverb_wet': 0.05 * mult['fx_mult']},
                'other': {'reverb': True, 'reverb_size': 0.25 * mult['fx_mult'], 'reverb_wet': 0.12 * mult['fx_mult']}
            }
            for stem_name, config in reverb_configs.items():
                if stem_name in processed_stems:
                    processed_stems[stem_name] = AudioProcessor.apply_effects_chain(
                        processed_stems[stem_name], sr, config
                    )
        if progress_callback:
            progress_callback(0.7, "Adding micro-texture...")
        # Step 5: Add subtle noise texture
        if add_texture:
            for stem_name in processed_stems:
                noise_level = 0.002 * mult['fx_mult']
                noise = np.random.randn(len(processed_stems[stem_name])) * noise_level
                # Shape noise to follow the audio envelope.
                # The kernel size is kept odd (2k + 1 or 2001), as medfilt requires.
                envelope = np.abs(processed_stems[stem_name])
                envelope = signal.medfilt(envelope, kernel_size=min(2001, len(envelope) // 10 * 2 + 1))
                shaped_noise = noise * (envelope + 0.001)
                # In-place add: mutates the array stored in processed_stems.
                processed_stems[stem_name] += shaped_noise
        if progress_callback:
            progress_callback(0.8, "Applying micro-timing variations...")
        # Step 6: Micro-timing shifts (very subtle sample offsets per stem)
        if micro_timing:
            timing_offsets = {
                'vocals': int(sr * 0.003 * mult['fx_mult']),
                'drums': 0,  # Keep drums on grid
                'bass': int(sr * 0.001 * mult['fx_mult']),
                'other': int(sr * 0.004 * mult['fx_mult'])
            }
            for stem_name, offset in timing_offsets.items():
                if stem_name in processed_stems and offset > 0:
                    # Leading zeros delay the stem; nothing is trimmed at the end.
                    processed_stems[stem_name] = np.pad(
                        processed_stems[stem_name], (offset, 0)
                    )
        if progress_callback:
            progress_callback(0.85, "Mixing stems back together...")
        # Step 7: Remix all stems
        stem_volumes = {
            'vocals': 0.55,
            'drums': 0.45,
            'bass': 0.40,
            'other': 0.35
        }
        tracks = list(processed_stems.values())
        volumes = [stem_volumes.get(name, 0.4) for name in processed_stems.keys()]
        mixed = AudioProcessor.mix_tracks(tracks, volumes, sr)
        # Step 8: Stereo widening (turns the mono mix into a 2 x samples array)
        if stereo_widen:
            left = mixed.copy()
            right = mixed.copy()
            # Subtle delay between channels
            delay_samples = int(sr * 0.0003 * mult['fx_mult'])
            right = np.pad(right, (delay_samples, 0))[:len(left)]
            # Slight EQ difference: a little extra low end left, high end right
            sos_l = signal.butter(2, 2000, btype='low', fs=sr, output='sos')
            sos_h = signal.butter(2, 2000, btype='high', fs=sr, output='sos')
            left = left + signal.sosfilt(sos_l, left) * 0.05
            right = right + signal.sosfilt(sos_h, right) * 0.05
            mixed = np.stack([left, right])
        if progress_callback:
            progress_callback(0.9, "Normalizing and mastering...")
        # Final mastering
        mixed = AudioProcessor.normalize(mixed, target_db=-1)
        # Apply final limiter (skipped entirely when pedalboard is missing)
        effects_config = {'limiter': True, 'limiter_threshold': -0.5}
        if HAS_PEDALBOARD:
            mixed = AudioProcessor.apply_effects_chain(mixed, sr, effects_config)
        # Save output
        # NOTE(review): the name uses whole seconds, so two runs finishing in
        # the same second would write to the same path.
        output_path = str(TEMP_DIR / f"copyright_free_{int(time.time())}.wav")
        AudioProcessor.save_audio(mixed, sr, output_path, format='wav')
        if progress_callback:
            progress_callback(1.0, "βœ… Processing complete!")
        return output_path
class RemixEngine:
    """Create remixes by modifying individual stems."""

    @staticmethod
    def remix(filepath, vocal_pitch=0, vocal_volume=1.0,
              drum_volume=1.0, bass_volume=1.0, other_volume=1.0,
              target_bpm=None, add_reverb=False, reverb_amount=0.2,
              add_chorus=False, chorus_amount=0.3,
              add_delay=False, delay_amount=0.2,
              low_eq=0, mid_eq=0, high_eq=0,
              progress_callback=None):
        """Create a custom remix with per-stem control and return the WAV path.

        Steps: separate ``filepath`` into stems, pitch-shift the vocals,
        optionally retime all stems towards ``target_bpm``, apply the selected
        effects to the vocal and "other" stems, apply the 3-band EQ to every
        stem, then mix with the given volumes and normalize to -1 dB.

        Args:
            filepath: Source audio file.
            vocal_pitch: Vocal pitch shift in semitones.
            vocal_volume, drum_volume, bass_volume, other_volume: Mix levels.
            target_bpm: Desired tempo. The stems are only stretched when the
                resulting rate lies strictly between 0.5 and 2.0.
            add_reverb, add_chorus, add_delay: Enable the respective effect.
            reverb_amount, chorus_amount, delay_amount: Effect strengths.
            low_eq, mid_eq, high_eq: EQ band gains in dB.
            progress_callback: Optional ``callable(fraction, message)``.
        """
        def report(fraction, message):
            if progress_callback:
                progress_callback(fraction, message)

        report(0.1, "Separating stems...")
        stem_paths = StemSeparator.separate_stems(filepath)

        report(0.4, "Processing stems...")
        sr = 44100
        stems = {}
        for name in ('vocals', 'drums', 'bass', 'other'):
            stems[name], sr = librosa.load(stem_paths[name], sr=44100, mono=True)

        # Apply vocal pitch shift
        if abs(vocal_pitch) > 0.01:
            stems['vocals'] = AudioProcessor.pitch_shift(stems['vocals'], sr, vocal_pitch)

        # Apply tempo change if target BPM specified
        if target_bpm and target_bpm > 0:
            y_mono_orig, _ = librosa.load(filepath, sr=44100, mono=True)
            current_bpm = AudioAnalyzer.detect_bpm(y_mono_orig, sr)
            if current_bpm > 0:
                rate = target_bpm / current_bpm
                if 0.5 < rate < 2.0:
                    for name in stems:
                        stems[name] = AudioProcessor.time_stretch(stems[name], sr, rate)

        report(0.6, "Applying effects...")
        # Apply effects (vocals and "other" only; drums and bass stay dry)
        effects_config = {}
        if add_reverb:
            effects_config.update({'reverb': True, 'reverb_size': reverb_amount, 'reverb_wet': reverb_amount * 0.5})
        if add_chorus:
            effects_config.update({'chorus': True, 'chorus_mix': chorus_amount})
        if add_delay:
            effects_config.update({'delay': True, 'delay_mix': delay_amount})
        if effects_config:
            for name in ('vocals', 'other'):
                stems[name] = AudioProcessor.apply_effects_chain(stems[name], sr, effects_config)

        # Apply EQ. The result has to be stored back: assigning to a loop
        # variable only rebinds that name and leaves the stems untouched,
        # which is why the EQ previously had no effect at all.
        if any([low_eq, mid_eq, high_eq]):
            for name in stems:
                stems[name] = AudioProcessor.apply_eq(stems[name], sr, low_eq, mid_eq, high_eq)

        report(0.8, "Mixing...")
        # Mix with custom volumes
        tracks = [stems['vocals'], stems['drums'], stems['bass'], stems['other']]
        volumes = [vocal_volume, drum_volume, bass_volume, other_volume]
        mixed = AudioProcessor.mix_tracks(tracks, volumes, sr)
        mixed = AudioProcessor.normalize(mixed, target_db=-1)

        # mkstemp reserves a unique name; a name based on the whole-second
        # clock could be shared by two requests finishing in the same second.
        fd, output_path = tempfile.mkstemp(prefix="remix_", suffix=".wav", dir=TEMP_DIR)
        os.close(fd)
        sf.write(output_path, mixed, sr)

        report(1.0, "βœ… Remix complete!")
        return output_path
class MashupEngine:
    """Create mashups from multiple songs."""

    @staticmethod
    def create_mashup(filepaths, mode='layered', target_bpm=None,
                      crossfade_ms=2000, volumes=None,
                      auto_key_match=True, auto_bpm_match=True,
                      progress_callback=None):
        """Create a mashup from multiple audio files and return the WAV path.

        Modes:
        - layered: Overlay vocal from one track on instrumental of another
        - sequential: Play tracks back-to-back with crossfades
        - blend: Smooth blend of all tracks simultaneously
        - alternating: Alternate sections from different tracks
        Any other mode name behaves like 'blend'.

        Args:
            filepaths: Audio file paths; ``None`` entries are skipped.
            target_bpm: Tempo to match; ``None`` or 0 means the mean of the
                detected tempos.
            crossfade_ms: Crossfade length for the sequential and alternating
                modes.
            volumes: Per-track levels, aligned with ``filepaths``. Defaults to
                an equal share for every loaded track.
            auto_key_match: Accepted for interface compatibility; this
                implementation does not act on it.
            auto_bpm_match: Time-stretch tracks to ``target_bpm`` when the
                required rate lies strictly between 0.5 and 2.0.
            progress_callback: Optional ``callable(fraction, message)``.

        Returns:
            Path of the rendered WAV, or ``None`` when fewer than two usable
            tracks were supplied.
        """
        if not filepaths or len(filepaths) < 2:
            return None

        def report(fraction, message):
            if progress_callback:
                progress_callback(fraction, message)

        sr = 44100
        report(0.1, f"Loading {len(filepaths)} tracks...")
        # Load and analyze all tracks
        track_data = []
        for i, fp in enumerate(filepaths):
            if fp is None:
                continue
            y, _ = librosa.load(fp, sr=sr, mono=True)
            bpm = AudioAnalyzer.detect_bpm(y, sr)
            key = AudioAnalyzer.detect_key(y, sr)
            track_data.append({
                'y': y, 'bpm': bpm, 'key': key, 'path': fp, 'index': i
            })
        if len(track_data) < 2:
            return None

        report(0.3, "Analyzing and matching tracks...")
        # Determine target BPM
        if target_bpm is None or target_bpm == 0:
            target_bpm = np.mean([t['bpm'] for t in track_data])
        # BPM match all tracks
        if auto_bpm_match:
            for t in track_data:
                if t['bpm'] > 0 and abs(t['bpm'] - target_bpm) > 1:
                    rate = target_bpm / t['bpm']
                    if 0.5 < rate < 2.0:
                        t['y'] = AudioProcessor.time_stretch(t['y'], sr, rate)
                        t['bpm'] = target_bpm

        report(0.5, f"Creating {mode} mashup...")
        if volumes is None:
            volumes = [1.0 / len(track_data)] * len(track_data)
        else:
            # Supplied volumes line up with ``filepaths``. Pick them by each
            # track's original position so a skipped ``None`` entry does not
            # shift every later volume onto the wrong track.
            volumes = [
                volumes[t['index']] for t in track_data
                if t['index'] < len(volumes)
            ]

        if mode == 'layered':
            result = MashupEngine._layered_mashup(track_data, sr, volumes, progress_callback)
        elif mode == 'sequential':
            result = MashupEngine._sequential_mashup(track_data, sr, crossfade_ms, progress_callback)
        elif mode == 'alternating':
            result = MashupEngine._alternating_mashup(track_data, sr, crossfade_ms, progress_callback)
        else:
            # 'blend' and any unrecognised mode
            result = MashupEngine._blend_mashup(track_data, sr, volumes, progress_callback)

        # Normalize
        result = AudioProcessor.normalize(result, target_db=-1)
        # mkstemp reserves a unique name; a name based on the whole-second
        # clock could be shared by two requests finishing in the same second.
        fd, output_path = tempfile.mkstemp(prefix="mashup_", suffix=".wav", dir=TEMP_DIR)
        os.close(fd)
        sf.write(output_path, result, sr)

        report(1.0, "βœ… Mashup complete!")
        return output_path

    @staticmethod
    def _layered_mashup(track_data, sr, volumes, progress_callback=None):
        """Overlay vocals from track 1 with instrumental from track 2.

        Tracks beyond the first two are blended in at their volume (0.3 when
        no volume is available), cut or zero-padded to the overlay length.

        NOTE(review): the stems are separated from the original files
        (``t['path']``), so tempo matching applied to ``t['y']`` does not
        reach the first two tracks in this mode.
        """
        # Load each stem right after its separation, so the second separation
        # can never replace the first track's files before they are read.
        stems_1 = StemSeparator.separate_stems(track_data[0]['path'])
        vocals, _ = librosa.load(stems_1['vocals'], sr=sr, mono=True)
        stems_2 = StemSeparator.separate_stems(track_data[1]['path'])
        instrumental, _ = librosa.load(stems_2['instrumental'], sr=sr, mono=True)

        # Match lengths
        min_len = min(len(vocals), len(instrumental))
        result = vocals[:min_len] * 0.55 + instrumental[:min_len] * 0.5

        # If more tracks, blend them in
        for i, t in enumerate(track_data[2:], 2):
            y_extra = t['y']
            if len(y_extra) >= min_len:
                y_extra = y_extra[:min_len]
            else:
                y_extra = np.pad(y_extra, (0, min_len - len(y_extra)))
            vol = volumes[i] if i < len(volumes) else 0.3
            result += y_extra * vol
        return result

    @staticmethod
    def _sequential_mashup(track_data, sr, crossfade_ms, progress_callback=None):
        """Play tracks sequentially with crossfades.

        When the crossfade is shorter than one sample (for example
        ``crossfade_ms <= 0``) the tracks are joined end to end.
        """
        use_crossfade = int(sr * crossfade_ms / 1000) > 0
        total = len(track_data)
        result = track_data[0]['y'].copy()
        for i in range(1, total):
            if progress_callback:
                progress_callback(0.5 + 0.4 * i / total,
                                  f"Joining track {i+1}/{total}...")
            if use_crossfade:
                result = AudioProcessor.crossfade(result, track_data[i]['y'], sr, crossfade_ms)
            else:
                result = np.concatenate([result, track_data[i]['y']])
        return result

    @staticmethod
    def _blend_mashup(track_data, sr, volumes, progress_callback=None):
        """Smooth simultaneous blend of all tracks."""
        tracks = [t['y'] for t in track_data]
        return AudioProcessor.mix_tracks(tracks, volumes, sr)

    @staticmethod
    def _alternating_mashup(track_data, sr, crossfade_ms, progress_callback=None):
        """Alternate 8-second sections from different tracks.

        Sections are taken round-robin: section k of every track is used
        before section k+1 of any track. Assembly stops as soon as the track
        whose turn it is has no audio left, or after 101 sections.
        """
        section_length = int(sr * 8)  # 8-second sections
        cf_samples = int(sr * crossfade_ms / 1000)
        n_tracks = len(track_data)
        max_sections = 101  # Safety limit

        result = np.array([])
        for section_idx in range(max_sections):
            y = track_data[section_idx % n_tracks]['y']
            start = (section_idx // n_tracks) * section_length
            if start >= len(y):
                break
            section = y[start:min(start + section_length, len(y))]
            if len(result) > 0 and cf_samples > 0:
                result = AudioProcessor.crossfade(result, section, sr, crossfade_ms)
            else:
                result = np.concatenate([result, section])
        return result
# ═══════════════════════════════════════════════════════════════════════════════
# GRADIO UI
# ═══════════════════════════════════════════════════════════════════════════════
# Stylesheet for the UI as one CSS string. It styles the hero header, tabs,
# feature cards, process button, status messages, section headers, info boxes,
# per-stem accent borders, analysis cards, accordions and track upload areas,
# and hides the page footer.
CUSTOM_CSS = """
/* ═══ Global Dark Studio Theme ═══ */
.gradio-container {
max-width: 1400px !important;
margin: auto !important;
}
/* Hero Header */
.hero-header {
background: linear-gradient(135deg, #0a0a1a 0%, #1a1a3e 50%, #0d0d2a 100%);
border: 1px solid rgba(45, 156, 219, 0.3);
border-radius: 16px;
padding: 30px 40px;
margin-bottom: 20px;
text-align: center;
box-shadow: 0 8px 32px rgba(45, 156, 219, 0.15);
}
.hero-header h1 {
font-size: 2.8em !important;
background: linear-gradient(135deg, #2D9CDB, #8B5CF6, #F2994A);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
margin: 0 !important;
font-weight: 800 !important;
}
.hero-header p {
color: #8899aa !important;
font-size: 1.1em !important;
margin-top: 8px !important;
}
/* Tab Styling */
.tab-nav {
border-bottom: 2px solid rgba(45, 156, 219, 0.2) !important;
gap: 4px !important;
}
.tab-nav button {
font-size: 1.05em !important;
font-weight: 600 !important;
padding: 12px 20px !important;
border-radius: 10px 10px 0 0 !important;
transition: all 0.3s ease !important;
}
.tab-nav button.selected {
background: linear-gradient(135deg, rgba(45, 156, 219, 0.2), rgba(139, 92, 246, 0.2)) !important;
border-bottom: 3px solid #2D9CDB !important;
}
/* Feature Cards */
.feature-card {
border: 1px solid rgba(45, 156, 219, 0.2);
border-radius: 12px;
padding: 20px;
margin: 8px 0;
background: rgba(26, 26, 46, 0.5);
backdrop-filter: blur(10px);
}
/* Process Button */
.process-btn {
background: linear-gradient(135deg, #2D9CDB, #8B5CF6) !important;
border: none !important;
font-size: 1.15em !important;
font-weight: 700 !important;
padding: 14px 32px !important;
border-radius: 12px !important;
box-shadow: 0 4px 20px rgba(45, 156, 219, 0.3) !important;
transition: all 0.3s ease !important;
text-transform: uppercase !important;
letter-spacing: 1px !important;
}
.process-btn:hover {
transform: translateY(-2px) !important;
box-shadow: 0 8px 32px rgba(45, 156, 219, 0.5) !important;
}
/* Status Messages */
.status-msg {
padding: 12px 20px;
border-radius: 8px;
font-weight: 600;
text-align: center;
}
.status-success {
background: rgba(39, 174, 96, 0.15);
border: 1px solid rgba(39, 174, 96, 0.4);
color: #27AE60;
}
.status-processing {
background: rgba(45, 156, 219, 0.15);
border: 1px solid rgba(45, 156, 219, 0.4);
color: #2D9CDB;
}
/* Section Headers */
.section-header {
font-size: 1.3em;
font-weight: 700;
padding: 8px 0;
margin: 16px 0 8px 0;
border-bottom: 2px solid rgba(45, 156, 219, 0.2);
color: #2D9CDB;
}
/* Info Boxes */
.info-box {
background: rgba(45, 156, 219, 0.08);
border: 1px solid rgba(45, 156, 219, 0.2);
border-radius: 10px;
padding: 16px 20px;
margin: 12px 0;
font-size: 0.95em;
line-height: 1.6;
}
/* Stem Labels */
.stem-vocals { border-left: 4px solid #E91E63 !important; }
.stem-drums { border-left: 4px solid #FF9800 !important; }
.stem-bass { border-left: 4px solid #9C27B0 !important; }
.stem-other { border-left: 4px solid #4CAF50 !important; }
.stem-instrumental { border-left: 4px solid #2196F3 !important; }
/* Analysis Results */
.analysis-card {
text-align: center;
padding: 16px;
border-radius: 10px;
background: rgba(26, 26, 62, 0.5);
border: 1px solid rgba(255,255,255,0.1);
}
.analysis-card .value {
font-size: 2em;
font-weight: 800;
color: #2D9CDB;
}
.analysis-card .label {
font-size: 0.85em;
color: #8899aa;
text-transform: uppercase;
letter-spacing: 1px;
}
/* Hide footer */
footer { display: none !important; }
/* Accordion */
.accordion {
border: 1px solid rgba(45, 156, 219, 0.15) !important;
border-radius: 10px !important;
}
/* Mashup Track Cards */
.track-upload {
border: 2px dashed rgba(45, 156, 219, 0.3);
border-radius: 12px;
padding: 16px;
transition: border-color 0.3s ease;
}
.track-upload:hover {
border-color: rgba(45, 156, 219, 0.6);
}
"""
# Gradio theme: the stock "Soft" theme with the app's blue/purple palette and
# fonts, followed by overrides for the primary button, block chrome and slider.
_soft_theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="purple",
    neutral_hue="slate",
    font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],
    font_mono=[gr.themes.GoogleFont("JetBrains Mono"), "monospace"],
)
THEME = _soft_theme.set(
    button_primary_background_fill="linear-gradient(135deg, #2D9CDB, #8B5CF6)",
    button_primary_background_fill_hover="linear-gradient(135deg, #1a7bbf, #7c3aed)",
    button_primary_text_color="white",
    block_border_width="1px",
    block_shadow="0 4px 24px rgba(0,0,0,0.2)",
    slider_color="#2D9CDB",
)
# ─── Handler Functions ────────────────────────────────────────────────────────
def analyze_audio(audio_path, progress=gr.Progress()):
    """Compute the readouts displayed on the Audio Analyzer tab.

    Args:
        audio_path: Path of the uploaded file, or None when nothing was uploaded.
        progress: Gradio progress tracker, advanced before each analysis stage.

    Returns:
        A 7-tuple of display strings in the order: BPM, key, duration,
        sample rate, spectral centroid, spectral bandwidth, spectral rolloff.
        When ``audio_path`` is None every entry is the same placeholder string.
    """
    if audio_path is None:
        # One placeholder per output textbox.
        return ("β€”",) * 7

    progress(0.1, desc="Loading audio...")
    samples, sr = AudioAnalyzer.load_audio_mono(audio_path)

    progress(0.3, desc="Detecting BPM...")
    bpm = AudioAnalyzer.detect_bpm(samples, sr)

    progress(0.5, desc="Detecting key...")
    key = AudioAnalyzer.detect_key(samples, sr)

    progress(0.7, desc="Analyzing spectrum...")
    spectral = AudioAnalyzer.get_spectral_features(samples, sr)
    # Duration in seconds, rounded to one decimal place for display.
    length_s = round(len(samples) / sr, 1)

    progress(1.0, desc="Analysis complete!")
    readouts = [
        f"🎡 {bpm} BPM",
        f"🎹 {key}",
        f"⏱️ {length_s}s",
        f"πŸ“Š {sr} Hz",
        f"πŸ“ˆ {spectral['centroid']} Hz",
        f"πŸ“ {spectral['bandwidth']} Hz",
        f"πŸ”Š {spectral['rolloff']} Hz",
    ]
    return tuple(readouts)
def separate_stems(audio_path, progress=gr.Progress()):
    """Split the uploaded song into stems for the Stem Separator tab.

    Args:
        audio_path: Path of the uploaded file, or None when nothing was uploaded.
        progress: Gradio progress tracker; also forwarded to the separator
            through a small callback.

    Returns:
        A 6-tuple ``(vocals, drums, bass, other, instrumental, status)``.
        The first five entries are whatever ``StemSeparator.separate_stems``
        stored under those keys (None for a missing key); ``status`` is a
        message for the status field. On any failure the five stem entries
        are None and ``status`` carries the error text.
    """
    if audio_path is None:
        return None, None, None, None, None, "❌ Please upload an audio file"

    def prog_cb(val, desc):
        progress(val, desc=desc)

    progress(0.05, desc="Starting stem separation...")
    # UI boundary: report failures in the status field, like the other
    # handlers in this file, instead of letting the exception escape.
    try:
        stems = StemSeparator.separate_stems(audio_path, progress_callback=prog_cb)
        return (
            stems.get('vocals'),
            stems.get('drums'),
            stems.get('bass'),
            stems.get('other'),
            stems.get('instrumental'),
            "βœ… Stem separation complete! Each stem is ready for individual processing."
        )
    except Exception as e:
        return None, None, None, None, None, f"❌ Error: {str(e)}"
def process_copyright_free(audio_path, pitch_shift, speed_change,
                           eq_shift, add_reverb, add_texture,
                           stereo_widen, micro_timing, intensity,
                           progress=gr.Progress()):
    """Run ``CopyrightFreeEngine.process`` with the user-selected settings.

    Args:
        audio_path: Path of the uploaded file, or None when nothing was uploaded.
        pitch_shift, speed_change, eq_shift, add_reverb, add_texture,
        stereo_widen, micro_timing, intensity: Forwarded unchanged to the
            engine under the same keyword names.
        progress: Gradio progress tracker, driven by the engine's callback.

    Returns:
        ``(output_path, status)`` on success, ``(None, status)`` when no file
        was supplied or the engine raised.
    """
    if audio_path is None:
        return None, "❌ Please upload an audio file"

    engine_settings = {
        "pitch_shift": pitch_shift,
        "speed_change": speed_change,
        "eq_shift": eq_shift,
        "add_reverb": add_reverb,
        "add_texture": add_texture,
        "stereo_widen": stereo_widen,
        "micro_timing": micro_timing,
        "intensity": intensity,
    }

    try:
        result_path = CopyrightFreeEngine.process(
            audio_path,
            progress_callback=lambda val, desc: progress(val, desc=desc),
            **engine_settings,
        )
    except Exception as exc:
        # Any engine failure is reported through the status field.
        return None, f"❌ Error: {exc!s}"

    return result_path, "βœ… **Copyright-free processing complete!** The audio fingerprint has been sufficiently modified while preserving the original vibe and emotion."
def create_remix(audio_path, vocal_pitch, vocal_vol, drum_vol, bass_vol, other_vol,
                 target_bpm, add_reverb, reverb_amt, add_chorus, chorus_amt,
                 add_delay, delay_amt, low_eq, mid_eq, high_eq,
                 progress=gr.Progress()):
    """Build a remix by forwarding the tab's settings to ``RemixEngine.remix``.

    Args:
        audio_path: Path of the uploaded file, or None when nothing was uploaded.
        vocal_pitch: Passed as ``vocal_pitch``.
        vocal_vol, drum_vol, bass_vol, other_vol: Passed as the engine's
            ``*_volume`` keywords.
        target_bpm: Passed as ``target_bpm``; values that are not greater
            than zero are passed as None.
        add_reverb, add_chorus, add_delay: Effect toggles.
        reverb_amt, chorus_amt, delay_amt: Passed as the ``*_amount`` keywords.
        low_eq, mid_eq, high_eq: Passed under the same names.
        progress: Gradio progress tracker, driven by the engine's callback.

    Returns:
        ``(output_path, status)`` on success, ``(None, status)`` otherwise.
    """
    if audio_path is None:
        return None, "❌ Please upload an audio file"

    def report(val, desc):
        progress(val, desc=desc)

    try:
        # The slider uses 0 as its "keep original" position; the engine
        # receives None in that case.
        bpm_goal = target_bpm if target_bpm > 0 else None
        remix_path = RemixEngine.remix(
            audio_path,
            vocal_pitch=vocal_pitch,
            vocal_volume=vocal_vol,
            drum_volume=drum_vol,
            bass_volume=bass_vol,
            other_volume=other_vol,
            target_bpm=bpm_goal,
            add_reverb=add_reverb,
            reverb_amount=reverb_amt,
            add_chorus=add_chorus,
            chorus_amount=chorus_amt,
            add_delay=add_delay,
            delay_amount=delay_amt,
            low_eq=low_eq,
            mid_eq=mid_eq,
            high_eq=high_eq,
            progress_callback=report,
        )
    except Exception as exc:
        return None, f"❌ Error: {exc!s}"

    return remix_path, "βœ… **Remix complete!** Your custom remix is ready."
def create_mashup(track1, track2, track3, track4, track5, track6,
                  mode, target_bpm, crossfade_ms,
                  auto_bpm, auto_key,
                  vol1, vol2, vol3, vol4, vol5, vol6,
                  progress=gr.Progress()):
    """Create a mashup from the uploaded track slots.

    Args:
        track1..track6: File paths of the six upload slots; None for an
            empty slot.
        mode: Mashup mode, forwarded to ``MashupEngine.create_mashup``.
        target_bpm: Forwarded as ``target_bpm``; values that are not greater
            than zero are passed as None.
        crossfade_ms: Crossfade length; converted to int before forwarding.
        auto_bpm, auto_key: Forwarded as ``auto_bpm_match`` / ``auto_key_match``.
        vol1..vol6: Volume of the track in the slot with the same number.
        progress: Gradio progress tracker, driven by the engine's callback.

    Returns:
        ``(output_path, status)`` on success, ``(None, status)`` when fewer
        than two tracks were supplied, the engine returned nothing, or the
        engine raised.
    """
    # Filter tracks and volumes together so each volume stays attached to its
    # own slot. Slicing the volume list by track count would mis-assign
    # volumes whenever an earlier slot is left empty (e.g. tracks 1 and 3).
    slots = zip(
        (track1, track2, track3, track4, track5, track6),
        (vol1, vol2, vol3, vol4, vol5, vol6),
    )
    filled = [(track, volume) for track, volume in slots if track is not None]
    tracks = [track for track, _ in filled]
    volumes = [volume for _, volume in filled]

    if len(tracks) < 2:
        return None, "❌ Please upload at least 2 tracks for a mashup"

    def prog_cb(val, desc):
        progress(val, desc=desc)

    try:
        output_path = MashupEngine.create_mashup(
            tracks,
            mode=mode,
            target_bpm=target_bpm if target_bpm > 0 else None,
            crossfade_ms=int(crossfade_ms),
            volumes=volumes,
            auto_key_match=auto_key,
            auto_bpm_match=auto_bpm,
            progress_callback=prog_cb
        )
        if output_path:
            return output_path, f"βœ… **Mashup complete!** {len(tracks)} tracks merged in '{mode}' mode."
        else:
            return None, "❌ Mashup creation failed. Please check your uploaded tracks."
    except Exception as e:
        return None, f"❌ Error: {str(e)}"
def quick_copyright_free(audio_path, progress=gr.Progress()):
    """Run ``CopyrightFreeEngine.process`` with a fixed preset (one-click path).

    Args:
        audio_path: Path of the uploaded file, or None when nothing was uploaded.
        progress: Gradio progress tracker, driven by the engine's callback.

    Returns:
        ``(output_path, status)`` on success, ``(None, status)`` when no file
        was supplied or the engine raised.
    """
    if audio_path is None:
        return None, "❌ Please upload an audio file"

    # Fixed preset: every optional stage enabled at medium intensity.
    preset = {
        "pitch_shift": 0.5,
        "speed_change": 1.03,
        "eq_shift": True,
        "add_reverb": True,
        "add_texture": True,
        "stereo_widen": True,
        "micro_timing": True,
        "intensity": 'medium',
    }

    try:
        result_path = CopyrightFreeEngine.process(
            audio_path,
            progress_callback=lambda val, desc: progress(val, desc=desc),
            **preset,
        )
    except Exception as exc:
        return None, f"❌ Error: {exc!s}"

    return result_path, "βœ… **One-click processing complete!** Your audio is ready for YouTube."
# ─── Build the UI ─────────────────────────────────────────────────────────────
# Root Blocks container for the whole app, bound to `demo` (launched at the
# bottom of the file). It applies the THEME and CUSTOM_CSS defined above.
# NOTE(review): indentation is not visible in this view; the statements that
# follow are assumed to be nested inside this `with` block.
with gr.Blocks(
theme=THEME,
css=CUSTOM_CSS,
title="πŸŽ›οΈ SoundForge Studio β€” Copyright-Free Audio Processing",
) as demo:
# ═══ HERO HEADER ═══
# Static HTML banner carrying the "hero-header" CSS class.
gr.HTML("""
<div class="hero-header">
<h1>πŸŽ›οΈ SoundForge Studio</h1>
<p>Professional Copyright-Free Audio Processing β€’ Remix Engine β€’ Mashup Creator β€’ Stem Separator</p>
<p style="font-size: 0.85em; color: #667788; margin-top: 12px;">
πŸ”’ Make any song copyright-free for YouTube &nbsp;|&nbsp; 🎚️ Full remix control &nbsp;|&nbsp; 🎡 Multi-track mashups &nbsp;|&nbsp; 🎀 Stem separation
</p>
</div>
""")
# Tab container for the six feature tabs created below.
with gr.Tabs() as main_tabs:
# ═══════════════════════════════════════════════════════════════════════
# TAB 1: COPYRIGHT-FREE PROCESSING
# ═══════════════════════════════════════════════════════════════════════
with gr.Tab("πŸ”“ Copyright-Free", id="copyright"):
gr.Markdown("""
### Make Any Song Copyright-Free for YouTube
This engine intelligently modifies your audio's fingerprint through multi-layered processing
while **preserving the original vibe, style, and emotions**. The modifications are carefully
calibrated to evade Content ID detection without audibly degrading the listening experience.
""")
gr.HTML("""
<div class="info-box">
<strong>πŸ›‘οΈ How It Works:</strong><br>
1. <strong>Stem Separation</strong> β€” Splits audio into vocals, drums, bass, and melody<br>
2. <strong>Per-Stem Micro-Pitch Shifting</strong> β€” Different subtle shifts per element (disrupts fingerprint)<br>
3. <strong>Micro-Tempo Adjustment</strong> β€” Barely perceptible speed change<br>
4. <strong>EQ Profile Shift</strong> β€” Subtle frequency balance changes per stem<br>
5. <strong>Spatial Modification</strong> β€” Reverb/stereo changes<br>
6. <strong>Texture Addition</strong> β€” Envelope-shaped micro-noise layer<br>
7. <strong>Micro-Timing Offsets</strong> β€” Sample-level timing shifts between stems<br>
8. <strong>Professional Mastering</strong> β€” Normalize, limit, and stereo-widen the result
</div>
""")
with gr.Row():
with gr.Column(scale=1):
cf_input = gr.Audio(
label="πŸ“‚ Upload Your Song",
type="filepath",
sources=["upload"],
waveform_options=gr.WaveformOptions(
waveform_color="#2D9CDB",
waveform_progress_color="#8B5CF6",
),
)
# Quick Process Button
quick_btn = gr.Button(
"⚑ One-Click Copyright-Free (Recommended)",
variant="primary",
elem_classes=["process-btn"],
size="lg"
)
with gr.Accordion("βš™οΈ Advanced Settings", open=False):
cf_intensity = gr.Radio(
["subtle", "medium", "strong", "maximum"],
value="medium",
label="Processing Intensity",
info="Higher = more fingerprint divergence, slightly more audible changes"
)
with gr.Row():
cf_pitch = gr.Slider(
-3, 3, value=0.5, step=0.1,
label="🎡 Pitch Shift (semitones)",
info="Positive = higher, Negative = lower"
)
cf_speed = gr.Slider(
0.95, 1.08, value=1.03, step=0.005,
label="⏱️ Speed Change",
info="1.0 = original speed"
)
with gr.Row():
cf_eq = gr.Checkbox(label="πŸŽ›οΈ EQ Profile Shift", value=True)
cf_reverb = gr.Checkbox(label="πŸ›οΈ Add Spatial Reverb", value=True)
cf_texture = gr.Checkbox(label="🌊 Add Micro-Texture", value=True)
cf_stereo = gr.Checkbox(label="πŸ“» Stereo Widening", value=True)
cf_timing = gr.Checkbox(label="⏲️ Micro-Timing Shifts", value=True)
advanced_btn = gr.Button(
"πŸŽ›οΈ Process with Custom Settings",
variant="secondary",
size="lg"
)
with gr.Column(scale=1):
cf_output = gr.Audio(
label="πŸ”“ Copyright-Free Output",
type="filepath",
interactive=False,
waveform_options=gr.WaveformOptions(
waveform_color="#27AE60",
waveform_progress_color="#2ECC71",
),
)
cf_status = gr.Markdown("*Upload a song and click process to begin*")
cf_download = gr.File(label="⬇️ Download Processed Audio", interactive=False)
# Event handlers
quick_btn.click(
fn=quick_copyright_free,
inputs=[cf_input],
outputs=[cf_output, cf_status]
).then(
fn=lambda x: x,
inputs=[cf_output],
outputs=[cf_download]
)
advanced_btn.click(
fn=process_copyright_free,
inputs=[cf_input, cf_pitch, cf_speed, cf_eq, cf_reverb,
cf_texture, cf_stereo, cf_timing, cf_intensity],
outputs=[cf_output, cf_status]
).then(
fn=lambda x: x,
inputs=[cf_output],
outputs=[cf_download]
)
# ═══════════════════════════════════════════════════════════════════════
# TAB 2: REMIX ENGINE
# ═══════════════════════════════════════════════════════════════════════
with gr.Tab("🎚️ Remix Engine", id="remix"):
gr.Markdown("""
### Professional Remix Engine
Full control over every element of your song. Separate stems, adjust volumes,
change pitch, tempo, and apply professional effects β€” all while keeping the original vibe.
""")
with gr.Row():
with gr.Column(scale=1):
remix_input = gr.Audio(
label="πŸ“‚ Upload Song to Remix",
type="filepath",
sources=["upload"],
waveform_options=gr.WaveformOptions(waveform_color="#F2994A"),
)
gr.Markdown("#### 🎀 Stem Volume Control")
with gr.Row():
remix_vocal_vol = gr.Slider(0, 2, value=1.0, step=0.05, label="🎀 Vocals")
remix_drum_vol = gr.Slider(0, 2, value=1.0, step=0.05, label="πŸ₯ Drums")
with gr.Row():
remix_bass_vol = gr.Slider(0, 2, value=1.0, step=0.05, label="🎸 Bass")
remix_other_vol = gr.Slider(0, 2, value=1.0, step=0.05, label="🎹 Other/Melody")
gr.Markdown("#### 🎡 Pitch & Tempo")
with gr.Row():
remix_vocal_pitch = gr.Slider(-12, 12, value=0, step=0.5,
label="Vocal Pitch (semitones)")
remix_target_bpm = gr.Slider(0, 200, value=0, step=1,
label="Target BPM (0 = keep original)")
gr.Markdown("#### πŸŽ›οΈ Effects")
with gr.Row():
remix_reverb = gr.Checkbox(label="πŸ›οΈ Reverb", value=False)
remix_reverb_amt = gr.Slider(0, 1, value=0.3, step=0.05, label="Amount")
with gr.Row():
remix_chorus = gr.Checkbox(label="🌊 Chorus", value=False)
remix_chorus_amt = gr.Slider(0, 1, value=0.3, step=0.05, label="Amount")
with gr.Row():
remix_delay = gr.Checkbox(label="πŸ” Delay", value=False)
remix_delay_amt = gr.Slider(0, 1, value=0.3, step=0.05, label="Amount")
gr.Markdown("#### πŸŽ›οΈ Equalizer")
with gr.Row():
remix_low_eq = gr.Slider(-12, 12, value=0, step=0.5, label="πŸ”Š Low (Bass)")
remix_mid_eq = gr.Slider(-12, 12, value=0, step=0.5, label="πŸ”Š Mid")
remix_high_eq = gr.Slider(-12, 12, value=0, step=0.5, label="πŸ”Š High (Treble)")
remix_btn = gr.Button("🎚️ Create Remix", variant="primary",
elem_classes=["process-btn"], size="lg")
with gr.Column(scale=1):
remix_output = gr.Audio(
label="🎚️ Remix Output",
type="filepath",
interactive=False,
waveform_options=gr.WaveformOptions(
waveform_color="#F2994A",
waveform_progress_color="#E67E22",
),
)
remix_status = gr.Markdown("*Upload a song and adjust settings to create your remix*")
remix_download = gr.File(label="⬇️ Download Remix", interactive=False)
remix_btn.click(
fn=create_remix,
inputs=[remix_input, remix_vocal_pitch, remix_vocal_vol, remix_drum_vol,
remix_bass_vol, remix_other_vol, remix_target_bpm,
remix_reverb, remix_reverb_amt, remix_chorus, remix_chorus_amt,
remix_delay, remix_delay_amt, remix_low_eq, remix_mid_eq, remix_high_eq],
outputs=[remix_output, remix_status]
).then(
fn=lambda x: x,
inputs=[remix_output],
outputs=[remix_download]
)
# ═══════════════════════════════════════════════════════════════════════
# TAB 3: MASHUP CREATOR
# ═══════════════════════════════════════════════════════════════════════
with gr.Tab("πŸ”€ Mashup Creator", id="mashup"):
gr.Markdown("""
### Multi-Track Mashup Creator
Combine up to **6 songs** into one seamless mashup. Choose from multiple mashup modes
with automatic BPM and key matching for professional results.
""")
gr.HTML("""
<div class="info-box">
<strong>🎡 Mashup Modes:</strong><br>
β€’ <strong>Layered</strong> β€” Vocals from Track 1 + Instrumental from Track 2 (+ blend others)<br>
β€’ <strong>Sequential</strong> β€” Tracks play one after another with smooth crossfades<br>
β€’ <strong>Blend</strong> β€” All tracks mixed simultaneously at custom volumes<br>
β€’ <strong>Alternating</strong> β€” Switches between tracks every 8 seconds with crossfades
</div>
""")
gr.Markdown("#### πŸ“‚ Upload Tracks (2-6 songs)")
with gr.Row():
mashup_t1 = gr.Audio(label="🎡 Track 1", type="filepath", sources=["upload"],
waveform_options=gr.WaveformOptions(waveform_color="#E91E63"))
mashup_t2 = gr.Audio(label="🎡 Track 2", type="filepath", sources=["upload"],
waveform_options=gr.WaveformOptions(waveform_color="#2196F3"))
mashup_t3 = gr.Audio(label="🎡 Track 3 (optional)", type="filepath", sources=["upload"],
waveform_options=gr.WaveformOptions(waveform_color="#4CAF50"))
with gr.Row():
mashup_t4 = gr.Audio(label="🎡 Track 4 (optional)", type="filepath", sources=["upload"],
waveform_options=gr.WaveformOptions(waveform_color="#FF9800"))
mashup_t5 = gr.Audio(label="🎡 Track 5 (optional)", type="filepath", sources=["upload"],
waveform_options=gr.WaveformOptions(waveform_color="#9C27B0"))
mashup_t6 = gr.Audio(label="🎡 Track 6 (optional)", type="filepath", sources=["upload"],
waveform_options=gr.WaveformOptions(waveform_color="#F44336"))
with gr.Accordion("βš™οΈ Mashup Settings", open=True):
with gr.Row():
mashup_mode = gr.Radio(
["layered", "sequential", "blend", "alternating"],
value="layered",
label="Mashup Mode"
)
mashup_target_bpm = gr.Slider(0, 200, value=0, step=1,
label="Target BPM (0 = auto-detect average)")
mashup_crossfade = gr.Slider(500, 5000, value=2000, step=100,
label="Crossfade (ms)")
with gr.Row():
mashup_auto_bpm = gr.Checkbox(label="🎡 Auto BPM Match", value=True)
mashup_auto_key = gr.Checkbox(label="🎹 Auto Key Match", value=True)
gr.Markdown("#### πŸ”Š Track Volumes")
with gr.Row():
mashup_v1 = gr.Slider(0, 1, value=0.5, step=0.05, label="Track 1")
mashup_v2 = gr.Slider(0, 1, value=0.5, step=0.05, label="Track 2")
mashup_v3 = gr.Slider(0, 1, value=0.5, step=0.05, label="Track 3")
with gr.Row():
mashup_v4 = gr.Slider(0, 1, value=0.5, step=0.05, label="Track 4")
mashup_v5 = gr.Slider(0, 1, value=0.5, step=0.05, label="Track 5")
mashup_v6 = gr.Slider(0, 1, value=0.5, step=0.05, label="Track 6")
mashup_btn = gr.Button("πŸ”€ Create Mashup", variant="primary",
elem_classes=["process-btn"], size="lg")
with gr.Row():
mashup_output = gr.Audio(
label="πŸ”€ Mashup Output",
type="filepath",
interactive=False,
waveform_options=gr.WaveformOptions(
waveform_color="#8B5CF6",
waveform_progress_color="#A855F7",
),
)
mashup_status = gr.Markdown("*Upload at least 2 tracks and click Create Mashup*")
mashup_download = gr.File(label="⬇️ Download Mashup", interactive=False)
mashup_btn.click(
fn=create_mashup,
inputs=[mashup_t1, mashup_t2, mashup_t3, mashup_t4, mashup_t5, mashup_t6,
mashup_mode, mashup_target_bpm, mashup_crossfade,
mashup_auto_bpm, mashup_auto_key,
mashup_v1, mashup_v2, mashup_v3, mashup_v4, mashup_v5, mashup_v6],
outputs=[mashup_output, mashup_status]
).then(
fn=lambda x: x,
inputs=[mashup_output],
outputs=[mashup_download]
)
# ═══════════════════════════════════════════════════════════════════════
# TAB 4: STEM SEPARATOR
# ═══════════════════════════════════════════════════════════════════════
with gr.Tab("🎀 Stem Separator", id="stems"):
gr.Markdown("""
### AI-Powered Stem Separation
Split any song into its individual components: **Vocals**, **Drums**, **Bass**, and **Other/Melody**.
Each stem can be downloaded individually for further processing.
""")
with gr.Row():
with gr.Column(scale=1):
stem_input = gr.Audio(
label="πŸ“‚ Upload Song",
type="filepath",
sources=["upload"],
waveform_options=gr.WaveformOptions(waveform_color="#2D9CDB"),
)
stem_btn = gr.Button("🎀 Separate Stems", variant="primary",
elem_classes=["process-btn"], size="lg")
stem_status = gr.Markdown("*Upload a song and click Separate Stems*")
with gr.Column(scale=2):
with gr.Row():
stem_vocals = gr.Audio(
label="🎀 Vocals",
type="filepath",
interactive=False,
waveform_options=gr.WaveformOptions(waveform_color="#E91E63"),
elem_classes=["stem-vocals"]
)
stem_instrumental = gr.Audio(
label="🎸 Instrumental",
type="filepath",
interactive=False,
waveform_options=gr.WaveformOptions(waveform_color="#2196F3"),
elem_classes=["stem-instrumental"]
)
with gr.Row():
stem_drums = gr.Audio(
label="πŸ₯ Drums",
type="filepath",
interactive=False,
waveform_options=gr.WaveformOptions(waveform_color="#FF9800"),
elem_classes=["stem-drums"]
)
stem_bass = gr.Audio(
label="🎸 Bass",
type="filepath",
interactive=False,
waveform_options=gr.WaveformOptions(waveform_color="#9C27B0"),
elem_classes=["stem-bass"]
)
with gr.Row():
stem_other = gr.Audio(
label="🎹 Other / Melody",
type="filepath",
interactive=False,
waveform_options=gr.WaveformOptions(waveform_color="#4CAF50"),
elem_classes=["stem-other"]
)
stem_btn.click(
fn=separate_stems,
inputs=[stem_input],
outputs=[stem_vocals, stem_drums, stem_bass, stem_other, stem_instrumental, stem_status]
)
# ═══════════════════════════════════════════════════════════════════════
# TAB 5: AUDIO ANALYZER
# ═══════════════════════════════════════════════════════════════════════
with gr.Tab("πŸ“Š Audio Analyzer", id="analyzer"):
gr.Markdown("""
### Professional Audio Analysis
Analyze any track to discover its BPM, musical key, duration, sample rate,
and spectral characteristics. Essential for matching tracks in mashups.
""")
with gr.Row():
with gr.Column(scale=1):
analyze_input = gr.Audio(
label="πŸ“‚ Upload Track to Analyze",
type="filepath",
sources=["upload"],
waveform_options=gr.WaveformOptions(waveform_color="#2D9CDB"),
)
analyze_btn = gr.Button("πŸ” Analyze Track", variant="primary",
elem_classes=["process-btn"], size="lg")
with gr.Column(scale=2):
gr.Markdown("#### πŸ“Š Analysis Results")
with gr.Row():
an_bpm = gr.Textbox(label="BPM", interactive=False, elem_classes=["analysis-card"])
an_key = gr.Textbox(label="Musical Key", interactive=False, elem_classes=["analysis-card"])
an_duration = gr.Textbox(label="Duration", interactive=False, elem_classes=["analysis-card"])
an_sr = gr.Textbox(label="Sample Rate", interactive=False, elem_classes=["analysis-card"])
with gr.Row():
an_centroid = gr.Textbox(label="Spectral Centroid", interactive=False)
an_bandwidth = gr.Textbox(label="Spectral Bandwidth", interactive=False)
an_rolloff = gr.Textbox(label="Spectral Rolloff", interactive=False)
analyze_btn.click(
fn=analyze_audio,
inputs=[analyze_input],
outputs=[an_bpm, an_key, an_duration, an_sr, an_centroid, an_bandwidth, an_rolloff]
)
# ═══════════════════════════════════════════════════════════════════════
# TAB 6: BATCH PROCESSOR
# ═══════════════════════════════════════════════════════════════════════
with gr.Tab("πŸ“¦ Batch Processor", id="batch"):
gr.Markdown("""
### Batch Copyright-Free Processing
Process multiple songs at once with the same settings. Upload up to 10 songs
and they'll all be converted to copyright-free versions automatically.
""")
batch_files = gr.File(
label="πŸ“‚ Upload Multiple Audio Files",
file_types=[".mp3", ".wav", ".flac", ".ogg", ".m4a"],
file_count="multiple",
)
with gr.Row():
batch_intensity = gr.Radio(
["subtle", "medium", "strong", "maximum"],
value="medium",
label="Processing Intensity"
)
batch_pitch = gr.Slider(-3, 3, value=0.5, step=0.1, label="Pitch Shift")
batch_speed = gr.Slider(0.95, 1.08, value=1.03, step=0.005, label="Speed Change")
batch_btn = gr.Button("πŸ“¦ Process All Files", variant="primary",
elem_classes=["process-btn"], size="lg")
batch_status = gr.Markdown("*Upload files and click Process All*")
batch_output = gr.File(label="⬇️ Download Processed Files", file_count="multiple")
def batch_process(files, intensity, pitch, speed, progress=gr.Progress()):
    """Run ``CopyrightFreeEngine.process`` over every uploaded file.

    Files are processed one after another; a failure on one file does not
    stop the batch. Failed files are listed in the returned status text so
    the user can see which ones were skipped and why.

    Args:
        files: Uploaded files. Each item is either an object with a ``name``
            attribute holding its path, or something ``str()`` turns into a path.
        intensity: Forwarded to the engine as ``intensity``.
        pitch: Forwarded as ``pitch_shift``.
        speed: Forwarded as ``speed_change``.
        progress: Gradio progress tracker; reports overall batch progress.

    Returns:
        ``(output_paths, status)`` where ``output_paths`` is the list of
        produced files, or None when nothing was produced.
    """
    if not files:
        return None, "❌ No files uploaded"

    outputs = []
    failures = []  # one "<file name>: <reason>" entry per file without output
    total = len(files)
    for i, f in enumerate(files):
        filepath = f.name if hasattr(f, 'name') else str(f)
        filename = Path(filepath).name
        progress((i / total), desc=f"Processing {i+1}/{total}: {filename}")

        # Bind the loop index as a default so the callback always reports
        # against the file it was created for.
        def prog_cb(val, desc, _index=i):
            # Map this file's 0..1 progress into its slice of the whole batch.
            overall = (_index + val) / total
            progress(overall, desc=f"[{_index+1}/{total}] {desc}")

        try:
            output_path = CopyrightFreeEngine.process(
                filepath,
                pitch_shift=pitch,
                speed_change=speed,
                intensity=intensity,
                progress_callback=prog_cb
            )
        except Exception as e:
            # Best effort: record the failure and keep going with the rest.
            failures.append(f"{filename}: {e}")
            continue

        if output_path:
            outputs.append(output_path)
        else:
            # Guard against the engine returning no path without raising.
            failures.append(f"{filename}: no output was produced")

    failure_report = ""
    if failures:
        failure_report = "\n\n**Failed files:**\n" + "\n".join(
            f"- {item}" for item in failures
        )

    if outputs:
        return outputs, (
            f"βœ… **Batch complete!** Processed {len(outputs)}/{total} files successfully."
            + failure_report
        )
    return None, "❌ No files could be processed" + failure_report
# The inputs list is positional and follows batch_process's parameter order
# (files, intensity, pitch, speed); its two return values fill the download
# list and the status line.
batch_btn.click(
fn=batch_process,
inputs=[batch_files, batch_intensity, batch_pitch, batch_speed],
outputs=[batch_output, batch_status]
)
# ═══ FOOTER INFO ═══
# Static HTML footer with the feature list and a usage disclaimer. CUSTOM_CSS
# hides the `footer` element, so this block is the only footer content
# defined in this file.
gr.HTML("""
<div style="text-align: center; padding: 30px 0 10px 0; color: #556677; font-size: 0.85em; border-top: 1px solid rgba(45, 156, 219, 0.15); margin-top: 30px;">
<p><strong>πŸŽ›οΈ SoundForge Studio</strong> β€” Professional Audio Processing Suite</p>
<p>πŸ”’ Copyright-Free Processing β€’ 🎚️ Remix Engine β€’ πŸ”€ Mashup Creator β€’ 🎀 Stem Separator β€’ πŸ“Š Audio Analyzer β€’ πŸ“¦ Batch Processor</p>
<p style="margin-top: 8px; font-size: 0.8em;">
⚠️ <em>Disclaimer: This tool modifies audio fingerprints for fair use purposes.
Always ensure you have the right to use and modify the audio content.
Results may vary depending on platform-specific content detection systems.</em>
</p>
</div>
""")
# ─── Launch ───────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Listen on all interfaces on port 7860, surface handler errors in the
    # UI, and do not create a public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
        "share": False,
    }
    # Enable the request queue (at most 10 waiting requests) before launching.
    queued_app = demo.queue(max_size=10)
    queued_app.launch(**launch_options)