# Dmash / app.py — Gradio "AI DJ Mashup" Space.
# (The following lines were Hugging Face page chrome captured in the paste;
# kept here as comments so the file remains valid Python.)
# Curlyblaze's picture — Create app.py — commit 9891609 verified
import gradio as gr
import librosa
import numpy as np
import soundfile as sf
def analyze_and_mix(track_a, track_b, offset_seconds, volume_b):
    """Beat-match Track B to Track A and overlay them into a single mashup.

    Parameters
    ----------
    track_a : str | None
        Filepath of the base track (tempo reference).
    track_b : str | None
        Filepath of the overlay track (this one gets time-stretched).
    offset_seconds : float
        When Track B enters relative to Track A; negative values start
        Track B before Track A.
    volume_b : float
        Gain multiplier applied to Track B before mixing.

    Returns
    -------
    tuple[str | None, str]
        (path to the rendered audio file, human-readable mix details), or
        (None, error message) when either input is missing.
    """
    if track_a is None or track_b is None:
        return None, "Please upload both tracks."

    # Decode both files at one fixed sample rate so the buffers can be summed.
    sr = 44100
    y_a, sr = librosa.load(track_a, sr=sr)
    y_b, sr = librosa.load(track_b, sr=sr)

    # Estimate each track's tempo (BPM).
    tempo_a, _ = librosa.beat.beat_track(y=y_a, sr=sr)
    tempo_b, _ = librosa.beat.beat_track(y=y_b, sr=sr)
    # Newer librosa versions return tempo as a 1-element ndarray.
    if isinstance(tempo_a, np.ndarray):
        tempo_a = float(tempo_a[0])
    if isinstance(tempo_b, np.ndarray):
        tempo_b = float(tempo_b[0])

    # time_stretch(y, rate) multiplies the tempo by `rate`, so bringing
    # Track B from tempo_b to tempo_a needs rate = tempo_a / tempo_b.
    # (The previous tempo_b / tempo_a moved the tempo the wrong way.)
    # Guard against a zero tempo estimate (e.g. silent input).
    stretch_factor = (tempo_a / tempo_b) if tempo_b > 0 else 1.0
    if 0.5 < stretch_factor < 2.0:
        # NOTE: CPU-intensive for long songs.
        y_b_stretched = librosa.effects.time_stretch(y_b, rate=stretch_factor)
    else:
        # Tempo gap too extreme — stretching would sound bad, so skip it.
        y_b_stretched = y_b

    # Apply the user-chosen gain to Track B.
    y_b_stretched = y_b_stretched * volume_b

    # Lay both tracks on a silent canvas according to the offset.
    offset_samples = int(offset_seconds * sr)
    len_a = len(y_a)
    len_b = len(y_b_stretched)
    if offset_samples >= 0:
        # B starts `offset_samples` after A.
        total_length = max(len_a, offset_samples + len_b)
        mix = np.zeros(total_length)
        mix[:len_a] += y_a
        mix[offset_samples:offset_samples + len_b] += y_b_stretched
    else:
        # Negative offset: B starts first, so shift A forward instead.
        abs_offset = -offset_samples
        total_length = max(len_a + abs_offset, len_b)
        mix = np.zeros(total_length)
        mix[:len_b] += y_b_stretched
        mix[abs_offset:abs_offset + len_a] += y_a

    # Peak-normalize so the summed signal cannot clip.
    max_val = np.max(np.abs(mix))
    if max_val > 0:
        mix = mix / max_val

    # Write WAV rather than MP3: soundfile only supports MP3 when libsndfile
    # was built with MPEG support, so "mashup_result.mp3" can raise at
    # runtime; WAV is always available and Gradio plays it fine.
    output_path = "mashup_result.wav"
    sf.write(output_path, mix, sr)

    info_text = (f"Track A BPM: {tempo_a:.2f}\n"
                 f"Track B BPM: {tempo_b:.2f}\n"
                 f"Track B was stretched by factor: {stretch_factor:.2f} to match Track A.")
    return output_path, info_text
# --- User Interface ---
# Wires the mixer function to a simple two-column Gradio layout.
with gr.Blocks(title="AI Professional DJ Mashup") as demo:
    gr.Markdown("# 🎧 AI DJ Mashup Maker")
    gr.Markdown("Upload two songs. The AI will automatically beat-match Song B to Song A. Use the slider to control when Song B comes in.")
    with gr.Row():
        with gr.Column():
            input_a = gr.Audio(type="filepath", label="Track A (Base Track)")
            input_b = gr.Audio(type="filepath", label="Track B (Overlay Track)")
        with gr.Column():
            # minimum=-30 exposes the negative-offset branch that
            # analyze_and_mix already implements (Track B entering before
            # Track A); the previous minimum=0 made it unreachable.
            # Default value is unchanged.
            offset_slider = gr.Slider(minimum=-30, maximum=60, value=10, step=0.5, label="Mashup Start Offset (Seconds)")
            vol_slider = gr.Slider(minimum=0.1, maximum=1.5, value=1.0, step=0.1, label="Track B Volume")
    btn = gr.Button("Create Mashup", variant="primary")
    with gr.Row():
        output_audio = gr.Audio(label="Final Mashup")
        output_info = gr.Textbox(label="Mix Details")
    btn.click(fn=analyze_and_mix, inputs=[input_a, input_b, offset_slider, vol_slider], outputs=[output_audio, output_info])

if __name__ == "__main__":
    demo.launch()