import gradio as gr
import librosa
import numpy as np
import soundfile as sf


def analyze_and_mix(track_a, track_b, offset_seconds, volume_b):
    """Beat-match Track B to Track A and overlay them into one mashup.

    Args:
        track_a: Filepath of the base track (its tempo is the reference).
        track_b: Filepath of the overlay track (gets time-stretched).
        offset_seconds: Where Track B enters, relative to Track A's start.
            Negative values shift Track A forward instead.
        volume_b: Linear gain applied to Track B before mixing.

    Returns:
        (output_path, info_text) on success, or (None, message) when an
        input is missing.
    """
    if track_a is None or track_b is None:
        return None, "Please upload both tracks."

    # Resample both tracks to one common rate so their sample buffers can
    # be summed directly. 44.1 kHz is standard audio quality.
    sr = 44100
    y_a, sr = librosa.load(track_a, sr=sr)
    y_b, sr = librosa.load(track_b, sr=sr)

    # --- BPM analysis ---
    tempo_a, _ = librosa.beat.beat_track(y=y_a, sr=sr)
    tempo_b, _ = librosa.beat.beat_track(y=y_b, sr=sr)

    # Depending on the librosa version, beat_track may return the tempo as
    # a 1-element ndarray instead of a scalar.
    if isinstance(tempo_a, np.ndarray):
        tempo_a = float(tempo_a[0])
    if isinstance(tempo_b, np.ndarray):
        tempo_b = float(tempo_b[0])

    # --- Time stretching (beatmatching) ---
    # time_stretch(y, rate): rate > 1 speeds audio UP, so Track B's new
    # tempo is tempo_b * rate.  To land on Track A's tempo we need
    # rate = tempo_a / tempo_b.  (BUGFIX: the previous code used
    # tempo_b / tempo_a, which stretched in the wrong direction.)
    if tempo_a > 0 and tempo_b > 0:
        stretch_factor = tempo_a / tempo_b
    else:
        # Beat tracking failed (e.g. silence) - leave Track B untouched
        # rather than dividing by zero.
        stretch_factor = 1.0

    if 0.5 < stretch_factor < 2.0:
        # Stretching beyond ~2x sounds bad, so only stretch in this range.
        # Note: this can be CPU intensive for long songs.
        y_b_stretched = librosa.effects.time_stretch(y_b, rate=stretch_factor)
        stretched = True
    else:
        # BPM gap too large: fall back to the unstretched audio.
        y_b_stretched = y_b
        stretched = False

    # --- Volume adjustment ---
    y_b_stretched = y_b_stretched * volume_b

    # --- Offset handling ---
    offset_samples = int(offset_seconds * sr)
    len_a = len(y_a)
    len_b = len(y_b_stretched)

    if offset_samples >= 0:
        # B starts after A: lay A at 0, B at the offset.
        total_length = max(len_a, offset_samples + len_b)
        mix = np.zeros(total_length)
        mix[:len_a] += y_a
        mix[offset_samples : offset_samples + len_b] += y_b_stretched
    else:
        # Negative offset: B starts "before" A, so shift A forward instead.
        abs_offset = abs(offset_samples)
        total_length = max(len_a + abs_offset, len_b)
        mix = np.zeros(total_length)
        mix[:len_b] += y_b_stretched
        mix[abs_offset : abs_offset + len_a] += y_a

    # --- Peak normalization (prevents clipping after summing) ---
    max_val = np.max(np.abs(mix))
    if max_val > 0:
        mix = mix / max_val

    # --- Export ---
    # BUGFIX: write WAV instead of MP3 - soundfile only supports MP3 with
    # libsndfile >= 1.1.0, so MP3 export fails on many installations.
    output_path = "mashup_result.wav"
    sf.write(output_path, mix, sr)

    stretch_note = (
        f"Track B was stretched by factor: {stretch_factor:.2f} to match Track A."
        if stretched
        else "BPM difference too large - Track B was NOT stretched."
    )
    info_text = (
        f"Track A BPM: {tempo_a:.2f}\n"
        f"Track B BPM: {tempo_b:.2f}\n"
        f"{stretch_note}"
    )
    return output_path, info_text


# --- User Interface ---
with gr.Blocks(title="AI Professional DJ Mashup") as demo:
    gr.Markdown("# 🎧 AI DJ Mashup Maker")
    gr.Markdown(
        "Upload two songs. The AI will automatically beat-match Song B to "
        "Song A. Use the slider to control when Song B comes in."
    )

    with gr.Row():
        with gr.Column():
            input_a = gr.Audio(type="filepath", label="Track A (Base Track)")
            input_b = gr.Audio(type="filepath", label="Track B (Overlay Track)")
        with gr.Column():
            offset_slider = gr.Slider(
                minimum=0,
                maximum=60,
                value=10,
                step=0.5,
                label="Mashup Start Offset (Seconds)",
            )
            vol_slider = gr.Slider(
                minimum=0.1,
                maximum=1.5,
                value=1.0,
                step=0.1,
                label="Track B Volume",
            )

    btn = gr.Button("Create Mashup", variant="primary")

    with gr.Row():
        output_audio = gr.Audio(label="Final Mashup")
        output_info = gr.Textbox(label="Mix Details")

    btn.click(
        fn=analyze_and_mix,
        inputs=[input_a, input_b, offset_slider, vol_slider],
        outputs=[output_audio, output_info],
    )

if __name__ == "__main__":
    demo.launch()