| import gradio as gr |
| import librosa |
| import numpy as np |
| import soundfile as sf |
| import scipy.signal |
| import tempfile |
| import os |
|
|
| |
| |
| |
|
|
def load_audio(path, target_sr=44100):
    """Decode an audio file into a mono float32 signal.

    Args:
        path: Location of the audio file, passed straight to ``librosa.load``.
        target_sr: Sample rate requested from ``librosa.load``.

    Returns:
        A ``(samples, sample_rate)`` tuple; ``samples`` is cast to float32.
    """
    samples, rate = librosa.load(path, sr=target_sr, mono=True)
    samples = samples.astype(np.float32)
    return samples, rate
|
|
|
|
def normalize(y, headroom_db=-1.0):
    """Peak-normalize a signal so its loudest sample sits at ``headroom_db`` dBFS.

    Args:
        y: NumPy array of audio samples.
        headroom_db: Target peak level in decibels relative to full scale
            (``-1.0`` means the peak ends up at ``10 ** (-1 / 20)``).

    Returns:
        The scaled signal. Empty or (near-)silent input is returned unchanged,
        because there is no meaningful peak to scale against.
    """
    # np.max raises ValueError on a zero-length array, so bail out first.
    if np.size(y) == 0:
        return y
    peak = np.max(np.abs(y))
    # Avoid dividing by ~0 and amplifying numerical noise in silent input.
    if peak < 1e-9:
        return y
    target = 10 ** (headroom_db / 20.0)
    return y * (target / peak)
|
|
|
|
def anti_click_window(y, ramp_samples=256):
    """Apply a short linear fade-in and fade-out to suppress edge clicks.

    Args:
        y: NumPy array of audio samples (floating point). Not modified.
        ramp_samples: Requested length of each fade, in samples.

    Returns:
        A copy of ``y`` whose first and last samples are ramped from/to zero.
        When the signal is shorter than two ramps, each ramp is shortened to
        half the signal length so the fades never overlap and the slice
        shapes always match; signals of at least ``2 * ramp_samples`` samples
        are processed exactly as requested.
    """
    out = y.copy()
    # Clamp so out[:n] / out[-n:] always have exactly n elements.
    n = min(int(ramp_samples), len(out) // 2)
    if n <= 0:
        # Nothing to fade (empty/one-sample signal or zero-length ramp).
        # Note: out[-0:] would select the WHOLE array, so n == 0 must not
        # reach the slicing below.
        return out
    ramp = np.linspace(0, 1, n).astype(np.float32)
    out[:n] *= ramp
    out[-n:] *= ramp[::-1]
    return out
|
|
|
|
| |
| |
| |
|
|
def detect_bpm(y, sr):
    """Estimate the tempo of a signal in beats per minute.

    Args:
        y: Audio samples handed to ``librosa.beat.beat_track``.
        sr: Sample rate of ``y``.

    Returns:
        The tempo as a plain Python float. The value returned by librosa is
        flattened first, so both scalar and array-shaped tempo results work.
    """
    tempo_estimate = librosa.beat.beat_track(y=y, sr=sr)[0]
    flat_tempo = np.asarray(tempo_estimate).flatten()
    return float(flat_tempo[0])
|
|
|
|
| |
| |
| |
|
|
def detect_key(y, sr):
    """Guess the musical key of a signal from its average chroma.

    The time-averaged CQT chroma vector is scored against binary
    seven-note scale masks (major and natural minor) rotated to each of the
    12 possible tonics; the best-scoring rotation gives the tonic.

    Args:
        y: Audio samples.
        sr: Sample rate of ``y``.

    Returns:
        ``(tonic_index, mode)`` where ``tonic_index`` is 0-11 and ``mode`` is
        ``"major"`` or ``"minor"``.

    NOTE(review): the minor mask is the major mask rotated by three
    semitones, so both score lists hold the same set of values and the
    ``>=`` tie-break always selects "major". The mode label therefore
    carries no information with these templates.
    """
    profile = librosa.feature.chroma_cqt(y=y, sr=sr).mean(axis=1)

    major_mask = np.array([1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1], dtype=float)
    minor_mask = np.array([1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0], dtype=float)

    def rotation_scores(mask):
        # One score per candidate tonic: overlap of the rotated mask
        # with the observed chroma energy.
        return [np.dot(np.roll(mask, tonic), profile) for tonic in range(12)]

    major_scores = rotation_scores(major_mask)
    minor_scores = rotation_scores(minor_mask)

    prefer_major = max(major_scores) >= max(minor_scores)
    if prefer_major:
        winner, mode = major_scores, "major"
    else:
        winner, mode = minor_scores, "minor"
    return int(np.argmax(winner)), mode
|
|
|
|
def semitones_to_shift(key_a, key_b):
    """Return the shortest signed semitone distance from ``key_a`` to ``key_b``.

    Both keys are pitch-class indices. The result lies in ``[-5, 6]``: an
    interval larger than a tritone is expressed as the equivalent downward
    move instead.
    """
    # Offsetting by 5 before the modulo folds 7..11 onto -5..-1 while
    # leaving 0..6 untouched.
    return (key_b - key_a + 5) % 12 - 5
|
|
|
|
def pitch_shift(y, sr, n_steps):
    """Transpose a signal by ``n_steps`` semitones.

    A zero shift short-circuits and returns the very same array object, so
    no processing (and no copy) happens in that case. Any other value is
    delegated to ``librosa.effects.pitch_shift``.
    """
    no_shift_needed = n_steps == 0
    if no_shift_needed:
        return y
    semitones = float(n_steps)
    return librosa.effects.pitch_shift(y=y, sr=sr, n_steps=semitones)
|
|
|
|
| |
| |
| |
|
|
def find_best_transition_point(y_a, sr, transition_sec, hop=512):
    """
    Score every frame of track A and pick the best mix-out point.

    Criteria:
      • Low RMS energy      → quieter passages are easier to mix out of
      • Low onset strength  → avoids starting in the middle of a busy passage
      • Beat-aligned        → every 4th detected beat is a candidate
      • Position            → search starts at 35 % of the track and is
                              weighted by a Gaussian centred at 55 %

    Args:
        y_a: Samples of the outgoing track.
        sr: Sample rate of ``y_a``.
        transition_sec: Length of the planned transition in seconds; the
            search window ends early enough to leave room for it.
        hop: Hop length (in samples) used for all frame-based features.

    Returns:
        The chosen transition start as a sample index (frame index * hop).
    """
    rms = librosa.feature.rms(y=y_a, hop_length=hop)[0]
    onset = librosa.onset.onset_strength(y=y_a, sr=sr, hop_length=hop)
    total_f = len(rms)
    trans_f = int(transition_sec * sr / hop)

    # Smooth the energy curve so single loud/quiet frames don't dominate.
    smooth_rms = np.convolve(rms, np.ones(30) / 30, mode='same')

    # High score = quiet AND few onsets (epsilons guard against 1/0).
    score = (1.0 / (smooth_rms + 1e-6)) * (1.0 / (onset + 1e-6))

    # Search window: from 35 % of the track up to the point that still
    # leaves the transition plus a 4-second safety margin before the end.
    w_start = int(total_f * 0.35)
    w_end = total_f - trans_f - int(4 * sr / hop)
    if w_end <= w_start:
        # Track too short for the preferred window: fall back to the last
        # ~20 frames that still leave room for the transition. (The window
        # end must be tested BEFORE clamping it above w_start, otherwise
        # this branch can never run.)
        w_start = max(0, total_f - trans_f - 20)
        w_end = max(w_start + 1, total_f - trans_f)

    # Gaussian position prior centred at 55 % of the track.
    centre = int(total_f * 0.55)
    sigma = total_f * 0.12
    pos_w = np.exp(-0.5 * ((np.arange(total_f) - centre) / sigma) ** 2)
    score = score * pos_w
    score[:w_start] = 0
    score[w_end:] = 0

    # Treat every fourth tracked beat as a downbeat candidate.
    _, beat_frames = librosa.beat.beat_track(y=y_a, sr=sr, hop_length=hop)
    downbeats = beat_frames[::4]
    candidates = [int(f) for f in downbeats if w_start <= f < w_end]

    if candidates:
        # Clamp the index in case a beat frame lies past the feature arrays.
        best_frame = max(candidates, key=lambda f: score[min(f, total_f - 1)])
    else:
        # No beat inside the window: take the best-scoring frame overall.
        best_frame = int(np.argmax(score))

    return int(best_frame) * hop
|
|
|
|
| |
| |
| |
|
|
| def _fit_to_length(y, n): |
| """Trim or zero-pad array to exactly n samples.""" |
| n = int(n) |
| if len(y) >= n: |
| return y[:n].astype(np.float32) |
| return np.pad(y, (0, n - len(y))).astype(np.float32) |
|
|
|
|
def build_gradual_stretch(y, sr, bpm_start, bpm_end, n_samples_target):
    """
    Produce a segment of exactly n_samples_target samples whose playback
    tempo is meant to morph linearly from bpm_start → bpm_end.

    Method: overlap-add of short (80 ms) Hann-windowed chunks, each
    independently time-stretched by the instantaneous rate
    ``bpm_start / bpm_now`` (clipped to [0.5, 2.0]).

    When the two tempi differ by less than 0.5 BPM (or ``bpm_end`` is below
    1), the whole segment is stretched once with a single rate instead.

    NOTE(review): the write cursor advances by a fixed half-window per
    chunk regardless of how long the stretched chunk is, so the overall
    output duration appears to follow the input length rather than the
    tempo ratio - confirm this is the intended effect.
    """
    # Degenerate / near-identical tempi: one global stretch is enough.
    if abs(bpm_start - bpm_end) < 0.5 or bpm_end < 1:
        global_rate = float(np.clip(bpm_start / max(bpm_end, 1), 0.5, 2.0))
        stretched_all = librosa.effects.time_stretch(y, rate=global_rate)
        return _fit_to_length(stretched_all, n_samples_target)

    win_len = int(sr * 0.08)          # 80 ms analysis chunk
    hop_len = win_len // 2            # 50 % overlap
    n_chunks = max(1, (len(y) - win_len) // hop_len + 1)
    last_index = max(n_chunks - 1, 1)

    # Generous initial buffer; grown on demand below.
    buffer = np.zeros(int(n_samples_target * 2 + sr), dtype=np.float32)
    cursor = 0

    for idx in range(n_chunks):
        # Linear tempo ramp across the chunks.
        progress = idx / last_index
        bpm_now = bpm_start + (bpm_end - bpm_start) * progress
        rate = float(np.clip(bpm_start / max(bpm_now, 1), 0.5, 2.0))

        begin = idx * hop_len
        chunk = y[begin:min(begin + win_len, len(y))]
        if len(chunk) >= 32:
            # Chunks shorter than 32 samples are skipped entirely
            # (the cursor does not advance for them).
            grain = librosa.effects.time_stretch(chunk, rate=rate)
            taper = np.hanning(len(grain)).astype(np.float32)
            grain = (grain * taper).astype(np.float32)

            stop = cursor + len(grain)
            if stop > len(buffer):
                growth = stop - len(buffer) + sr
                buffer = np.concatenate(
                    [buffer, np.zeros(growth, dtype=np.float32)]
                )
            buffer[cursor:stop] += grain
            cursor += hop_len

    return _fit_to_length(buffer[:cursor + win_len], n_samples_target)
|
|
|
|
| |
| |
| |
|
|
def lowpass_filter(y, sr, cutoff_hz=300, order=4):
    """Zero-phase Butterworth low-pass.

    Args:
        y: Input samples.
        sr: Sample rate of ``y``.
        cutoff_hz: Cutoff frequency in Hz.
        order: Butterworth filter order (applied forwards and backwards
            by ``filtfilt``).

    Returns:
        The filtered signal as float32. If the cutoff is at or above the
        Nyquist frequency the input is returned untouched.
    """
    nyquist = sr / 2.0
    if cutoff_hz >= nyquist:
        return y
    normalized_cutoff = cutoff_hz / nyquist
    numer, denom = scipy.signal.butter(order, normalized_cutoff, btype='low')
    filtered = scipy.signal.filtfilt(numer, denom, y)
    return filtered.astype(np.float32)
|
|
|
|
def highpass_filter(y, sr, cutoff_hz=300, order=4):
    """Zero-phase Butterworth high-pass.

    Args:
        y: Input samples.
        sr: Sample rate of ``y``.
        cutoff_hz: Cutoff frequency in Hz.
        order: Butterworth filter order (applied forwards and backwards
            by ``filtfilt``).

    Returns:
        The filtered signal as float32. If the cutoff is at or above the
        Nyquist frequency the input is returned untouched.
    """
    nyquist = sr / 2.0
    if cutoff_hz >= nyquist:
        return y
    normalized_cutoff = cutoff_hz / nyquist
    numer, denom = scipy.signal.butter(order, normalized_cutoff, btype='high')
    filtered = scipy.signal.filtfilt(numer, denom, y)
    return filtered.astype(np.float32)
|
|
|
|
def make_crossfade_curve(length, style="smooth"):
    """Build a pair of gain envelopes for a crossfade.

    Args:
        length: Number of samples in each envelope (``0`` yields two empty
            arrays).
        style: ``"smooth"`` selects complementary raised-cosine curves that
            always sum to 1; any other value selects an exponential curve
            (fast drop of the outgoing track, ``exp(-4x)``).

    Returns:
        ``(fade_out, fade_in)`` as float32 arrays of ``length`` samples.
    """
    if style == "smooth":
        t = np.linspace(0, np.pi, length)
        fade_out = 0.5 * (1 + np.cos(t))
        fade_in = 0.5 * (1 - np.cos(t))
    else:
        x = np.linspace(0, 1, length)
        # exp(-4 * 0) is exactly 1, so the curve already starts at unity
        # gain; no normalisation by fade_out[0] is needed (and indexing
        # element 0 would fail for an empty envelope).
        fade_out = np.exp(-4 * x)
        fade_in = 1.0 - fade_out
    return fade_out.astype(np.float32), fade_in.astype(np.float32)
|
|
|
|
def eq_crossfade(seg_a, seg_b, sr, fade_out, fade_in):
    """Crossfade two equal-length segments band by band.

    Each segment is split into a low band and a high band with the
    module's low-pass / high-pass helpers (default cutoff). Both bands of
    segment A are scaled by ``fade_out`` and both bands of segment B by
    ``fade_in``; the four products are summed.

    Note: as written, the same pair of curves is applied to the low and
    the high band.

    Returns:
        The blended segment as float32.
    """
    low_a = lowpass_filter(seg_a, sr)
    high_a = highpass_filter(seg_a, sr)
    low_b = lowpass_filter(seg_b, sr)
    high_b = highpass_filter(seg_b, sr)

    low_mix = low_a * fade_out + low_b * fade_in
    high_mix = high_a * fade_out + high_b * fade_in
    return (low_mix + high_mix).astype(np.float32)
|
|
|
|
| |
| |
| |
|
|
def automix(file_a_path, file_b_path, transition_sec=10, mix_style="Smooth Mix"):
    """Mix track A into track B and write the result to a WAV file.

    Pipeline: load + normalize both tracks, estimate tempo and key, pitch
    shift track B onto track A's key, pick a transition point in track A,
    tempo-morph both transition segments, crossfade them, then concatenate
    ``A before transition`` + ``blend`` + ``rest of B``.

    Args:
        file_a_path: Path of the outgoing track.
        file_b_path: Path of the incoming track.
        transition_sec: Length of the crossfade region in seconds.
        mix_style: UI label; any value containing ``"Aggressive"`` selects
            the exponential crossfade, everything else the smooth one.

    Returns:
        ``(out_path, bpm_a, bpm_b, key_a, key_b, shift, trans_time_sec)``
        where ``out_path`` is the written 16-bit PCM WAV file, ``shift`` is
        the semitone distance from key A to key B, and ``trans_time_sec``
        is the transition start inside track A in seconds.
    """
    SR = 44100

    # Load and level both tracks.
    y_a, _ = load_audio(file_a_path, SR)
    y_b, _ = load_audio(file_b_path, SR)
    y_a = normalize(y_a)
    y_b = normalize(y_b)

    # Tempo analysis.
    bpm_a = detect_bpm(y_a, SR)
    bpm_b = detect_bpm(y_b, SR)

    # Key analysis; move track B onto track A's key.
    key_a, _ = detect_key(y_a, SR)
    key_b, _ = detect_key(y_b, SR)
    shift = semitones_to_shift(key_a, key_b)
    if abs(shift) > 0:
        y_b = pitch_shift(y_b, SR, -shift)
        # Pitch shifting can change the peak level, so re-level.
        y_b = normalize(y_b)

    # Transition placement, clamped so the whole transition fits in track A.
    trans_samples = int(transition_sec * SR)
    trans_start = find_best_transition_point(y_a, SR, transition_sec)
    trans_start = int(np.clip(trans_start, 0,
                              max(0, len(y_a) - trans_samples - 1)))
    trans_end = trans_start + trans_samples

    # Outgoing segment: tempo morphs from A's BPM towards B's.
    seg_a_raw = y_a[trans_start:trans_end]
    zone_a = build_gradual_stretch(seg_a_raw, SR,
                                   bpm_start=bpm_a,
                                   bpm_end=bpm_b,
                                   n_samples_target=trans_samples)

    # Incoming segment: first `trans_samples` of B, zero-padded if B is short.
    if len(y_b) < trans_samples:
        seg_b_raw = np.pad(y_b, (0, trans_samples - len(y_b)))
    else:
        seg_b_raw = y_b[:trans_samples]

    zone_b = build_gradual_stretch(seg_b_raw, SR,
                                   bpm_start=bpm_b,
                                   bpm_end=bpm_a,
                                   n_samples_target=trans_samples)

    # Remainder of track B plays unprocessed after the blend.
    tail_b = y_b[trans_samples:] if len(y_b) > trans_samples else np.array([], dtype=np.float32)

    # Crossfade the two transition zones.
    style_key = "aggressive" if "Aggressive" in mix_style else "smooth"
    fade_out, fade_in = make_crossfade_curve(trans_samples, style_key)
    blend = eq_crossfade(zone_a, zone_b, SR, fade_out, fade_in)

    # Assemble the final timeline.
    pre_a = y_a[:trans_start]
    mix = np.concatenate([pre_a, blend, tail_b])

    # Final polish: de-click the file edges and set the output level.
    mix = anti_click_window(mix)
    mix = normalize(mix, headroom_db=-0.5)

    # Write to a unique temp file: a fixed file name would be shared by
    # every request, so overlapping or successive mixes would overwrite
    # each other's output.
    fd, out_path = tempfile.mkstemp(prefix="automix_", suffix=".wav")
    os.close(fd)  # soundfile reopens the path itself
    sf.write(out_path, mix, SR, subtype='PCM_16')

    trans_time_sec = trans_start / SR
    return out_path, bpm_a, bpm_b, key_a, key_b, shift, trans_time_sec
|
|
|
|
| |
| |
| |
|
|
# Pitch-class names indexed by chroma bin / tonic index (0 = C ... 11 = B).
KEY_NAMES = [
    "C", "C#", "D", "D#", "E", "F",
    "F#", "G", "G#", "A", "A#", "B",
]
|
|
# Stylesheet for the Gradio page: dark theme colour variables, header and
# title styling, panel / button / stats-box classes referenced through
# `elem_classes`, plus a few overrides for built-in Gradio elements.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Syne:wght@400;700;800&family=DM+Mono:wght@300;400;500&display=swap');

:root {
--bg: #0a0a0f;
--surface: #111118;
--border: #1f1f2e;
--accent1: #ff3cac;
--accent2: #00f0ff;
--accent3: #7928ca;
--text: #e8e8f0;
--muted: #6b6b80;
--radius: 12px;
}

body, .gradio-container {
background: var(--bg) !important;
font-family: 'DM Mono', monospace !important;
color: var(--text) !important;
}

.app-header {
text-align: center;
padding: 48px 20px 32px;
position: relative;
}
.app-header::before {
content: '';
position: absolute;
inset: 0;
background: radial-gradient(ellipse 80% 60% at 50% 0%, rgba(121,40,202,.18) 0%, transparent 70%);
pointer-events: none;
}
.app-title {
font-family: 'Syne', sans-serif !important;
font-size: clamp(2.2rem, 5vw, 3.8rem) !important;
font-weight: 800 !important;
letter-spacing: -1px;
background: linear-gradient(135deg, var(--accent1) 0%, var(--accent2) 55%, var(--accent3) 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
margin: 0 0 8px;
}
.app-sub {
color: var(--muted);
font-size: .85rem;
letter-spacing: .12em;
text-transform: uppercase;
}
.panel {
background: var(--surface);
border: 1px solid var(--border);
border-radius: var(--radius);
padding: 24px;
}
.mix-btn {
background: linear-gradient(135deg, var(--accent1), var(--accent3)) !important;
border: none !important;
color: #fff !important;
font-family: 'Syne', sans-serif !important;
font-weight: 700 !important;
font-size: 1rem !important;
letter-spacing: .08em !important;
padding: 14px 36px !important;
border-radius: 8px !important;
cursor: pointer !important;
transition: opacity .2s, transform .15s !important;
}
.mix-btn:hover { opacity: .88; transform: translateY(-1px); }
.stats-box {
background: #0d0d14 !important;
border: 1px solid var(--border) !important;
border-radius: 8px !important;
padding: 16px 20px !important;
font-size: .78rem !important;
color: var(--muted) !important;
line-height: 1.8 !important;
white-space: pre-wrap !important;
}
.gr-block, .gr-box { background: var(--surface) !important; border-color: var(--border) !important; }
label { color: var(--muted) !important; font-size: .78rem !important; letter-spacing: .1em !important; text-transform: uppercase !important; }
input[type=range] { accent-color: var(--accent1); }
select, .gr-dropdown { background: #0d0d14 !important; color: var(--text) !important; border-color: var(--border) !important; }
audio { width: 100%; border-radius: 8px; }
.divider { border: none; border-top: 1px solid var(--border); margin: 24px 0; }
"""
|
|
|
|
def run_mix(file_a, file_b, transition_sec, mix_style):
    """Gradio click handler: run the mix and format a summary for the UI.

    Args:
        file_a: File path of the outgoing track, or ``None`` if not uploaded.
        file_b: File path of the incoming track, or ``None`` if not uploaded.
        transition_sec: Slider value; truncated to whole seconds.
        mix_style: Selected mix-style label, forwarded to ``automix``.

    Returns:
        ``(output_path, text)``. On success ``output_path`` is the rendered
        WAV and ``text`` the analysis summary; when an input is missing or
        anything raises, ``output_path`` is ``None`` and ``text`` explains
        the problem (including the traceback, since this is the top-level
        boundary of the app).
    """
    if file_a is None or file_b is None:
        return None, "⚠ Please upload both tracks."
    try:
        out_path, bpm_a, bpm_b, key_a, key_b, shift, trans_sec = automix(
            file_a, file_b,
            transition_sec=int(transition_sec),
            mix_style=mix_style
        )
        # Transition start as mm:ss inside track A.
        mm = int(trans_sec // 60)
        ss = int(trans_sec % 60)
        # The symbols below restore characters that were mis-decoded
        # (UTF-8 bytes shown through a single-byte code page).
        stats = (
            f"TRACK A → BPM: {bpm_a:.1f} KEY: {KEY_NAMES[key_a]}\n"
            f"TRACK B → BPM: {bpm_b:.1f} KEY: {KEY_NAMES[key_b]}\n"
            f"─────────────────────────────────────────\n"
            f"TRANSITION START : {mm:02d}:{ss:02d} (auto-detected downbeat)\n"
            f"TRANSITION LENGTH : {int(transition_sec)} seconds\n"
            f"MIX STYLE : {mix_style.upper()}\n"
            f"BPM MORPH : {bpm_a:.1f} → {bpm_b:.1f} (gradual during transition)\n"
            f" Track B reverts to {bpm_b:.1f} BPM after mix\n"
            f"PITCH CORRECTION : {'+' if shift >= 0 else ''}{shift} semitones\n"
            f"OUTPUT : 44100 Hz · 16-bit PCM WAV"
        )
        return out_path, stats
    except Exception as e:
        # UI boundary: surface the failure in the stats box instead of
        # letting the request crash.
        import traceback
        return None, f"Error: {str(e)}\n\n{traceback.format_exc()}"
|
|
|
|
# ---------------------------------------------------------------------------
# Gradio UI: two upload panels, transition controls, output player and a
# stats box. Non-ASCII symbols in the labels restore characters that were
# mis-decoded (UTF-8 bytes shown through a single-byte code page).
# ---------------------------------------------------------------------------
with gr.Blocks(title="AI AutoMix DJ") as demo:

    # Inject the stylesheet as a plain <style> element.
    gr.HTML("<style>" + CSS + "</style>")

    # Page header.
    gr.HTML("""
<div class="app-header">
<div class="app-title">AI AutoMix DJ</div>
<div class="app-sub">Gradual BPM Morph · Smart Downbeat Detection · EQ Harmonic Mixing</div>
</div>
""")

    # Track uploads.
    with gr.Row():
        with gr.Column(scale=1):
            gr.HTML('<div style="color:#6b6b80;font-size:.75rem;text-transform:uppercase;'
                    'letter-spacing:.1em;margin-bottom:6px;">Track A — Outgoing</div>')
            file_a = gr.Audio(label="", type="filepath", elem_classes=["panel"])
        with gr.Column(scale=1):
            gr.HTML('<div style="color:#6b6b80;font-size:.75rem;text-transform:uppercase;'
                    'letter-spacing:.1em;margin-bottom:6px;">Track B — Incoming</div>')
            file_b = gr.Audio(label="", type="filepath", elem_classes=["panel"])

    gr.HTML('<hr class="divider"/>')

    # Mix controls.
    with gr.Row():
        with gr.Column(scale=2):
            transition_slider = gr.Slider(minimum=5, maximum=15, value=10, step=1,
                                          label="Transition Duration (seconds)")
        with gr.Column(scale=2):
            mix_style = gr.Dropdown(choices=["Smooth Mix", "Aggressive Mix"],
                                    value="Smooth Mix", label="Mix Style")
        with gr.Column(scale=1):
            mix_btn = gr.Button("▶ Mix Now", elem_classes=["mix-btn"])

    gr.HTML('<hr class="divider"/>')

    # Results.
    with gr.Row():
        with gr.Column(scale=3):
            gr.HTML('<div style="color:#6b6b80;font-size:.75rem;text-transform:uppercase;'
                    'letter-spacing:.1em;margin-bottom:8px;">Output Mix</div>')
            output_audio = gr.Audio(label="", type="filepath")
        with gr.Column(scale=2):
            gr.HTML('<div style="color:#6b6b80;font-size:.75rem;text-transform:uppercase;'
                    'letter-spacing:.1em;margin-bottom:8px;">Mix Analysis</div>')
            stats_box = gr.Textbox(value="Waiting for mix…", label="",
                                   lines=9, max_lines=12, elem_classes=["stats-box"])

    # Wire the button to the mixing handler.
    mix_btn.click(fn=run_mix,
                  inputs=[file_a, file_b, transition_slider, mix_style],
                  outputs=[output_audio, stats_box])

    # Footer.
    gr.HTML("""
<div style="text-align:center;margin-top:32px;color:#2a2a3a;font-size:.72rem;letter-spacing:.12em;">
AI AUTOMIX DJ · GRADUAL BPM MORPH ENGINE · LIBROSA · GRADIO · SCIPY
</div>
""")


if __name__ == "__main__":
    demo.launch()
|
|