import gradio as gr
import librosa
import numpy as np
import soundfile as sf
import scipy.signal
import tempfile
import os

# ─────────────────────────────────────────────
#  AUDIO I/O & BASIC PROCESSING
# ─────────────────────────────────────────────

def load_audio(path, target_sr=44100):
    """Decode an audio file into a mono float32 signal.

    Args:
        path: Location of the audio file, passed straight to ``librosa.load``.
        target_sr: Sample rate requested from ``librosa.load``.

    Returns:
        ``(samples, sample_rate)`` where ``samples`` has been cast to
        ``np.float32`` and ``sample_rate`` is the rate reported by librosa.
    """
    samples, rate = librosa.load(path, mono=True, sr=target_sr)
    samples = samples.astype(np.float32)
    return samples, rate


def normalize(y, headroom_db=-1.0):
    """Scale a signal so its absolute peak sits at ``headroom_db`` dBFS.

    Args:
        y: Audio samples (array-like).
        headroom_db: Target peak level in decibels relative to full scale
            (``-1.0`` leaves 1 dB of headroom).

    Returns:
        The rescaled samples. Empty and (near-)silent input is returned
        unscaled, because there is no meaningful peak to normalise against.
    """
    y = np.asarray(y)
    # np.max raises on an empty array, so bail out before looking for a peak.
    if y.size == 0:
        return y
    peak = np.max(np.abs(y))
    # Avoid dividing by ~0 (which would blow noise up to full scale).
    if peak < 1e-9:
        return y
    target = 10 ** (headroom_db / 20.0)
    return y * (target / peak)


def anti_click_window(y, ramp_samples=256):
    """Apply a short linear fade-in and fade-out to avoid edge clicks.

    Args:
        y: 1-D array of audio samples. It is not modified; a copy is returned.
        ramp_samples: Length of each fade in samples. It is clamped to the
            signal length so short clips no longer raise a broadcast error.

    Returns:
        A copy of ``y`` whose first and last ``ramp_samples`` samples are
        multiplied by a 0→1 and a 1→0 linear ramp respectively.
    """
    out = y.copy()
    n = min(int(ramp_samples), len(out))
    # With n == 0 the slice out[-0:] would select the WHOLE array, so a
    # zero-length ramp must be handled explicitly.
    if n <= 0:
        return out
    ramp = np.linspace(0, 1, n).astype(np.float32)
    out[:n] *= ramp
    out[-n:] *= ramp[::-1]
    return out


# ─────────────────────────────────────────────
#  BPM DETECTION
# ─────────────────────────────────────────────

def detect_bpm(y, sr):
    """Estimate the tempo of a signal in beats per minute.

    Args:
        y: Audio samples.
        sr: Sample rate of ``y``.

    Returns:
        The first tempo value reported by ``librosa.beat.beat_track`` as a
        plain Python float (the result may be a scalar or an array, so it is
        flattened before the first element is taken).
    """
    tracked = librosa.beat.beat_track(y=y, sr=sr)
    tempo_values = np.ravel(tracked[0])
    return float(tempo_values[0])


# ─────────────────────────────────────────────
#  KEY DETECTION
# ─────────────────────────────────────────────

def detect_key(y, sr):
    """Guess the musical key by matching the mean chroma to scale templates.

    The time-averaged chroma vector is correlated with a binary major-scale
    and a binary natural-minor-scale template at all 12 rotations; the
    best-scoring rotation gives the tonic.

    Args:
        y: Audio samples.
        sr: Sample rate of ``y``.

    Returns:
        ``(tonic, mode)`` where ``tonic`` is a pitch-class index 0-11 and
        ``mode`` is ``"major"`` or ``"minor"``. Ties go to major.
    """
    profile = librosa.feature.chroma_cqt(y=y, sr=sr).mean(axis=1)

    scale_major = np.array([1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1], dtype=float)
    scale_minor = np.array([1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0], dtype=float)

    def rotation_scores(template):
        # One score per candidate tonic: rotate the template, then correlate.
        return [np.dot(np.roll(template, tonic), profile) for tonic in range(12)]

    major_scores = rotation_scores(scale_major)
    minor_scores = rotation_scores(scale_minor)

    if max(major_scores) >= max(minor_scores):
        winner, mode = major_scores, "major"
    else:
        winner, mode = minor_scores, "minor"
    return int(np.argmax(winner)), mode


def semitones_to_shift(key_a, key_b):
    """Return the shortest signed semitone distance from ``key_a`` to ``key_b``.

    Both keys are pitch-class indices. The result lies in ``-5 .. 6``: a
    tritone is reported as ``+6`` and anything further wraps downwards.
    """
    # Offsetting by 5 before the modulo folds 7..11 onto -5..-1 in one step.
    return (key_b - key_a + 5) % 12 - 5


def pitch_shift(y, sr, n_steps):
    """Transpose a signal by ``n_steps`` semitones.

    Args:
        y: Audio samples.
        sr: Sample rate of ``y``.
        n_steps: Semitones to shift by (sign gives the direction).

    Returns:
        The shifted signal from ``librosa.effects.pitch_shift``. A shift of
        zero returns ``y`` itself, untouched.
    """
    if n_steps != 0:
        return librosa.effects.pitch_shift(y=y, sr=sr, n_steps=float(n_steps))
    return y


# ─────────────────────────────────────────────
#  SMART TRANSITION POINT (downbeat + breakdown)
# ─────────────────────────────────────────────

def find_best_transition_point(y_a, sr, transition_sec, hop=512):
    """
    Score every frame of track A and pick the best mix-out point.

    Criteria:
      • Low RMS energy  →  breakdown / quiet passage = easier to mix out
      • Low onset strength  →  not mid-drum-fill
      • Beat-aligned (every 4th tracked beat)  →  rhythmic correctness
      • Position: from 35 % of the track up to (end − transition − 4 s),
        with a Gaussian preference centred at 55 %

    If the track is too short for that window, the search falls back to the
    last 20 frames that still leave room for the transition.

    Args:
        y_a: Samples of the outgoing track.
        sr: Sample rate of ``y_a``.
        transition_sec: Planned transition length in seconds.
        hop: Analysis hop length in samples.

    Returns:
        Sample index (a multiple of ``hop``) at which to start the transition.
    """
    rms    = librosa.feature.rms(y=y_a, hop_length=hop)[0]
    onset  = librosa.onset.onset_strength(y=y_a, sr=sr, hop_length=hop)

    # Work on a common frame count so the element-wise maths below cannot
    # fail on a length mismatch between the two features.
    total_f = min(len(rms), len(onset))
    if total_f == 0:
        return 0
    rms, onset = rms[:total_f], onset[:total_f]

    trans_f = int(transition_sec * sr / hop)
    guard_f = int(4 * sr / hop)           # keep 4 s clear after the transition

    # Smooth energy over 30 frames (≈0.35 s at 44.1 kHz / hop 512). The kernel
    # is capped at the frame count because np.convolve(mode='same') returns
    # max(len(signal), len(kernel)) values.
    kernel_len = min(30, total_f)
    smooth_rms = np.convolve(rms, np.ones(kernel_len) / kernel_len, mode='same')

    # Score: favour low energy AND low onset density
    score = (1.0 / (smooth_rms + 1e-6)) * (1.0 / (onset + 1e-6))

    # Position window. w_end is deliberately NOT clamped before the check,
    # otherwise the short-track fallback below could never trigger.
    w_start = int(total_f * 0.35)
    w_end   = total_f - trans_f - guard_f
    if w_end <= w_start:
        w_start = max(0, total_f - trans_f - 20)
        w_end   = max(w_start + 1, total_f - trans_f)

    # Gaussian position weight centred at 55 % of track
    centre = int(total_f * 0.55)
    sigma  = total_f * 0.12
    pos_w  = np.exp(-0.5 * ((np.arange(total_f) - centre) / sigma) ** 2)
    score  = score * pos_w
    score[:w_start] = 0
    score[w_end:]   = 0

    # Restrict to every 4th tracked beat (treated as downbeats)
    _, beat_frames = librosa.beat.beat_track(y=y_a, sr=sr, hop_length=hop)
    downbeats = beat_frames[::4]
    candidates = [int(f) for f in downbeats if w_start <= f < w_end]

    if candidates:
        best_frame = max(candidates, key=lambda f: score[min(f, total_f - 1)])
    else:
        # No beat landed inside the window: take the best-scoring frame.
        best_frame = int(np.argmax(score))

    return int(best_frame) * hop


# ─────────────────────────────────────────────
#  GRADUAL BPM MORPH ENGINE
# ─────────────────────────────────────────────

def _fit_to_length(y, n):
    """Trim or zero-pad array to exactly n samples."""
    n = int(n)
    if len(y) >= n:
        return y[:n].astype(np.float32)
    return np.pad(y, (0, n - len(y))).astype(np.float32)


def build_gradual_stretch(y, sr, bpm_start, bpm_end, n_samples_target):
    """
    Produce a segment of exactly n_samples_target samples whose playback
    tempo morphs from bpm_start → bpm_end.

    ``y`` is assumed to be recorded at ``bpm_start``; the morph is linear in
    the position within ``y``.

    Method: overlap-add of short (80 ms) Hann-windowed chunks, each
    independently time-stretched by the instantaneous rate
    ``bpm_now / bpm_start`` (librosa convention: rate > 1 plays faster).
    Each stretched chunk is written half a *stretched* chunk after the
    previous one, so the output timeline really compresses or expands, and
    the sum is divided by the accumulated window so the gain stays flat.

    Args:
        y: Source samples at tempo ``bpm_start``.
        sr: Sample rate of ``y``.
        bpm_start: Tempo at the beginning of the segment.
        bpm_end: Tempo at the end of the segment.
        n_samples_target: Required output length in samples.

    Returns:
        float32 array of exactly ``n_samples_target`` samples. If the
        stretched material is shorter than that (e.g. when speeding up) the
        tail is zero-padded; if longer it is truncated. When either BPM is
        below 1 (tempo detection failed) or ``y`` is empty, the input is
        returned unstretched, fitted to length.
    """
    n_out = max(int(n_samples_target), 0)
    y = np.asarray(y, dtype=np.float32)

    # An invalid tempo must not turn into a clipped 2x / 0.5x stretch.
    if len(y) == 0 or bpm_start < 1 or bpm_end < 1:
        return _fit_to_length(y, n_out)

    if abs(bpm_start - bpm_end) < 0.5:
        # Tempos are practically equal: one constant-rate stretch is enough.
        rate = float(np.clip(bpm_end / bpm_start, 0.5, 2.0))
        return _fit_to_length(librosa.effects.time_stretch(y, rate=rate), n_out)

    win = max(int(sr * 0.08), 64)   # 80 ms analysis window
    hop = win // 2                  # read hop (50 % overlap on the input)
    n_chunks = max(1, (len(y) - win) // hop + 1)

    # A stretched chunk is at most ~2x win long (rate >= 0.5) and writing
    # stops once write_pos reaches n_out, so this margin is sufficient.
    buf_len  = n_out + 4 * win
    out_buf  = np.zeros(buf_len, dtype=np.float32)
    norm_buf = np.zeros(buf_len, dtype=np.float32)
    write_pos = 0

    for i in range(n_chunks):
        if write_pos >= n_out:
            break                   # everything further would be trimmed
        t       = i / max(n_chunks - 1, 1)                   # 0 → 1
        bpm_now = bpm_start + (bpm_end - bpm_start) * t
        rate    = float(np.clip(bpm_now / bpm_start, 0.5, 2.0))

        chunk = y[i * hop:i * hop + win]
        if len(chunk) < 32:
            continue

        stretched = np.asarray(
            librosa.effects.time_stretch(chunk, rate=rate), dtype=np.float32)
        if len(stretched) == 0:
            continue

        # Hann envelope without its zero end points: strictly positive, so
        # the window-sum normalisation below is defined for every sample.
        env = np.hanning(len(stretched) + 2)[1:-1].astype(np.float32)

        end  = min(write_pos + len(stretched), buf_len)
        span = end - write_pos
        out_buf[write_pos:end]  += stretched[:span] * env[:span]
        norm_buf[write_pos:end] += env[:span]

        # Advance by half of the STRETCHED chunk so successive chunks keep
        # a 50 % overlap in the output and the tempo change is audible.
        write_pos += max(1, len(stretched) // 2)

    # Undo the window weighting up to the midpoint of the last chunk. This
    # removes gain ripple and the fade-in of the very first chunk; the last
    # half chunk keeps its Hann decay as a natural fade into any padding.
    head = min(write_pos, buf_len)
    out_buf[:head] /= np.maximum(norm_buf[:head], 1e-8)

    return _fit_to_length(out_buf, n_out)


# ─────────────────────────────────────────────
#  EQ / FILTERS
# ─────────────────────────────────────────────

def lowpass_filter(y, sr, cutoff_hz=300, order=4):
    """Zero-phase Butterworth low-pass.

    Args:
        y: Audio samples.
        sr: Sample rate of ``y``.
        cutoff_hz: Corner frequency in Hz.
        order: Butterworth order (applied forwards and backwards by
            ``filtfilt``).

    Returns:
        The filtered signal as float32. If the cutoff is at or above the
        Nyquist frequency nothing would be removed, so ``y`` is returned
        unchanged.
    """
    nyquist = sr / 2.0
    if not cutoff_hz >= nyquist:
        normalized_cutoff = cutoff_hz / nyquist
        coeffs = scipy.signal.butter(order, normalized_cutoff, btype='low')
        filtered = scipy.signal.filtfilt(coeffs[0], coeffs[1], y)
        return filtered.astype(np.float32)
    return y


def highpass_filter(y, sr, cutoff_hz=300, order=4):
    """Zero-phase Butterworth high-pass.

    Args:
        y: Audio samples.
        sr: Sample rate of ``y``.
        cutoff_hz: Corner frequency in Hz.
        order: Butterworth order (applied forwards and backwards by
            ``filtfilt``).

    Returns:
        The filtered signal as float32. If the cutoff is at or above the
        Nyquist frequency, every representable frequency lies below the
        cutoff, so the result is silence of the same shape as ``y``.
    """
    nyq = sr / 2.0
    if cutoff_hz >= nyq:
        # A high-pass above Nyquist blocks everything; returning y here
        # would make low-pass + high-pass add up to twice the input.
        return np.zeros_like(y, dtype=np.float32)
    b, a = scipy.signal.butter(order, cutoff_hz / nyq, btype='high')
    return scipy.signal.filtfilt(b, a, y).astype(np.float32)


def make_crossfade_curve(length, style="smooth"):
    """Build complementary fade-out / fade-in gain curves.

    Args:
        length: Number of samples in each curve.
        style: ``"smooth"`` gives a raised-cosine pair; any other value gives
            an exponential pair that drops quickly at the start.

    Returns:
        ``(fade_out, fade_in)`` as float32 arrays of ``length`` samples.
        For both styles the curves sum to 1 and, for ``length >= 2``,
        ``fade_out`` runs exactly 1 → 0 and ``fade_in`` exactly 0 → 1, so the
        blend joins the unfaded audio before and after it without a gain
        step. ``length == 0`` yields two empty arrays.
    """
    length = max(int(length), 0)
    if style == "smooth":
        t = np.linspace(0, np.pi, length)
        fade_out = 0.5 * (1 + np.cos(t))
        fade_in  = 0.5 * (1 - np.cos(t))
    else:
        x     = np.linspace(0, 1, length)
        decay = np.exp(-4 * x)
        floor = np.exp(-4.0)
        # Rescale so the curve actually reaches 0 at the end instead of
        # stopping at exp(-4) ≈ 0.018 (which left a small jump at the seam).
        fade_out = (decay - floor) / (1.0 - floor)
        fade_in  = 1.0 - fade_out
    return fade_out.astype(np.float32), fade_in.astype(np.float32)


def eq_crossfade(seg_a, seg_b, sr, fade_out, fade_in):
    """Bass-split EQ crossfade: cut A's sub while bringing in B's sub.

    Each segment is split into a bass band and a mid/high band with the
    default low-pass / high-pass filters. The mid/high bands follow the
    supplied gain curves, while the bass bands follow the *squared* curves:
    A's bass drops faster and B's bass arrives later, so the two low ends
    are never both near full level.

    Applying the same gains to both bands would make the split pointless:
    the zero-phase Butterworth low/high pair sums back to the input, which
    reduces the whole operation to a plain crossfade.

    Args:
        seg_a: Outgoing segment.
        seg_b: Incoming segment, same length as ``seg_a``.
        sr: Sample rate of both segments.
        fade_out: Gain curve for ``seg_a`` (same length as the segments).
        fade_in: Gain curve for ``seg_b`` (same length as the segments).

    Returns:
        The blended segment as float32.
    """
    bass_a = lowpass_filter(seg_a, sr)
    mids_a = highpass_filter(seg_a, sr)
    bass_b = lowpass_filter(seg_b, sr)
    mids_b = highpass_filter(seg_b, sr)

    # Squaring keeps the end points (0 and 1) but steepens the curve, which
    # ducks the combined bass in the middle of the transition.
    bass_out = np.square(np.asarray(fade_out))
    bass_in  = np.square(np.asarray(fade_in))

    return ((bass_a * bass_out + bass_b * bass_in) +
            (mids_a * fade_out + mids_b * fade_in)).astype(np.float32)


# ─────────────────────────────────────────────
#  MAIN MIX ENGINE
# ─────────────────────────────────────────────

def automix(file_a_path, file_b_path, transition_sec=10, mix_style="Smooth Mix"):
    """Mix track A into track B and write the result to a WAV file.

    Pipeline: load + normalise both tracks, detect BPM and key, pitch-shift
    track B onto track A's tonic, choose a transition start in track A,
    tempo-morph both transition segments, crossfade them, then concatenate
    ``A (before transition) + blend + B (after its intro)``.

    Args:
        file_a_path: Path of the outgoing track.
        file_b_path: Path of the incoming track.
        transition_sec: Transition length in seconds; must be positive.
        mix_style: UI style label; any label containing ``"Aggressive"``
            selects the aggressive crossfade curve, anything else the smooth
            one.

    Returns:
        ``(out_path, bpm_a, bpm_b, key_a, key_b, shift, trans_time_sec)``
        where ``shift`` is the semitone distance from A's key to B's key
        (track B is shifted by ``-shift``) and ``trans_time_sec`` is the
        transition start within track A.

    Raises:
        ValueError: If ``transition_sec`` is not positive.
    """
    SR = 44100

    if transition_sec <= 0:
        raise ValueError("transition_sec must be positive")

    # 1 ── Load & normalize ───────────────────────────────────────────────
    y_a, _ = load_audio(file_a_path, SR)
    y_b, _ = load_audio(file_b_path, SR)
    y_a = normalize(y_a)
    y_b = normalize(y_b)

    # 2 ── BPM detection ──────────────────────────────────────────────────
    bpm_a = detect_bpm(y_a, SR)
    bpm_b = detect_bpm(y_b, SR)

    # 3 ── Key / pitch correction on track B ──────────────────────────────
    key_a, _ = detect_key(y_a, SR)
    key_b, _ = detect_key(y_b, SR)
    shift     = semitones_to_shift(key_a, key_b)
    if shift != 0:
        # Move B by the opposite amount so its tonic lands on A's.
        y_b = pitch_shift(y_b, SR, -shift)
        y_b = normalize(y_b)

    # 4 ── Smart transition point (downbeat in a breakdown) ────────────────
    trans_samples = int(transition_sec * SR)
    trans_start   = find_best_transition_point(y_a, SR, transition_sec)
    # Keep the whole transition inside track A whenever A is long enough.
    trans_start   = int(np.clip(trans_start, 0,
                                max(0, len(y_a) - trans_samples - 1)))
    trans_end     = trans_start + trans_samples

    # 5 ── Gradual BPM morph segments ─────────────────────────────────────
    #
    #  zone_a : Track A's outro — tempo morphs FROM bpm_a TO bpm_b
    #
    #  zone_b : Track B's intro — tempo morphs FROM bpm_b TO bpm_a
    #           (mirror of zone_a, so both tracks align at the midpoint)
    #           After the transition, track B continues at its natural bpm_b
    #           (we just play the unmodified tail)
    #
    seg_a_raw = y_a[trans_start:trans_end]
    zone_a    = build_gradual_stretch(seg_a_raw, SR,
                                      bpm_start=bpm_a,
                                      bpm_end=bpm_b,
                                      n_samples_target=trans_samples)

    if len(y_b) < trans_samples:
        # Track B is shorter than the transition: pad its intro with silence.
        seg_b_raw = np.pad(y_b, (0, trans_samples - len(y_b)))
    else:
        seg_b_raw = y_b[:trans_samples]

    zone_b = build_gradual_stretch(seg_b_raw, SR,
                                   bpm_start=bpm_b,
                                   bpm_end=bpm_a,
                                   n_samples_target=trans_samples)

    # Track B tail plays at its ORIGINAL natural tempo (no stretching)
    tail_b = y_b[trans_samples:] if len(y_b) > trans_samples else np.array([], dtype=np.float32)

    # 6 ── EQ crossfade blend ─────────────────────────────────────────────
    style_key         = "aggressive" if "Aggressive" in mix_style else "smooth"
    fade_out, fade_in = make_crossfade_curve(trans_samples, style_key)
    blend             = eq_crossfade(zone_a, zone_b, SR, fade_out, fade_in)

    # 7 ── Assemble final mix ──────────────────────────────────────────────
    pre_a = y_a[:trans_start]
    mix   = np.concatenate([pre_a, blend, tail_b])

    # 8 ── Polish ─────────────────────────────────────────────────────────
    mix = anti_click_window(mix)
    mix = normalize(mix, headroom_db=-0.5)

    # A unique file per call: a fixed name would be overwritten when two
    # requests run at the same time. The handle is closed before writing so
    # the path can be reopened on every platform. The file is left on disk
    # for the caller, which needs the path to serve the audio.
    with tempfile.NamedTemporaryFile(prefix="automix_", suffix=".wav",
                                     delete=False) as tmp:
        out_path = tmp.name
    sf.write(out_path, mix, SR, subtype='PCM_16')

    trans_time_sec = trans_start / SR
    return out_path, bpm_a, bpm_b, key_a, key_b, shift, trans_time_sec


# ─────────────────────────────────────────────
#  GRADIO UI
# ─────────────────────────────────────────────

# Pitch-class names indexed 0-11 (C = 0), sharps only.
KEY_NAMES = "C C# D D# E F F# G G# A A# B".split()

# Stylesheet for the dark theme of the app. It is injected into the page as
# a <style> element when the UI is built, and defines the colour variables
# plus the custom classes referenced by the layout (app-header, app-title,
# app-sub, panel, mix-btn, stats-box, divider).
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Syne:wght@400;700;800&family=DM+Mono:wght@300;400;500&display=swap');

:root {
  --bg:       #0a0a0f;
  --surface:  #111118;
  --border:   #1f1f2e;
  --accent1:  #ff3cac;
  --accent2:  #00f0ff;
  --accent3:  #7928ca;
  --text:     #e8e8f0;
  --muted:    #6b6b80;
  --radius:   12px;
}

body, .gradio-container {
  background: var(--bg) !important;
  font-family: 'DM Mono', monospace !important;
  color: var(--text) !important;
}

.app-header {
  text-align: center;
  padding: 48px 20px 32px;
  position: relative;
}
.app-header::before {
  content: '';
  position: absolute;
  inset: 0;
  background: radial-gradient(ellipse 80% 60% at 50% 0%, rgba(121,40,202,.18) 0%, transparent 70%);
  pointer-events: none;
}
.app-title {
  font-family: 'Syne', sans-serif !important;
  font-size: clamp(2.2rem, 5vw, 3.8rem) !important;
  font-weight: 800 !important;
  letter-spacing: -1px;
  background: linear-gradient(135deg, var(--accent1) 0%, var(--accent2) 55%, var(--accent3) 100%);
  -webkit-background-clip: text;
  -webkit-text-fill-color: transparent;
  background-clip: text;
  margin: 0 0 8px;
}
.app-sub {
  color: var(--muted);
  font-size: .85rem;
  letter-spacing: .12em;
  text-transform: uppercase;
}
.panel {
  background: var(--surface);
  border: 1px solid var(--border);
  border-radius: var(--radius);
  padding: 24px;
}
.mix-btn {
  background: linear-gradient(135deg, var(--accent1), var(--accent3)) !important;
  border: none !important;
  color: #fff !important;
  font-family: 'Syne', sans-serif !important;
  font-weight: 700 !important;
  font-size: 1rem !important;
  letter-spacing: .08em !important;
  padding: 14px 36px !important;
  border-radius: 8px !important;
  cursor: pointer !important;
  transition: opacity .2s, transform .15s !important;
}
.mix-btn:hover { opacity: .88; transform: translateY(-1px); }
.stats-box {
  background: #0d0d14 !important;
  border: 1px solid var(--border) !important;
  border-radius: 8px !important;
  padding: 16px 20px !important;
  font-size: .78rem !important;
  color: var(--muted) !important;
  line-height: 1.8 !important;
  white-space: pre-wrap !important;
}
.gr-block, .gr-box { background: var(--surface) !important; border-color: var(--border) !important; }
label { color: var(--muted) !important; font-size: .78rem !important; letter-spacing: .1em !important; text-transform: uppercase !important; }
input[type=range] { accent-color: var(--accent1); }
select, .gr-dropdown { background: #0d0d14 !important; color: var(--text) !important; border-color: var(--border) !important; }
audio { width: 100%; border-radius: 8px; }
.divider { border: none; border-top: 1px solid var(--border); margin: 24px 0; }
"""


def run_mix(file_a, file_b, transition_sec, mix_style):
    """UI callback: run the mix and build the analysis report.

    Args:
        file_a: Path of the outgoing track, or ``None`` if nothing was
            uploaded.
        file_b: Path of the incoming track, or ``None`` if nothing was
            uploaded.
        transition_sec: Transition length from the slider; truncated to int.
        mix_style: Selected mix-style label.

    Returns:
        ``(output_path, report_text)``. ``output_path`` is ``None`` when an
        input is missing or the mix failed; in that case ``report_text``
        holds the warning or the error message with its traceback.
    """
    if file_a is None or file_b is None:
        return None, "⚠  Please upload both tracks."
    try:
        out_path, bpm_a, bpm_b, key_a, key_b, shift, trans_sec = automix(
            file_a, file_b,
            transition_sec=int(transition_sec),
            mix_style=mix_style
        )
        mm = int(trans_sec // 60)
        ss = int(trans_sec % 60)
        # automix reports the distance from A's key to B's key and shifts
        # track B by the opposite amount, so the correction that was
        # actually applied has the opposite sign.
        applied_shift = -int(shift)
        stats = (
            f"TRACK A  →  BPM: {bpm_a:.1f}   KEY: {KEY_NAMES[key_a]}\n"
            f"TRACK B  →  BPM: {bpm_b:.1f}   KEY: {KEY_NAMES[key_b]}\n"
            f"─────────────────────────────────────────\n"
            f"TRANSITION START  : {mm:02d}:{ss:02d}  (auto-detected downbeat)\n"
            f"TRANSITION LENGTH : {int(transition_sec)} seconds\n"
            f"MIX STYLE         : {mix_style.upper()}\n"
            f"BPM MORPH         : {bpm_a:.1f} ↔ {bpm_b:.1f} (gradual during transition)\n"
            f"                    Track B reverts to {bpm_b:.1f} BPM after mix\n"
            f"PITCH CORRECTION  : {applied_shift:+d} semitones (applied to Track B)\n"
            f"OUTPUT            : 44100 Hz · 16-bit PCM WAV"
        )
        return out_path, stats
    except Exception as e:
        # Top-level UI boundary: report the failure in the stats box instead
        # of letting the callback raise.
        import traceback
        return None, f"Error: {str(e)}\n\n{traceback.format_exc()}"


# Build the Gradio interface. Components appear on the page in the order
# they are created inside the Blocks context, so statement order is layout.
with gr.Blocks(title="AI AutoMix DJ") as demo:

    # Inject the custom stylesheet as a <style> element.
    gr.HTML("<style>" + CSS + "</style>")

    # Page header (title + subtitle).
    gr.HTML("""
    <div class="app-header">
      <div class="app-title">AI AutoMix DJ</div>
      <div class="app-sub">Gradual BPM Morph · Smart Downbeat Detection · EQ Harmonic Mixing</div>
    </div>
    """)

    # Row 1: the two track uploads. type="filepath" hands run_mix a path.
    with gr.Row():
        with gr.Column(scale=1):
            gr.HTML('<div style="color:#6b6b80;font-size:.75rem;text-transform:uppercase;'
                    'letter-spacing:.1em;margin-bottom:6px;">Track A — Outgoing</div>')
            file_a = gr.Audio(label="", type="filepath", elem_classes=["panel"])
        with gr.Column(scale=1):
            gr.HTML('<div style="color:#6b6b80;font-size:.75rem;text-transform:uppercase;'
                    'letter-spacing:.1em;margin-bottom:6px;">Track B — Incoming</div>')
            file_b = gr.Audio(label="", type="filepath", elem_classes=["panel"])

    gr.HTML('<hr class="divider"/>')

    # Row 2: mix controls (transition length, style, trigger button).
    with gr.Row():
        with gr.Column(scale=2):
            transition_slider = gr.Slider(minimum=5, maximum=15, value=10, step=1,
                                          label="Transition Duration (seconds)")
        with gr.Column(scale=2):
            mix_style = gr.Dropdown(choices=["Smooth Mix", "Aggressive Mix"],
                                    value="Smooth Mix", label="Mix Style")
        with gr.Column(scale=1):
            mix_btn = gr.Button("▶  Mix Now", elem_classes=["mix-btn"])

    gr.HTML('<hr class="divider"/>')

    # Row 3: results (rendered audio + text report).
    with gr.Row():
        with gr.Column(scale=3):
            gr.HTML('<div style="color:#6b6b80;font-size:.75rem;text-transform:uppercase;'
                    'letter-spacing:.1em;margin-bottom:8px;">Output Mix</div>')
            output_audio = gr.Audio(label="", type="filepath")
        with gr.Column(scale=2):
            gr.HTML('<div style="color:#6b6b80;font-size:.75rem;text-transform:uppercase;'
                    'letter-spacing:.1em;margin-bottom:8px;">Mix Analysis</div>')
            stats_box = gr.Textbox(value="Waiting for mix…", label="",
                                   lines=9, max_lines=12, elem_classes=["stats-box"])

    # Wire the button: run_mix's two return values fill the audio player and
    # the stats box, in that order.
    mix_btn.click(fn=run_mix,
                  inputs=[file_a, file_b, transition_slider, mix_style],
                  outputs=[output_audio, stats_box])

    # Footer.
    gr.HTML("""
    <div style="text-align:center;margin-top:32px;color:#2a2a3a;font-size:.72rem;letter-spacing:.12em;">
      AI AUTOMIX DJ · GRADUAL BPM MORPH ENGINE · LIBROSA · GRADIO · SCIPY
    </div>
    """)


# Launch the Gradio app only when this file is run as a script, so importing
# the module does not start it.
if __name__ == "__main__":
    demo.launch()