Spaces:

Diggz10
/

bpm

Build error

App Files Community

Diggz10 committed on Aug 22, 2025

Commit

7f608b7

verified ·

1 Parent(s): 1ba01a7

Update app.py

Browse files

Files changed (1) hide show

app.py +126 -250

app.py CHANGED Viewed

@@ -1,303 +1,187 @@
-import os
-import math
-import tempfile
-import warnings
 from typing import Dict, List, Tuple
 import gradio as gr
 import numpy as np
 import pandas as pd
 import librosa
 warnings.filterwarnings("ignore", category=UserWarning)
 warnings.filterwarnings("ignore", category=FutureWarning)
-# =========================================================
-# Key detection profiles (two well-known sets) for voting
-# =========================================================
-# Krumhansl-Schmuckler (Harte)
-KS_MAJOR = np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88], dtype=float)
-KS_MINOR = np.array([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17], dtype=float)
-# Temperley / Kostka–Payne (scaled roughly to similar ranges)
-TP_MAJOR = np.array([0.748, 0.060, 0.488, 0.082, 0.670, 0.460, 0.096, 0.715, 0.104, 0.366, 0.057, 0.400], dtype=float) * 10
-TP_MINOR = np.array([0.712, 0.084, 0.474, 0.618, 0.049, 0.460, 0.105, 0.670, 0.461, 0.044, 0.373, 0.330], dtype=float) * 10
-PITCHES_FLAT = ['C', 'Db', 'D', 'Eb', 'E', 'F', 'Gb', 'G', 'Ab', 'A', 'Bb', 'B']
 CAMELOT_MAJOR = {'B':'1B','F#':'2B','Gb':'2B','Db':'3B','C#':'3B','Ab':'4B','Eb':'5B','Bb':'6B','F':'7B','C':'8B','G':'9B','D':'10B','A':'11B','E':'12B'}
 CAMELOT_MINOR = {'Ab':'1A','G#':'1A','Eb':'2A','D#':'2A','Bb':'3A','A#':'3A','F':'4A','C':'5A','G':'6A','D':'7A','A':'8A','E':'9A','B':'10A','F#':'11A','Gb':'11A','Db':'12A','C#':'12A'}
-# =========================================================
-# Utility helpers
-# =========================================================
-def roll(arr: np.ndarray, steps: int) -> np.ndarray:
-    return np.roll(arr, steps)
-def tonic_from_index(idx: int) -> str:
-    return PITCHES_FLAT[int(idx) % 12]
-def camelot(tonic: str, mode: str) -> str:
-    return (CAMELOT_MAJOR if mode == "major" else CAMELOT_MINOR).get(tonic, "")
-def normalize(v: np.ndarray) -> np.ndarray:
-    n = np.linalg.norm(v) + 1e-12
-    return v / n
-# =========================================================
-# Improved BPM estimation (multi-method consensus)
-# =========================================================
-def pick_best_bpm(y: np.ndarray, sr: int, hop: int = 512) -> Tuple[float, float]:
     """
-    Returns (bpm, confidence[0..1]).
-    Strategy:
-      1) Onset envelope -> autocorrelation peak
-      2) Tempogram peak
-      3) librosa beat tracker tempo
-    Then consensus + half/double correction scored against onset envelope.
     """
     onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop, aggregate=np.median)
-    # 1) Autocorr peak
-    ac = librosa.autocorrelate(onset_env, max_size=onset_env.size // 2)
-    # Convert lags to BPM (exclude lag 0)
     lags = np.arange(1, len(ac))
-    bpms_ac = 60.0 * sr / (lags * hop)
-    # Keep BPM range plausible
-    mask = (bpms_ac >= 60) & (bpms_ac <= 200)
-    bpms_ac = bpms_ac[mask]
-    ac_vals = ac[1:][mask]
     bpm_ac = float(bpms_ac[np.argmax(ac_vals)]) if len(bpms_ac) else 0.0
-    conf_ac = float(np.max(ac_vals) / (np.sum(ac_vals) + 1e-12)) if len(ac_vals) else 0.0
-    # 2) Tempogram peak
-    tg = librosa.feature.tempogram(onset_envelope=onset_env, sr=sr, hop_length=hop)
     tempi = librosa.beat.tempo(onset_envelope=onset_env, sr=sr, hop_length=hop, aggregate=None)
-    # robust choice: most frequent tempo
     if tempi is not None and len(tempi):
-        # histogram in 60..200
-        t = tempi[(tempi >= 60) & (tempi <= 200)]
         if len(t):
-            hist, edges = np.histogram(t, bins=np.arange(60, 202, 1))
-            bpm_tg = float(60 + np.argmax(hist))
-            conf_tg = float(np.max(hist) / (np.sum(hist) + 1e-12))
-        else:
-            bpm_tg, conf_tg = 0.0, 0.0
-    else:
-        bpm_tg, conf_tg = 0.0, 0.0
-    # 3) Beat tracker tempo
     tempo_bt, beats = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr, hop_length=hop)
-    bpm_bt = float(tempo_bt)
-    conf_bt = 0.5 if beats is not None and len(beats) > 8 else 0.1
-    candidates = [bpm for bpm in [bpm_ac, bpm_tg, bpm_bt] if 30 < bpm < 240]
-    if not candidates:
-        return max(bpm_bt, 0.0), 0.0
-    # Generate half/double variants and score them by alignment with onsets
-    expanded = []
-    for bpm in candidates:
-        expanded += [bpm/2, bpm, bpm*2]
-    expanded = [b for b in expanded if 60 <= b <= 200]
-    def alignment_score(bpm_val: float) -> float:
-        # Predict beat locations and sum onset strengths near beats
-        period = (60.0 / bpm_val) * sr / hop  # beats in frames
-        # Start at the strongest onset frame
-        start = int(np.argmax(onset_env))
-        beat_frames = np.arange(start, len(onset_env), period)
-        beat_frames = np.round(beat_frames).astype(int)
-        beat_frames = beat_frames[beat_frames < len(onset_env)]
-        # window around each beat
-        s = 0.0
-        for f in beat_frames:
-            lo = max(0, f-2)
-            hi = min(len(onset_env), f+3)
-            s += float(np.max(onset_env[lo:hi]))
-        return s / (len(beat_frames) + 1e-12)
-    scored = [(b, alignment_score(b)) for b in expanded]
-    best_bpm, best_score = max(scored, key=lambda x: x[1])
-    # Confidence combines alignment and agreement among methods
-    agree = np.mean([min(best_bpm, c)/max(best_bpm, c) for c in candidates])  # 1 if identical
-    confidence = float(0.7 * (best_score / (np.max(onset_env) + 1e-12)) + 0.3 * agree)
-    confidence = float(np.clip(confidence, 0.0, 1.0))
-    return best_bpm, confidence
-# =========================================================
-# Improved Key estimation
-# =========================================================
-def beat_sync_chroma(y: np.ndarray, sr: int, hop: int = 512) -> np.ndarray:
-    # Harmonic component only to suppress drums
-    y_harm, _ = librosa.effects.hpss(y)
-    # Tuned, high-resolution CQT chroma
-    chroma_cqt = librosa.feature.chroma_cqt(
-        y=y_harm, sr=sr, hop_length=hop, bins_per_octave=36, window='hann', cqt_mode='full'
-    )
-    # Timbre-robust CENS chroma
-    chroma_cens = librosa.feature.chroma_cens(y=y_harm, sr=sr, hop_length=hop)
-    # Weighted sum (CQT carries pitch detail, CENS stabilizes)
-    chroma = normalize(0.65 * chroma_cqt + 0.35 * chroma_cens)
-    # Beat-synchronize to reduce local key shifts/percussive bias
-    tempo, beats = librosa.beat.beat_track(y=y_harm, sr=sr, hop_length=hop)
-    if beats is not None and len(beats) > 2:
-        chroma_sync = librosa.util.sync(chroma, beats, aggregate=np.mean)
-    else:
-        chroma_sync = chroma
-    # Normalize columns and average to pitch-class profile
-    chroma_sync = chroma_sync / (np.linalg.norm(chroma_sync, axis=0, keepdims=True) + 1e-12)
-    return np.mean(chroma_sync, axis=1)
-def score_key(pcp: np.ndarray, profiles: Tuple[np.ndarray, np.ndarray]) -> Tuple[str, str, float]:
-    maj_prof, min_prof = profiles
-    pcp = normalize(pcp)
-    best_score = -1.0
-    best_mode = "major"
-    best_tonic = 0
-    for i in range(12):
-        s_maj = float(np.dot(pcp, normalize(roll(maj_prof, -i))))
-        s_min = float(np.dot(pcp, normalize(roll(min_prof, -i))))
-        if s_maj > best_score:
-            best_score, best_mode, best_tonic = s_maj, "major", i
-        if s_min > best_score:
-            best_score, best_mode, best_tonic = s_min, "minor", i
-    # confidence = margin between best and runner-up
     all_scores = []
     for i in range(12):
-        all_scores.append(float(np.dot(pcp, normalize(roll(maj_prof, -i)))))
-        all_scores.append(float(np.dot(pcp, normalize(roll(min_prof, -i)))))
-    all_scores = np.array(all_scores, dtype=float)
-    margin = (np.sort(all_scores)[-1] - np.sort(all_scores)[-2]) / (np.max(all_scores) + 1e-12)
     confidence = float(np.clip(margin, 0.0, 1.0))
-    tonic = tonic_from_index(best_tonic)
-    key_name = f"{tonic} {best_mode}"
-    return key_name, best_mode, confidence, best_tonic
-def estimate_key(y: np.ndarray, sr: int) -> Tuple[str, str, float, int]:
-    """
-    Dual-profile voting: Krumhansl + Temperley.
-    We average their confidences and pick the agreement (or strongest if tie).
-    """
-    pcp = beat_sync_chroma(y, sr)
-    k_key, k_mode, k_conf, k_tonic = score_key(pcp, (KS_MAJOR, KS_MINOR))
-    t_key, t_mode, t_conf, t_tonic = score_key(pcp, (TP_MAJOR, TP_MINOR))
-    # If both agree on tonic & mode, boost confidence
-    if (k_mode == t_mode) and (k_tonic == t_tonic):
-        mode = k_mode
-        tonic_idx = k_tonic
-        name = k_key  # same as t_key
-        conf = float(np.clip(0.5 * (k_conf + t_conf) + 0.3, 0.0, 1.0))
     else:
-        # Choose the one with higher confidence, but allow close-call fallback
-        if (k_conf >= t_conf + 0.05):
-            name, mode, tonic_idx, conf = k_key, k_mode, k_tonic, k_conf * 0.9
-        elif (t_conf >= k_conf + 0.05):
-            name, mode, tonic_idx, conf = t_key, t_mode, t_tonic, t_conf * 0.9
-        else:
-            # disagree slightly: pick by proximity to major/minor brightness
-            brightness = float(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))) / (sr/2.0 + 1e-12)
-            pick_t = (k_tonic, t_tonic)[int(brightness > 0.5)]
-            pick_m = ("minor", "major")[int(brightness > 0.5)]
-            if pick_m == k_mode and pick_t == k_tonic:
-                name, mode, tonic_idx, conf = k_key, k_mode, k_tonic, (k_conf+t_conf)/2
-            else:
-                name, mode, tonic_idx, conf = t_key, t_mode, t_tonic, (k_conf+t_conf)/2
-    return name, mode, float(np.clip(conf, 0.0, 1.0)), int(tonic_idx)
-# =========================================================
-# Extra features
-# =========================================================
-def robust_scale(x: float, lo: float, hi: float) -> float:
-    return float(np.clip((x - lo) / (hi - lo + 1e-12), 0.0, 1.0))
 def estimate_extras(y: np.ndarray, sr: int, bpm: float, mode: str) -> Dict[str, float]:
     rms = librosa.feature.rms(y=y, frame_length=2048, hop_length=512).squeeze()
-    energy = robust_scale(float(np.mean(rms)), lo=0.01, hi=0.2)
     try:
-        plp = librosa.beat.plp(y=y, sr=sr)
-        pulse = float(np.mean(plp))
     except Exception:
         pulse = 0.5
-    tempo_pref = math.exp(-((bpm - 118.0) / 50.0) ** 2)
-    danceability = 0.6 * tempo_pref + 0.4 * pulse
     centroid = librosa.feature.spectral_centroid(y=y, sr=sr).squeeze()
-    brightness = float(np.mean(centroid)) / (sr/2.0 + 1e-12)
-    brightness = np.clip(brightness, 0.0, 1.0)
-    happiness = 0.5 * brightness + 0.3 * math.exp(-((bpm - 120.0) / 60.0) ** 2) + (0.2 if mode == "major" else 0.0)
-    return {
-        "Energy": round(energy * 100, 1),
-        "Danceability": round(np.clip(danceability, 0.0, 1.0) * 100, 1),
-        "Happiness": round(np.clip(happiness, 0.0, 1.0) * 100, 1),
-    }
-# =========================================================
-# Core analyzer
-# =========================================================
 def analyze_one(path: str, max_duration_s: float = 300.0) -> Dict[str, str]:
     fn = os.path.basename(path)
-    # Mono 22.05k for speed; trim silence
-    y, sr = librosa.load(path, sr=22050, mono=True, duration=max_duration_s)
-    y, _ = librosa.effects.trim(y, top_db=40)
     if y.size == 0:
-        return {"File Name": fn, "Key": "N/A", "Alt Key": "", "BPM": "N/A",
-                "Energy": "N/A", "Danceability": "N/A", "Happiness": "N/A"}
-    # BPM (with confidence)
-    bpm_val, bpm_conf = pick_best_bpm(y, sr, hop=512)
-    bpm_disp = int(round(bpm_val)) if bpm_val > 0 else "N/A"
-    # Key (with confidence)
-    key_name, mode, key_conf, tonic_idx = estimate_key(y, sr)
-    camelot_code = camelot(PITCHES_FLAT[tonic_idx], mode)
-    extras = estimate_extras(y, sr, bpm_val if bpm_val > 0 else 120.0, mode)
-    return {
-        "File Name": fn,
-        "Key": f"{key_name}",           # e.g., "Bb minor"
-        "Alt Key": camelot_code,        # e.g., "3A"
-        "BPM": bpm_disp,
-        "Energy": extras["Energy"],
-        "Danceability": extras["Danceability"],
-        "Happiness": extras["Happiness"],
-    }
 def analyze_batch(files: List[str], save_results: bool, search: str):
     if not files:
         return pd.DataFrame(columns=["File Name","Key","Alt Key","BPM","Energy","Danceability","Happiness"]), None
     rows = []
     for f in files:
         try:
             rows.append(analyze_one(f))
         except Exception as e:
-            rows.append({"File Name": os.path.basename(f), "Key": f"Error: {e}", "Alt Key": "", "BPM": "",
-                         "Energy": "", "Danceability": "", "Happiness": ""})
     df = pd.DataFrame(rows, columns=["File Name","Key","Alt Key","BPM","Energy","Danceability","Happiness"])
     if search and search.strip():
         mask = df.apply(lambda col: col.astype(str).str.contains(search.strip(), case=False, na=False))
         df = df[mask.any(axis=1)]
@@ -307,26 +191,18 @@ def analyze_batch(files: List[str], save_results: bool, search: str):
         tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
         df.to_csv(tmp.name, index=False, encoding="utf-8")
         csv_file = tmp.name
     return df, csv_file
-# =========================================================
-# UI
-# =========================================================
 CSS = """
 #app-title { font-weight: 700; font-size: 28px; }
 .small-note { font-size: 12px; opacity: 0.8; }
 th, td { text-align: left !important; }
 """
 with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
-    gr.Markdown("<div id='app-title'>Audio Key & BPM Finder — Accurate Mode</div>")
-    gr.Markdown(
-        "Upload audio (mp3/wav/m4a…). The app estimates **Key**, **Camelot (Alt Key)**, and **BPM** using consensus methods, "
-        "plus heuristic **Energy**, **Danceability**, **Happiness**."
-        "<br><span class='small-note'>Tip: Longer clips (30–120s) improve accuracy. Results are global track estimates.</span>"
-    )
     with gr.Row():
         files = gr.File(label="Audio Files", file_count="multiple", type="filepath")
@@ -339,7 +215,7 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
                           interactive=False, wrap=True, label="Results")
     out_csv = gr.File(label="Download CSV", visible=True)
-    run.click(fn=analyze_batch, inputs=[files, save, search], outputs=[out_df, out_csv])
 if __name__ == "__main__":
     demo.launch()

+import os, io, math, tempfile, warnings
 from typing import Dict, List, Tuple
 import gradio as gr
 import numpy as np
 import pandas as pd
 import librosa
+from pydub import AudioSegment
 warnings.filterwarnings("ignore", category=UserWarning)
 warnings.filterwarnings("ignore", category=FutureWarning)
+# ---------- Key profiles ----------
+KS_MAJOR = np.array([6.35,2.23,3.48,2.33,4.38,4.09,2.52,5.19,2.39,3.66,2.29,2.88], float)
+KS_MINOR = np.array([6.33,2.68,3.52,5.38,2.60,3.53,2.54,4.75,3.98,2.69,3.34,3.17], float)
+TP_MAJOR = np.array([0.748,0.060,0.488,0.082,0.670,0.460,0.096,0.715,0.104,0.366,0.057,0.400], float)*10
+TP_MINOR = np.array([0.712,0.084,0.474,0.618,0.049,0.460,0.105,0.670,0.461,0.044,0.373,0.330], float)*10
+PITCHES_FLAT = ['C','Db','D','Eb','E','F','Gb','G','Ab','A','Bb','B']
 CAMELOT_MAJOR = {'B':'1B','F#':'2B','Gb':'2B','Db':'3B','C#':'3B','Ab':'4B','Eb':'5B','Bb':'6B','F':'7B','C':'8B','G':'9B','D':'10B','A':'11B','E':'12B'}
 CAMELOT_MINOR = {'Ab':'1A','G#':'1A','Eb':'2A','D#':'2A','Bb':'3A','A#':'3A','F':'4A','C':'5A','G':'6A','D':'7A','A':'8A','E':'9A','B':'10A','F#':'11A','Gb':'11A','Db':'12A','C#':'12A'}
+def roll(a, k): return np.roll(a, k)
+def norm(v): return v/(np.linalg.norm(v)+1e-12)
+def tonic_from_index(i:int)->str: return PITCHES_FLAT[i%12]
+def camelot(tonic:str, mode:str)->str: return (CAMELOT_MAJOR if mode=="major" else CAMELOT_MINOR).get(tonic,"")
+# ---------- Robust audio loader (fixes “unsupported type/codec”) ----------
+def load_audio_any(path: str, sr: int = 22050, duration: float = 300.0):
     """
+    Try librosa (audioread/ffmpeg). If it fails (unsupported type/codec),
+    use pydub+ffmpeg to decode to WAV in-memory, then load.
     """
+    try:
+        y, sr_out = librosa.load(path, sr=sr, mono=True, duration=duration)
+        return y, sr_out
+    except Exception:
+        # Fallback: decode via pydub -> WAV bytes
+        seg = AudioSegment.from_file(path)  # needs ffmpeg (installed via apt.txt)
+        if duration:
+            seg = seg[: int(duration * 1000)]
+        buf = io.BytesIO()
+        seg.export(buf, format="wav")
+        buf.seek(0)
+        y, sr_out = librosa.load(buf, sr=sr, mono=True)
+        return y, sr_out
+# ---------- BPM (consensus + half/double correction) ----------
+def pick_best_bpm(y: np.ndarray, sr: int, hop: int = 512) -> Tuple[float, float]:
     onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop, aggregate=np.median)
+    ac = librosa.autocorrelate(onset_env, max_size=onset_env.size//2)
     lags = np.arange(1, len(ac))
+    bpms_ac = 60.0*sr/(lags*hop)
+    mask = (bpms_ac>=60)&(bpms_ac<=200)
+    ac_vals = ac[1:][mask]; bpms_ac = bpms_ac[mask]
     bpm_ac = float(bpms_ac[np.argmax(ac_vals)]) if len(bpms_ac) else 0.0
+    conf_ac = float(np.max(ac_vals)/(np.sum(ac_vals)+1e-12)) if len(ac_vals) else 0.0
     tempi = librosa.beat.tempo(onset_envelope=onset_env, sr=sr, hop_length=hop, aggregate=None)
     if tempi is not None and len(tempi):
+        t = tempi[(tempi>=60)&(tempi<=200)]
         if len(t):
+            hist, _ = np.histogram(t, bins=np.arange(60,202,1))
+            bpm_tg = float(60 + np.argmax(hist)); conf_tg = float(np.max(hist)/(np.sum(hist)+1e-12))
+        else: bpm_tg, conf_tg = 0.0, 0.0
+    else: bpm_tg, conf_tg = 0.0, 0.0
     tempo_bt, beats = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr, hop_length=hop)
+    bpm_bt = float(tempo_bt); conf_bt = 0.5 if beats is not None and len(beats)>8 else 0.1
+    candidates = [b for b in [bpm_ac, bpm_tg, bpm_bt] if 30<b<240]
+    if not candidates: return max(bpm_bt,0.0), 0.0
+    expanded = [b for x in candidates for b in (x/2, x, x*2) if 60<=b<=200]
+    def align_score(bpm_val: float) -> float:
+        period = (60.0/bpm_val)*sr/hop
+        start = int(np.argmax(onset_env))
+        frames = np.round(np.arange(start, len(onset_env), period)).astype(int)
+        frames = frames[frames<len(onset_env)]
+        s = 0.0
+        for f in frames:
+            lo=max(0,f-2); hi=min(len(onset_env), f+3)
+            s += float(np.max(onset_env[lo:hi]))
+        return s/(len(frames)+1e-12)
+    scored = [(b, align_score(b)) for b in expanded]
+    best_bpm, best_s = max(scored, key=lambda x:x[1])
+    agree = np.mean([min(best_bpm,c)/max(best_bpm,c) for c in candidates])
+    conf = float(np.clip(0.7*(best_s/(np.max(onset_env)+1e-12)) + 0.3*agree, 0.0, 1.0))
+    return best_bpm, conf
+# ---------- Key (beat-sync CQT+CENS, dual-profile vote) ----------
+def beat_sync_pcp(y: np.ndarray, sr: int, hop: int = 512) -> np.ndarray:
+    y_h, _ = librosa.effects.hpss(y)
+    cqt = librosa.feature.chroma_cqt(y=y_h, sr=sr, hop_length=hop, bins_per_octave=36, cqt_mode="full")
+    cens = librosa.feature.chroma_cens(y=y_h, sr=sr, hop_length=hop)
+    chroma = norm(0.65*cqt + 0.35*cens)
+    _, beats = librosa.beat.beat_track(y=y_h, sr=sr, hop_length=hop)
+    if beats is not None and len(beats)>2:
+        chroma = librosa.util.sync(chroma, beats, aggregate=np.mean)
+    chroma = chroma / (np.linalg.norm(chroma, axis=0, keepdims=True)+1e-12)
+    return np.mean(chroma, axis=1)
+def score_key(pcp: np.ndarray, prof_major: np.ndarray, prof_minor: np.ndarray):
+    pcp = norm(pcp)
+    best_score, best_mode, best_tonic = -1.0, "major", 0
     all_scores = []
     for i in range(12):
+        sM = float(np.dot(pcp, norm(roll(prof_major, -i))))
+        sm = float(np.dot(pcp, norm(roll(prof_minor, -i))))
+        all_scores += [sM, sm]
+        if sM>best_score: best_score, best_mode, best_tonic = sM, "major", i
+        if sm>best_score: best_score, best_mode, best_tonic = sm, "minor", i
+    all_scores = np.array(all_scores)
+    margin = (np.sort(all_scores)[-1]-np.sort(all_scores)[-2])/(np.max(all_scores)+1e-12)
     confidence = float(np.clip(margin, 0.0, 1.0))
+    return best_mode, best_tonic, confidence
+def estimate_key(y: np.ndarray, sr: int):
+    pcp = beat_sync_pcp(y, sr)
+    m1, t1, c1 = score_key(pcp, KS_MAJOR, KS_MINOR)
+    m2, t2, c2 = score_key(pcp, TP_MAJOR, TP_MINOR)
+    if (m1==m2) and (t1==t2):
+        mode, tonic, conf = m1, t1, float(np.clip(0.5*(c1+c2)+0.3, 0.0, 1.0))
     else:
+        mode, tonic, conf = (m1, t1, c1) if c1>=c2 else (m2, t2, c2)
+    name = f"{tonic_from_index(tonic)} {mode}"
+    return name, mode, conf, tonic
+# ---------- Extras ----------
+def robust_scale(x, lo, hi): return float(np.clip((x-lo)/(hi-lo+1e-12), 0.0, 1.0))
 def estimate_extras(y: np.ndarray, sr: int, bpm: float, mode: str) -> Dict[str, float]:
     rms = librosa.feature.rms(y=y, frame_length=2048, hop_length=512).squeeze()
+    energy = robust_scale(float(np.mean(rms)), 0.01, 0.2)
     try:
+        plp = librosa.beat.plp(y=y, sr=sr); pulse = float(np.mean(plp))
     except Exception:
         pulse = 0.5
+    tempo_pref = math.exp(-((bpm-118.0)/50.0)**2)
+    dance = 0.6*tempo_pref + 0.4*pulse
     centroid = librosa.feature.spectral_centroid(y=y, sr=sr).squeeze()
+    bright = float(np.mean(centroid))/(sr/2.0+1e-12); bright = np.clip(bright,0,1)
+    happy = 0.5*bright + 0.3*math.exp(-((bpm-120.0)/60.0)**2) + (0.2 if mode=="major" else 0.0)
+    return {"Energy":round(energy*100,1), "Danceability":round(np.clip(dance,0,1)*100,1), "Happiness":round(np.clip(happy,0,1)*100,1)}
+# ---------- Core ----------
 def analyze_one(path: str, max_duration_s: float = 300.0) -> Dict[str, str]:
     fn = os.path.basename(path)
+    try:
+        y, sr = load_audio_any(path, sr=22050, duration=max_duration_s)
+    except Exception as e:
+        return {"File Name": fn, "Key": f"Error: {e}", "Alt Key": "", "BPM": "", "Energy": "", "Danceability": "", "Happiness": ""}
+    y, _ = librosa.effects.trim(y, top_db=40)
     if y.size == 0:
+        return {"File Name": fn, "Key": "N/A", "Alt Key": "", "BPM": "N/A", "Energy": "N/A", "Danceability": "N/A", "Happiness": "N/A"}
+    bpm_val, _ = pick_best_bpm(y, sr, hop=512)
+    bpm_disp = int(round(bpm_val)) if bpm_val>0 else "N/A"
+    key_name, mode, _, tonic = estimate_key(y, sr)
+    camelot_code = camelot(tonic_from_index(tonic), mode)
+    extras = estimate_extras(y, sr, bpm_val if bpm_val>0 else 120.0, mode)
+    return {"File Name": fn, "Key": key_name, "Alt Key": camelot_code, "BPM": bpm_disp,
+            "Energy": extras["Energy"], "Danceability": extras["Danceability"], "Happiness": extras["Happiness"]}
 def analyze_batch(files: List[str], save_results: bool, search: str):
     if not files:
         return pd.DataFrame(columns=["File Name","Key","Alt Key","BPM","Energy","Danceability","Happiness"]), None
     rows = []
     for f in files:
         try:
             rows.append(analyze_one(f))
         except Exception as e:
+            rows.append({"File Name": os.path.basename(f), "Key": f"Error: {e}", "Alt Key": "", "BPM": "", "Energy": "", "Danceability": "", "Happiness": ""})
     df = pd.DataFrame(rows, columns=["File Name","Key","Alt Key","BPM","Energy","Danceability","Happiness"])
     if search and search.strip():
         mask = df.apply(lambda col: col.astype(str).str.contains(search.strip(), case=False, na=False))
         df = df[mask.any(axis=1)]
         tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
         df.to_csv(tmp.name, index=False, encoding="utf-8")
         csv_file = tmp.name
     return df, csv_file
+# ---------- UI ----------
 CSS = """
 #app-title { font-weight: 700; font-size: 28px; }
 .small-note { font-size: 12px; opacity: 0.8; }
 th, td { text-align: left !important; }
 """
 with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
+    gr.Markdown("<div id='app-title'>Audio Key & BPM Finder — Robust Loader</div>")
+    gr.Markdown("Upload MP3/WAV/M4A, etc. This Space installs **FFmpeg** and falls back to pydub if needed. "
+                "Outputs **Key**, **Camelot (Alt Key)**, **BPM**, plus **Energy/Danceability/Happiness**.")
     with gr.Row():
         files = gr.File(label="Audio Files", file_count="multiple", type="filepath")
                           interactive=False, wrap=True, label="Results")
     out_csv = gr.File(label="Download CSV", visible=True)
+    run.click(analyze_batch, inputs=[files, save, search], outputs=[out_df, out_csv])
 if __name__ == "__main__":
     demo.launch()