File size: 10,551 Bytes
7f608b7
224fd2d
b14d274
 
 
 
 
7f608b7
b14d274
 
 
 
7f608b7
 
 
b14d274
7f608b7
 
b14d274
7f608b7
224fd2d
 
b14d274
7f608b7
 
 
 
b14d274
7f608b7
 
b14d274
7f608b7
 
b14d274
7f608b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224fd2d
 
7f608b7
224fd2d
7f608b7
 
 
224fd2d
7f608b7
224fd2d
 
 
7f608b7
224fd2d
7f608b7
 
 
 
224fd2d
 
7f608b7
b14d274
7f608b7
 
b14d274
7f608b7
b14d274
7f608b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224fd2d
 
7f608b7
 
 
 
 
 
 
224fd2d
7f608b7
b14d274
7f608b7
 
 
 
b14d274
7f608b7
 
b14d274
7f608b7
 
 
 
b14d274
7f608b7
 
224fd2d
b14d274
7f608b7
b14d274
7f608b7
b14d274
 
7f608b7
 
b14d274
7f608b7
 
 
b14d274
7f608b7
224fd2d
 
7f608b7
 
 
 
b14d274
7f608b7
224fd2d
7f608b7
224fd2d
7f608b7
 
b14d274
7f608b7
 
b14d274
7f608b7
b14d274
7f608b7
 
b14d274
 
224fd2d
 
b14d274
 
 
224fd2d
b14d274
7f608b7
224fd2d
 
b14d274
 
 
 
224fd2d
b14d274
 
 
 
 
7f608b7
b14d274
 
 
 
 
 
7f608b7
 
 
b14d274
 
 
 
224fd2d
b14d274
 
 
224fd2d
 
b14d274
 
7f608b7
b14d274
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
import os, io, math, tempfile, warnings
from typing import Dict, List, Tuple

import gradio as gr
import numpy as np
import pandas as pd
import librosa
from pydub import AudioSegment

# Silence every UserWarning and FutureWarning raised after this point,
# process-wide (the filters are not scoped to a particular library).
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

# ---------- Key profiles ----------
# Two independent sets of 12-element pitch-class weight vectors, one major and
# one minor each. score_key() rotates a pair over all 12 tonics and compares
# each rotation with the measured chroma; estimate_key() runs both sets and
# combines their votes.
# NOTE(review): the KS_/TP_ prefixes presumably stand for the
# Krumhansl-Schmuckler and Temperley profiles, with index 0 being the tonic
# weight -- confirm against the source the numbers were taken from.
KS_MAJOR = np.array([6.35,2.23,3.48,2.33,4.38,4.09,2.52,5.19,2.39,3.66,2.29,2.88], float)
KS_MINOR = np.array([6.33,2.68,3.52,5.38,2.60,3.53,2.54,4.75,3.98,2.69,3.34,3.17], float)

# The *10 scaling does not influence scoring: score_key() normalises every
# rotated profile to unit length before taking the dot product.
TP_MAJOR = np.array([0.748,0.060,0.488,0.082,0.670,0.460,0.096,0.715,0.104,0.366,0.057,0.400], float)*10
TP_MINOR = np.array([0.712,0.084,0.474,0.618,0.049,0.460,0.105,0.670,0.461,0.044,0.373,0.330], float)*10

# Pitch-class index -> note name, spelled with flats; read by tonic_from_index().
PITCHES_FLAT = ['C','Db','D','Eb','E','F','Gb','G','Ab','A','Bb','B']
# Tonic name -> Camelot wheel code ("B" suffix for major, "A" suffix for minor).
# Sharp spellings are listed as aliases even though PITCHES_FLAT only produces
# flat spellings; camelot() returns "" for any name missing from the table.
CAMELOT_MAJOR = {'B':'1B','F#':'2B','Gb':'2B','Db':'3B','C#':'3B','Ab':'4B','Eb':'5B','Bb':'6B','F':'7B','C':'8B','G':'9B','D':'10B','A':'11B','E':'12B'}
CAMELOT_MINOR = {'Ab':'1A','G#':'1A','Eb':'2A','D#':'2A','Bb':'3A','A#':'3A','F':'4A','C':'5A','G':'6A','D':'7A','A':'8A','E':'9A','B':'10A','F#':'11A','Gb':'11A','Db':'12A','C#':'12A'}

def roll(a, k):
    """Return *a* circularly shifted by *k* positions (thin wrapper over ``np.roll``)."""
    return np.roll(a, shift=k)
def norm(v):
    """Scale *v* to unit Euclidean (Frobenius for 2-D input) length.

    A tiny epsilon is added to the denominator so an all-zero input yields
    zeros instead of a division-by-zero.
    """
    magnitude = np.linalg.norm(v)
    return v / (magnitude + 1e-12)
def tonic_from_index(i:int)->str:
    """Return the flat-spelled note name for pitch-class index *i* (wraps modulo 12)."""
    _, pitch_class = divmod(i, 12)
    return PITCHES_FLAT[pitch_class]
def camelot(tonic:str, mode:str)->str:
    """Look up the Camelot code for *tonic* in the given *mode*.

    Any mode other than ``"major"`` is looked up in the minor table. Returns
    an empty string when the tonic name is not present in the table.
    """
    if mode == "major":
        table = CAMELOT_MAJOR
    else:
        table = CAMELOT_MINOR
    return table.get(tonic, "")

# ---------- Robust audio loader (fixes “unsupported type/codec”) ----------
def load_audio_any(path: str, sr: int = 22050, duration: float = 300.0):
    """
    Decode an audio file into a mono waveform at the requested sample rate.

    librosa is tried first. If it raises for any reason (e.g. an unsupported
    container or codec), the file is decoded with pydub (which needs ffmpeg),
    cut to *duration* seconds, re-encoded as WAV into an in-memory buffer and
    loaded from there.

    Args:
        path: Path of the audio file to decode.
        sr: Target sample rate passed to ``librosa.load``.
        duration: Maximum number of seconds to keep; a falsy value disables
            the cut in the fallback path.

    Returns:
        ``(samples, sample_rate)`` exactly as returned by ``librosa.load``.

    Raises:
        Whatever pydub or the second ``librosa.load`` call raises when the
        fallback also fails.
    """
    try:
        waveform, rate = librosa.load(path, sr=sr, mono=True, duration=duration)
    except Exception:
        # librosa could not read the file: let pydub/ffmpeg decode it instead.
        segment = AudioSegment.from_file(path)  # needs ffmpeg (installed via apt.txt)
        if duration:
            clip_ms = int(duration * 1000)  # pydub slices in milliseconds
            segment = segment[:clip_ms]
        wav_bytes = io.BytesIO()
        segment.export(wav_bytes, format="wav")
        wav_bytes.seek(0)  # rewind so librosa reads from the start of the buffer
        waveform, rate = librosa.load(wav_bytes, sr=sr, mono=True)
    return waveform, rate

# ---------- BPM (consensus + half/double correction) ----------
def pick_best_bpm(y: np.ndarray, sr: int, hop: int = 512) -> Tuple[float, float]:
    """
    Estimate the tempo of *y* by consensus of three estimators, then resolve
    half/double-time ambiguity by checking alignment with the onset envelope.

    Three candidate tempi are computed from the same onset-strength envelope:
    the strongest autocorrelation lag inside 60-200 BPM, the most frequent
    per-frame tempo inside 60-200 BPM, and the beat tracker's global tempo.
    Each candidate is expanded to (x/2, x, 2x), restricted to 60-200 BPM, and
    the value whose evenly spaced beat grid lands on the strongest onsets wins.

    Args:
        y: Mono audio samples.
        sr: Sample rate of *y*.
        hop: Hop length (in samples) used for every frame-based feature.

    Returns:
        ``(bpm, confidence)``. Confidence is in [0, 1] and mixes the winning
        grid's mean onset strength (relative to the envelope peak, weight 0.7)
        with how closely the raw candidates agree with the winner (weight 0.3).
        When no estimator yields a value in (30, 240), the beat tracker's
        tempo floored at 0 is returned with confidence 0.
    """
    onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop, aggregate=np.median)

    # Candidate 1: lag with the highest autocorrelation, limited to 60-200 BPM.
    ac = librosa.autocorrelate(onset_env, max_size=onset_env.size // 2)
    lags = np.arange(1, len(ac))  # lag 0 is skipped: it is always the maximum
    bpms_ac = 60.0 * sr / (lags * hop)
    mask = (bpms_ac >= 60) & (bpms_ac <= 200)
    ac_vals = ac[1:][mask]
    bpms_ac = bpms_ac[mask]
    bpm_ac = float(bpms_ac[np.argmax(ac_vals)]) if len(bpms_ac) else 0.0

    # Candidate 2: mode of the per-frame tempo curve (1-BPM histogram bins).
    # librosa >= 0.10 exposes the estimator as librosa.feature.tempo and
    # deprecates librosa.beat.tempo; older releases only have the latter.
    tempo_fn = getattr(librosa.feature, "tempo", None) or librosa.beat.tempo
    tempi = tempo_fn(onset_envelope=onset_env, sr=sr, hop_length=hop, aggregate=None)
    bpm_tg = 0.0
    if tempi is not None and len(tempi):
        t = tempi[(tempi >= 60) & (tempi <= 200)]
        if len(t):
            hist, _ = np.histogram(t, bins=np.arange(60, 202, 1))
            bpm_tg = float(60 + np.argmax(hist))

    # Candidate 3: global tempo from the beat tracker. Depending on the librosa
    # version this is a scalar or a one-element array, so take the first
    # element explicitly instead of relying on float(ndarray).
    tempo_bt, _ = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr, hop_length=hop)
    bpm_bt = float(np.atleast_1d(tempo_bt).ravel()[0])

    candidates = [b for b in (bpm_ac, bpm_tg, bpm_bt) if 30 < b < 240]
    if not candidates:
        return max(bpm_bt, 0.0), 0.0

    # Every candidate in (30, 240) contributes at least one value in 60-200,
    # so this list is never empty.
    expanded = [b for x in candidates for b in (x / 2, x, x * 2) if 60 <= b <= 200]

    n_frames = len(onset_env)
    start = int(np.argmax(onset_env))  # anchor the beat grid on the strongest onset

    def align_score(bpm_val: float) -> float:
        """Mean onset strength found within +/-2 frames of each grid beat."""
        period = (60.0 / bpm_val) * sr / hop  # beat period in frames
        frames = np.round(np.arange(start, n_frames, period)).astype(int)
        frames = frames[frames < n_frames]
        s = 0.0
        for f in frames:
            lo = max(0, f - 2)
            hi = min(n_frames, f + 3)
            s += float(np.max(onset_env[lo:hi]))
        return s / (len(frames) + 1e-12)

    scored = [(b, align_score(b)) for b in expanded]
    best_bpm, best_s = max(scored, key=lambda x: x[1])
    # Ratio of smaller to larger tempo: 1.0 means a candidate equals the winner.
    agree = np.mean([min(best_bpm, c) / max(best_bpm, c) for c in candidates])
    conf = float(np.clip(0.7 * (best_s / (np.max(onset_env) + 1e-12)) + 0.3 * agree, 0.0, 1.0))
    return best_bpm, conf

# ---------- Key (beat-sync CQT+CENS, dual-profile vote) ----------
def beat_sync_pcp(y: np.ndarray, sr: int, hop: int = 512) -> np.ndarray:
    """
    Build a single 12-bin pitch-class profile for the whole signal.

    The harmonic component of *y* is turned into a 0.65/0.35 blend of CQT
    chroma and CENS chroma. When the beat tracker finds more than two beats
    the chroma frames are averaged per beat interval. Every remaining column
    is scaled to unit length and the columns are averaged over time.

    Args:
        y: Mono audio samples.
        sr: Sample rate of *y*.
        hop: Hop length (in samples) for the chroma features and beat tracker.

    Returns:
        The time-averaged chroma vector (one value per chroma bin).
    """
    harmonic, _ = librosa.effects.hpss(y)

    chroma_from_cqt = librosa.feature.chroma_cqt(
        y=harmonic, sr=sr, hop_length=hop, bins_per_octave=36, cqt_mode="full"
    )
    chroma_from_cens = librosa.feature.chroma_cens(y=harmonic, sr=sr, hop_length=hop)
    blended = norm(0.65 * chroma_from_cqt + 0.35 * chroma_from_cens)

    _, beat_frames = librosa.beat.beat_track(y=harmonic, sr=sr, hop_length=hop)
    enough_beats = beat_frames is not None and len(beat_frames) > 2
    if enough_beats:
        # Collapse frames to one averaged column per beat interval.
        blended = librosa.util.sync(blended, beat_frames, aggregate=np.mean)

    # Unit-normalise each column so loud segments do not dominate the average.
    column_norms = np.linalg.norm(blended, axis=0, keepdims=True)
    unit_columns = blended / (column_norms + 1e-12)
    return np.mean(unit_columns, axis=1)

def score_key(pcp: np.ndarray, prof_major: np.ndarray, prof_minor: np.ndarray):
    """
    Pick the best-matching key for a pitch-class profile.

    The unit-normalised *pcp* is compared (dot product, i.e. cosine
    similarity) with the major and minor profiles rotated to each of the 12
    possible tonics.

    Args:
        pcp: 12-element pitch-class profile, index 0 = C.
        prof_major: 12-element major-key profile with the tonic weight at index 0.
        prof_minor: 12-element minor-key profile with the tonic weight at index 0.

    Returns:
        ``(mode, tonic, confidence)`` where *mode* is ``"major"`` or
        ``"minor"``, *tonic* is the pitch-class index (0 = C) and *confidence*
        is the gap between the best and second-best of the 24 scores divided
        by the best score, clipped to [0, 1]. Ties keep the earliest candidate
        (major before minor, lower tonic first).
    """
    def _unit(v: np.ndarray) -> np.ndarray:
        # Epsilon keeps an all-zero vector from dividing by zero.
        return v / (np.linalg.norm(v) + 1e-12)

    pcp = _unit(np.asarray(pcp, dtype=float))
    best_score, best_mode, best_tonic = -1.0, "major", 0
    all_scores = []
    for i in range(12):
        # np.roll(profile, i) moves the tonic weight from index 0 to pitch
        # class i, so candidate i really is "key with tonic i". Rolling by -i
        # would instead place the tonic weight at (12 - i) % 12 and report
        # mirrored keys (e.g. G major detected as F major).
        sM = float(np.dot(pcp, _unit(np.roll(prof_major, i))))
        sm = float(np.dot(pcp, _unit(np.roll(prof_minor, i))))
        all_scores += [sM, sm]
        if sM > best_score:
            best_score, best_mode, best_tonic = sM, "major", i
        if sm > best_score:
            best_score, best_mode, best_tonic = sm, "minor", i

    ranked = np.sort(np.array(all_scores))
    margin = (ranked[-1] - ranked[-2]) / (ranked[-1] + 1e-12)
    confidence = float(np.clip(margin, 0.0, 1.0))
    return best_mode, best_tonic, confidence

def estimate_key(y: np.ndarray, sr: int):
    """
    Estimate the musical key of *y* by letting two profile sets vote.

    The beat-synchronous pitch-class profile is scored against the KS and TP
    profile pairs. If both agree on mode and tonic, that key is returned with
    the mean of the two confidences plus a 0.3 bonus (clipped to [0, 1]).
    Otherwise the more confident result wins, with KS preferred on a tie.

    Returns:
        ``(name, mode, confidence, tonic)`` where *name* is e.g. ``"Eb minor"``
        and *tonic* is the pitch-class index.
    """
    profile = beat_sync_pcp(y, sr)
    ks_mode, ks_tonic, ks_conf = score_key(profile, KS_MAJOR, KS_MINOR)
    tp_mode, tp_tonic, tp_conf = score_key(profile, TP_MAJOR, TP_MINOR)

    if (ks_mode, ks_tonic) == (tp_mode, tp_tonic):
        mode, tonic = ks_mode, ks_tonic
        conf = float(np.clip(0.5 * (ks_conf + tp_conf) + 0.3, 0.0, 1.0))
    elif ks_conf >= tp_conf:
        mode, tonic, conf = ks_mode, ks_tonic, ks_conf
    else:
        mode, tonic, conf = tp_mode, tp_tonic, tp_conf

    return f"{tonic_from_index(tonic)} {mode}", mode, conf, tonic

# ---------- Extras ----------
def robust_scale(x, lo, hi):
    """Map *x* linearly from [lo, hi] onto [0, 1], clipping values outside the range.

    An epsilon in the denominator keeps ``lo == hi`` from dividing by zero.
    """
    fraction = (x - lo) / (hi - lo + 1e-12)
    return float(np.clip(fraction, 0.0, 1.0))
def estimate_extras(y: np.ndarray, sr: int, bpm: float, mode: str) -> Dict[str, float]:
    """
    Compute heuristic 0-100 "Energy", "Danceability" and "Happiness" scores.

    * Energy: mean RMS rescaled from the range [0.01, 0.2].
    * Danceability: 0.6 x a Gaussian tempo preference centred on 118 BPM
      plus 0.4 x the mean predominant-local-pulse strength (0.5 is used when
      the pulse computation raises).
    * Happiness: 0.5 x brightness (mean spectral centroid over Nyquist)
      plus 0.3 x a Gaussian tempo preference centred on 120 BPM, plus 0.2
      when *mode* is ``"major"``.

    Returns:
        Dict with the three scores, each rounded to one decimal place.
    """
    frame_rms = librosa.feature.rms(y=y, frame_length=2048, hop_length=512).squeeze()
    energy = robust_scale(float(np.mean(frame_rms)), 0.01, 0.2)

    try:
        pulse = float(np.mean(librosa.beat.plp(y=y, sr=sr)))
    except Exception:
        # Best effort: fall back to a neutral pulse strength.
        pulse = 0.5

    tempo_pref = math.exp(-(((bpm - 118.0) / 50.0) ** 2))
    dance = 0.6 * tempo_pref + 0.4 * pulse

    centroid_frames = librosa.feature.spectral_centroid(y=y, sr=sr).squeeze()
    nyquist = sr / 2.0
    bright = np.clip(float(np.mean(centroid_frames)) / (nyquist + 1e-12), 0, 1)

    mode_bonus = 0.2 if mode == "major" else 0.0
    happy = 0.5 * bright + 0.3 * math.exp(-(((bpm - 120.0) / 60.0) ** 2)) + mode_bonus

    return {
        "Energy": round(energy * 100, 1),
        "Danceability": round(np.clip(dance, 0, 1) * 100, 1),
        "Happiness": round(np.clip(happy, 0, 1) * 100, 1),
    }

# ---------- Core ----------
def analyze_one(path: str, max_duration_s: float = 300.0) -> Dict[str, str]:
    """
    Analyse one audio file and return a result row for the table.

    The file is decoded (at most *max_duration_s* seconds at 22050 Hz),
    trimmed of leading/trailing audio below 40 dB, then tempo, key, Camelot
    code and the extra scores are computed.

    Returns:
        Dict with the keys "File Name", "Key", "Alt Key", "BPM", "Energy",
        "Danceability" and "Happiness". A decode failure puts
        ``"Error: <message>"`` in "Key" and leaves the rest empty; audio that
        is empty after trimming yields "N/A" values.
    """
    file_name = os.path.basename(path)

    def make_row(key, alt_key, bpm, energy, danceability, happiness):
        # Single place that fixes the column names and their order.
        return {"File Name": file_name, "Key": key, "Alt Key": alt_key, "BPM": bpm,
                "Energy": energy, "Danceability": danceability, "Happiness": happiness}

    try:
        samples, rate = load_audio_any(path, sr=22050, duration=max_duration_s)
    except Exception as e:
        return make_row(f"Error: {e}", "", "", "", "", "")

    samples, _ = librosa.effects.trim(samples, top_db=40)
    if samples.size == 0:
        return make_row("N/A", "", "N/A", "N/A", "N/A", "N/A")

    tempo, _ = pick_best_bpm(samples, rate, hop=512)
    tempo_known = tempo > 0
    bpm_display = int(round(tempo)) if tempo_known else "N/A"

    key_name, mode, _, tonic = estimate_key(samples, rate)
    camelot_code = camelot(tonic_from_index(tonic), mode)

    # The extras need some tempo; assume 120 BPM when none was detected.
    extras = estimate_extras(samples, rate, tempo if tempo_known else 120.0, mode)

    return make_row(key_name, camelot_code, bpm_display,
                    extras["Energy"], extras["Danceability"], extras["Happiness"])

def analyze_batch(files: List[str], save_results: bool, search: str):
    """
    Analyse several audio files and return the results table (and optional CSV).

    Args:
        files: Paths of the audio files to analyse; empty or ``None`` yields
            an empty table.
        save_results: When true and at least one row remains after filtering,
            the table is also written to a temporary ``.csv`` file.
        search: Optional filter text. A row is kept when any of its cells
            contains the text as a literal, case-insensitive substring.

    Returns:
        ``(dataframe, csv_path)`` where *csv_path* is ``None`` unless a CSV
        was written. The CSV is created with ``delete=False``; removing it is
        left to the caller.
    """
    columns = ["File Name","Key","Alt Key","BPM","Energy","Danceability","Happiness"]
    if not files:
        return pd.DataFrame(columns=columns), None

    rows = []
    for f in files:
        try:
            rows.append(analyze_one(f))
        except Exception as e:
            # One bad file must not abort the batch: report it as an error row.
            rows.append({"File Name": os.path.basename(f), "Key": f"Error: {e}", "Alt Key": "", "BPM": "", "Energy": "", "Danceability": "", "Happiness": ""})
    df = pd.DataFrame(rows, columns=columns)

    needle = search.strip() if search else ""
    if needle:
        # regex=False: the text comes straight from the user, so characters
        # such as "(", "+" or "." must be matched literally rather than being
        # compiled as a regular expression (which can raise re.error).
        mask = df.apply(
            lambda col: col.astype(str).str.contains(needle, case=False, na=False, regex=False)
        )
        df = df[mask.any(axis=1)]

    csv_file = None
    if save_results and len(df):
        # Only reserve a unique path here and close the handle right away, so
        # no descriptor leaks and pandas can reopen the file on every platform.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
            csv_file = tmp.name
        df.to_csv(csv_file, index=False, encoding="utf-8")
    return df, csv_file

# ---------- UI ----------
# Custom stylesheet handed to gr.Blocks: styles the title element, defines a
# ".small-note" helper class (not referenced by any component built below) and
# forces left-aligned table cells.
CSS = """
#app-title { font-weight: 700; font-size: 28px; }
.small-note { font-size: 12px; opacity: 0.8; }
th, td { text-align: left !important; }
"""
# The UI is assembled at import time; `demo` is the module-level Blocks object
# that the entry point at the bottom launches.
with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
    gr.Markdown("<div id='app-title'>Audio Key & BPM Finder — Robust Loader</div>")
    gr.Markdown("Upload MP3/WAV/M4A, etc. This Space installs **FFmpeg** and falls back to pydub if needed. "
                "Outputs **Key**, **Camelot (Alt Key)**, **BPM**, plus **Energy/Danceability/Happiness**.")

    # Row 1: multi-file upload; type="filepath" is requested so the handler
    # receives paths, which analyze_batch() forwards to analyze_one().
    with gr.Row():
        files = gr.File(label="Audio Files", file_count="multiple", type="filepath")
    # Row 2: filter text, CSV toggle and the button that triggers the analysis.
    with gr.Row():
        search = gr.Textbox(label="Search (filter any column)", placeholder="Type to filter…", scale=3)
        save = gr.Checkbox(label="Save results as CSV", value=False, scale=1)
        run = gr.Button("Analyze", variant="primary", scale=1)

    # Result table; the headers mirror the DataFrame columns built in analyze_batch().
    out_df = gr.Dataframe(headers=["File Name","Key","Alt Key","BPM","Energy","Danceability","Happiness"],
                          interactive=False, wrap=True, label="Results")
    out_csv = gr.File(label="Download CSV", visible=True)

    # Input order must match analyze_batch(files, save_results, search). Only
    # the button click is wired up, so changing the search text has no effect
    # until "Analyze" is pressed again.
    run.click(analyze_batch, inputs=[files, save, search], outputs=[out_df, out_csv])

# Start the Gradio server only when the module is executed as a script.
if __name__ == "__main__":
    demo.launch()