Diggz10 committed on
Commit
7f608b7
·
verified ·
1 Parent(s): 1ba01a7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +126 -250
app.py CHANGED
@@ -1,303 +1,187 @@
1
- import os
2
- import math
3
- import tempfile
4
- import warnings
5
  from typing import Dict, List, Tuple
6
 
7
  import gradio as gr
8
  import numpy as np
9
  import pandas as pd
10
  import librosa
 
11
 
12
  warnings.filterwarnings("ignore", category=UserWarning)
13
  warnings.filterwarnings("ignore", category=FutureWarning)
14
 
15
- # =========================================================
16
- # Key detection profiles (two well-known sets) for voting
17
- # =========================================================
18
 
# Krumhansl-Schmuckler key-profile weights, one value per pitch class.
KS_MAJOR = np.array(
    [6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88],
    dtype=float,
)
KS_MINOR = np.array(
    [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17],
    dtype=float,
)

# Temperley / Kostka-Payne weights, multiplied by 10 so both profile sets
# live in a roughly similar numeric range.
TP_MAJOR = 10 * np.array(
    [0.748, 0.060, 0.488, 0.082, 0.670, 0.460, 0.096, 0.715, 0.104, 0.366, 0.057, 0.400],
    dtype=float,
)
TP_MINOR = 10 * np.array(
    [0.712, 0.084, 0.474, 0.618, 0.049, 0.460, 0.105, 0.670, 0.461, 0.044, 0.373, 0.330],
    dtype=float,
)

# Pitch-class names, flat spellings, indexed 0..11 starting at C.
PITCHES_FLAT = [
    'C', 'Db', 'D', 'Eb', 'E', 'F',
    'Gb', 'G', 'Ab', 'A', 'Bb', 'B',
]

# Camelot wheel codes keyed by tonic name; enharmonic spellings share a code.
CAMELOT_MAJOR = {
    'B': '1B', 'F#': '2B', 'Gb': '2B', 'Db': '3B', 'C#': '3B',
    'Ab': '4B', 'Eb': '5B', 'Bb': '6B', 'F': '7B', 'C': '8B',
    'G': '9B', 'D': '10B', 'A': '11B', 'E': '12B',
}
CAMELOT_MINOR = {
    'Ab': '1A', 'G#': '1A', 'Eb': '2A', 'D#': '2A', 'Bb': '3A', 'A#': '3A',
    'F': '4A', 'C': '5A', 'G': '6A', 'D': '7A', 'A': '8A', 'E': '9A',
    'B': '10A', 'F#': '11A', 'Gb': '11A', 'Db': '12A', 'C#': '12A',
}
31
 
32
- # =========================================================
33
- # Utility helpers
34
- # =========================================================
35
-
def roll(arr: np.ndarray, steps: int) -> np.ndarray:
    """Return ``arr`` circularly shifted by ``steps`` positions (via ``np.roll``)."""
    shifted = np.roll(arr, steps)
    return shifted
38
-
def tonic_from_index(idx: int) -> str:
    """Return the ``PITCHES_FLAT`` entry for ``idx``, wrapping the index modulo 12."""
    pitch_class = int(idx) % 12
    return PITCHES_FLAT[pitch_class]
41
-
def camelot(tonic: str, mode: str) -> str:
    """
    Look up the Camelot code for a key.

    ``mode == "major"`` selects ``CAMELOT_MAJOR``; any other value selects
    ``CAMELOT_MINOR``. An unknown tonic yields the empty string.
    """
    if mode == "major":
        table = CAMELOT_MAJOR
    else:
        table = CAMELOT_MINOR
    return table.get(tonic, "")
44
 
def normalize(v: np.ndarray) -> np.ndarray:
    """Divide ``v`` by its ``np.linalg.norm``; a tiny epsilon keeps a zero input from dividing by zero."""
    return v / (np.linalg.norm(v) + 1e-12)
48
-
49
- # =========================================================
50
- # Improved BPM estimation (multi-method consensus)
51
- # =========================================================
52
-
def pick_best_bpm(y: np.ndarray, sr: int, hop: int = 512) -> Tuple[float, float]:
    """
    Estimate the tempo of ``y`` and return ``(bpm, confidence)``.

    Three candidates are gathered from one onset-strength envelope:
      1) the strongest autocorrelation lag inside 60..200 BPM,
      2) the most frequent per-frame tempo estimate inside 60..200 BPM,
      3) the tempo reported by librosa's beat tracker.
    Each candidate, plus its half- and double-time variant, is scored by how
    much onset strength lies near a regular beat grid at that tempo; the
    best-scoring tempo wins.

    Args:
        y: audio samples.
        sr: sample rate of ``y``.
        hop: hop length, in samples, used for the onset envelope.

    Returns:
        ``(bpm, confidence)`` with confidence clipped to [0, 1]. When no
        candidate lies strictly between 30 and 240 BPM, the beat-tracker tempo
        (floored at 0) is returned with confidence 0.
    """
    onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop, aggregate=np.median)

    # 1) Autocorrelation peak; lag 0 is skipped and only 60..200 BPM is kept.
    ac = librosa.autocorrelate(onset_env, max_size=onset_env.size // 2)
    lags = np.arange(1, len(ac))
    bpms_ac = 60.0 * sr / (lags * hop)
    mask = (bpms_ac >= 60) & (bpms_ac <= 200)
    bpms_ac = bpms_ac[mask]
    ac_vals = ac[1:][mask]
    bpm_ac = float(bpms_ac[np.argmax(ac_vals)]) if len(bpms_ac) else 0.0

    # 2) Most frequent per-frame tempo (1-BPM histogram bins over 60..200).
    # librosa.beat.tempo is a deprecated alias of librosa.feature.tempo; use
    # the new location when this librosa version provides it.
    tempo_fn = getattr(librosa.feature, "tempo", None) or librosa.beat.tempo
    tempi = tempo_fn(onset_envelope=onset_env, sr=sr, hop_length=hop, aggregate=None)
    bpm_tg = 0.0
    if tempi is not None and len(tempi):
        in_range = tempi[(tempi >= 60) & (tempi <= 200)]
        if len(in_range):
            hist, _ = np.histogram(in_range, bins=np.arange(60, 202, 1))
            bpm_tg = float(60 + np.argmax(hist))

    # 3) Beat-tracker tempo. Depending on the librosa version this may come
    # back as a plain float or as a one-element array; np.mean collapses
    # either form to the same scalar without an implicit array->float cast.
    tempo_bt, _ = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr, hop_length=hop)
    bpm_bt = float(np.mean(tempo_bt))

    candidates = [bpm for bpm in (bpm_ac, bpm_tg, bpm_bt) if 30 < bpm < 240]
    if not candidates:
        return max(bpm_bt, 0.0), 0.0

    # Half/double-time variants of every candidate, kept inside 60..200 BPM.
    expanded = [b for c in candidates for b in (c / 2, c, c * 2) if 60 <= b <= 200]

    n_frames = len(onset_env)
    start = int(np.argmax(onset_env))  # anchor the beat grid on the strongest onset

    def alignment_score(bpm_val: float) -> float:
        """Mean of the peak onset strength in a +/-2 frame window around each predicted beat."""
        period = (60.0 / bpm_val) * sr / hop  # beat period in frames
        beat_frames = np.round(np.arange(start, n_frames, period)).astype(int)
        beat_frames = beat_frames[beat_frames < n_frames]
        total = 0.0
        for f in beat_frames:
            lo = max(0, f - 2)
            hi = min(n_frames, f + 3)
            total += float(np.max(onset_env[lo:hi]))
        return total / (len(beat_frames) + 1e-12)

    scored = [(b, alignment_score(b)) for b in expanded]
    best_bpm, best_score = max(scored, key=lambda item: item[1])

    # Confidence blends grid alignment with agreement among the three methods
    # (the ratio is 1 when a candidate equals the chosen tempo).
    agree = np.mean([min(best_bpm, c) / max(best_bpm, c) for c in candidates])
    confidence = 0.7 * (best_score / (np.max(onset_env) + 1e-12)) + 0.3 * agree
    return float(best_bpm), float(np.clip(confidence, 0.0, 1.0))
132
-
133
- # =========================================================
134
- # Improved Key estimation
135
- # =========================================================
136
-
def beat_sync_chroma(y: np.ndarray, sr: int, hop: int = 512) -> np.ndarray:
    """
    Build one 12-bin pitch-class profile for the whole signal.

    The harmonic component of ``y`` is converted to CQT chroma and CENS
    chroma, the two are blended (0.65 / 0.35), the blend is averaged between
    tracked beats when more than two beats are found, every column is
    L2-normalized, and the columns are averaged.

    Args:
        y: audio samples.
        sr: sample rate of ``y``.
        hop: hop length, in samples, used for the chroma and beat tracking.

    Returns:
        The mean over time of the column-normalized chroma (one value per chroma bin).
    """
    # Harmonic component only, to suppress drums.
    y_harm, _ = librosa.effects.hpss(y)

    # NOTE: the previous call also passed window='hann'. chroma_cqt forwards
    # `window` to the CQ-to-chroma filter bank, where it is documented as an
    # array (or None) rather than a window name, so it is left at its default.
    chroma_cqt = librosa.feature.chroma_cqt(
        y=y_harm, sr=sr, hop_length=hop, bins_per_octave=36, cqt_mode='full'
    )
    chroma_cens = librosa.feature.chroma_cens(y=y_harm, sr=sr, hop_length=hop)

    # Weighted blend of the two chroma variants.
    chroma = normalize(0.65 * chroma_cqt + 0.35 * chroma_cens)

    # Beat-synchronize when enough beats were tracked; otherwise keep frames.
    _, beats = librosa.beat.beat_track(y=y_harm, sr=sr, hop_length=hop)
    if beats is not None and len(beats) > 2:
        chroma_sync = librosa.util.sync(chroma, beats, aggregate=np.mean)
    else:
        chroma_sync = chroma

    # Normalize each column, then average across columns.
    column_norms = np.linalg.norm(chroma_sync, axis=0, keepdims=True) + 1e-12
    chroma_sync = chroma_sync / column_norms
    return np.mean(chroma_sync, axis=1)
159
 
def score_key(pcp: np.ndarray, profiles: Tuple[np.ndarray, np.ndarray]) -> Tuple[str, str, float, int]:
    """
    Match a pitch-class profile against all 24 rotated major/minor key profiles.

    Args:
        pcp: 12-element pitch-class profile.
        profiles: ``(major_profile, minor_profile)``, each a 12-element array
            whose index 0 is the weight of the tonic.

    Returns:
        ``(key_name, mode, confidence, tonic_index)``. ``key_name`` is
        ``"<tonic> <mode>"``, ``mode`` is ``"major"`` or ``"minor"``,
        ``confidence`` is the gap between the best and second-best score
        relative to the best score (clipped to [0, 1]), and ``tonic_index``
        is the winning pitch-class index (0..11).
    """
    maj_prof, min_prof = profiles

    def _unit(v: np.ndarray) -> np.ndarray:
        return v / (np.linalg.norm(v) + 1e-12)

    pcp = _unit(pcp)

    best_score, best_mode, best_tonic = -1.0, "major", 0
    all_scores = []
    for i in range(12):
        # np.roll(profile, i) moves the tonic weight from index 0 to pitch
        # class i, which is the profile of the key whose tonic is i. (Rolling
        # by -i would instead score the key whose tonic is -i mod 12.)
        s_maj = float(np.dot(pcp, _unit(np.roll(maj_prof, i))))
        s_min = float(np.dot(pcp, _unit(np.roll(min_prof, i))))
        all_scores += [s_maj, s_min]
        if s_maj > best_score:
            best_score, best_mode, best_tonic = s_maj, "major", i
        if s_min > best_score:
            best_score, best_mode, best_tonic = s_min, "minor", i

    # Confidence = margin between the best and the runner-up score.
    ranked = np.sort(np.array(all_scores, dtype=float))
    margin = (ranked[-1] - ranked[-2]) / (ranked[-1] + 1e-12)
    confidence = float(np.clip(margin, 0.0, 1.0))

    key_name = f"{tonic_from_index(best_tonic)} {best_mode}"
    return key_name, best_mode, confidence, best_tonic
 
188
 
def estimate_key(y: np.ndarray, sr: int) -> Tuple[str, str, float, int]:
    """
    Pick a key by letting the Krumhansl and Temperley profile sets vote.

    * Both sets agree on tonic and mode: that key is returned with the mean
      of the two confidences plus 0.3.
    * One set is ahead by at least 0.05 confidence: its key is returned with
      its confidence scaled by 0.9.
    * Otherwise a spectral-centroid "brightness" value decides between the
      two, and the mean confidence is reported.

    Returns ``(key_name, mode, confidence, tonic_index)`` with the confidence
    clipped to [0, 1].
    """
    profile = beat_sync_chroma(y, sr)
    ks_name, ks_mode, ks_conf, ks_tonic = score_key(profile, (KS_MAJOR, KS_MINOR))
    tp_name, tp_mode, tp_conf, tp_tonic = score_key(profile, (TP_MAJOR, TP_MINOR))

    def _result(name, mode, tonic_idx, conf):
        return name, mode, float(np.clip(conf, 0.0, 1.0)), int(tonic_idx)

    # Full agreement on tonic and mode: boost the averaged confidence.
    if ks_mode == tp_mode and ks_tonic == tp_tonic:
        return _result(ks_name, ks_mode, ks_tonic, 0.5 * (ks_conf + tp_conf) + 0.3)

    # Clear winner by confidence.
    if ks_conf >= tp_conf + 0.05:
        return _result(ks_name, ks_mode, ks_tonic, ks_conf * 0.9)
    if tp_conf >= ks_conf + 0.05:
        return _result(tp_name, tp_mode, tp_tonic, tp_conf * 0.9)

    # Close call: mean spectral centroid as a fraction of Nyquist breaks the tie.
    centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    brightness = float(np.mean(centroid)) / (sr / 2.0 + 1e-12)
    is_bright = brightness > 0.5
    wanted_tonic = tp_tonic if is_bright else ks_tonic
    wanted_mode = "major" if is_bright else "minor"
    mean_conf = (ks_conf + tp_conf) / 2

    if wanted_mode == ks_mode and wanted_tonic == ks_tonic:
        return _result(ks_name, ks_mode, ks_tonic, mean_conf)
    return _result(tp_name, tp_mode, tp_tonic, mean_conf)
221
-
222
- # =========================================================
223
- # Extra features
224
- # =========================================================
225
-
def robust_scale(x: float, lo: float, hi: float) -> float:
    """Map ``x`` linearly from ``[lo, hi]`` onto ``[0, 1]``, clipping values that fall outside."""
    span = hi - lo + 1e-12  # epsilon guards against lo == hi
    scaled = (x - lo) / span
    return float(np.clip(scaled, 0.0, 1.0))
228
 
 
 
def estimate_extras(y: np.ndarray, sr: int, bpm: float, mode: str) -> Dict[str, float]:
    """
    Compute heuristic Energy, Danceability and Happiness scores (0-100, one decimal).

    * Energy: mean RMS rescaled from the 0.01..0.2 range.
    * Danceability: 0.6 * a Gaussian-shaped preference centred on 118 BPM
      plus 0.4 * the mean predominant-local-pulse value (0.5 if PLP fails).
    * Happiness: 0.5 * spectral-centroid brightness, plus 0.3 * a preference
      centred on 120 BPM, plus 0.2 when ``mode`` is ``"major"``.
    """
    frame_rms = librosa.feature.rms(y=y, frame_length=2048, hop_length=512).squeeze()
    energy = robust_scale(float(np.mean(frame_rms)), lo=0.01, hi=0.2)

    try:
        pulse = float(np.mean(librosa.beat.plp(y=y, sr=sr)))
    except Exception:
        # Best-effort feature: fall back to a neutral pulse value.
        pulse = 0.5

    tempo_pref = math.exp(-(((bpm - 118.0) / 50.0) ** 2))
    danceability = np.clip(0.6 * tempo_pref + 0.4 * pulse, 0.0, 1.0)

    centroid = librosa.feature.spectral_centroid(y=y, sr=sr).squeeze()
    # Mean centroid expressed as a fraction of the Nyquist frequency.
    brightness = np.clip(float(np.mean(centroid)) / (sr / 2.0 + 1e-12), 0.0, 1.0)
    mode_bonus = 0.2 if mode == "major" else 0.0
    happiness = 0.5 * brightness + 0.3 * math.exp(-(((bpm - 120.0) / 60.0) ** 2)) + mode_bonus

    return {
        "Energy": round(energy * 100, 1),
        "Danceability": round(danceability * 100, 1),
        "Happiness": round(np.clip(happiness, 0.0, 1.0) * 100, 1),
    }
252
-
253
- # =========================================================
254
- # Core analyzer
255
- # =========================================================
256
 
 
def analyze_one(path: str, max_duration_s: float = 300.0) -> Dict[str, str]:
    """
    Analyze one audio file and return a result row for the table.

    The file is loaded as mono 22.05 kHz (at most ``max_duration_s`` seconds)
    and silence is trimmed. If nothing remains, every metric is "N/A".
    Otherwise the row carries the key name, its Camelot code ("Alt Key"),
    the rounded BPM and the three heuristic scores.
    """
    file_name = os.path.basename(path)

    signal, sr = librosa.load(path, sr=22050, mono=True, duration=max_duration_s)
    signal, _ = librosa.effects.trim(signal, top_db=40)

    if signal.size == 0:
        row = {"File Name": file_name, "Key": "N/A", "Alt Key": ""}
        row.update(dict.fromkeys(("BPM", "Energy", "Danceability", "Happiness"), "N/A"))
        return row

    # Tempo (confidence is not surfaced in the table).
    bpm_val, _bpm_conf = pick_best_bpm(signal, sr, hop=512)
    has_bpm = bpm_val > 0
    bpm_disp = int(round(bpm_val)) if has_bpm else "N/A"

    # Key and Camelot code.
    key_name, mode, _key_conf, tonic_idx = estimate_key(signal, sr)
    camelot_code = camelot(PITCHES_FLAT[tonic_idx], mode)

    # Extras use 120 BPM as a stand-in when no tempo was found.
    extras = estimate_extras(signal, sr, bpm_val if has_bpm else 120.0, mode)

    row = {
        "File Name": file_name,
        "Key": f"{key_name}",       # e.g. "Bb minor"
        "Alt Key": camelot_code,    # e.g. "3A"
        "BPM": bpm_disp,
    }
    for metric in ("Energy", "Danceability", "Happiness"):
        row[metric] = extras[metric]
    return row
286
 
287
  def analyze_batch(files: List[str], save_results: bool, search: str):
288
  if not files:
289
  return pd.DataFrame(columns=["File Name","Key","Alt Key","BPM","Energy","Danceability","Happiness"]), None
290
-
291
  rows = []
292
  for f in files:
293
  try:
294
  rows.append(analyze_one(f))
295
  except Exception as e:
296
- rows.append({"File Name": os.path.basename(f), "Key": f"Error: {e}", "Alt Key": "", "BPM": "",
297
- "Energy": "", "Danceability": "", "Happiness": ""})
298
-
299
  df = pd.DataFrame(rows, columns=["File Name","Key","Alt Key","BPM","Energy","Danceability","Happiness"])
300
-
301
  if search and search.strip():
302
  mask = df.apply(lambda col: col.astype(str).str.contains(search.strip(), case=False, na=False))
303
  df = df[mask.any(axis=1)]
@@ -307,26 +191,18 @@ def analyze_batch(files: List[str], save_results: bool, search: str):
307
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
308
  df.to_csv(tmp.name, index=False, encoding="utf-8")
309
  csv_file = tmp.name
310
-
311
  return df, csv_file
312
 
313
- # =========================================================
314
- # UI
315
- # =========================================================
316
-
317
  CSS = """
318
  #app-title { font-weight: 700; font-size: 28px; }
319
  .small-note { font-size: 12px; opacity: 0.8; }
320
  th, td { text-align: left !important; }
321
  """
322
-
323
  with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
324
- gr.Markdown("<div id='app-title'>Audio Key & BPM Finder — Accurate Mode</div>")
325
- gr.Markdown(
326
- "Upload audio (mp3/wav/m4a…). The app estimates **Key**, **Camelot (Alt Key)**, and **BPM** using consensus methods, "
327
- "plus heuristic **Energy**, **Danceability**, **Happiness**."
328
- "<br><span class='small-note'>Tip: Longer clips (30–120s) improve accuracy. Results are global track estimates.</span>"
329
- )
330
 
331
  with gr.Row():
332
  files = gr.File(label="Audio Files", file_count="multiple", type="filepath")
@@ -339,7 +215,7 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
339
  interactive=False, wrap=True, label="Results")
340
  out_csv = gr.File(label="Download CSV", visible=True)
341
 
342
- run.click(fn=analyze_batch, inputs=[files, save, search], outputs=[out_df, out_csv])
343
 
344
  if __name__ == "__main__":
345
  demo.launch()
 
1
+ import os, io, math, tempfile, warnings
 
 
 
2
  from typing import Dict, List, Tuple
3
 
4
  import gradio as gr
5
  import numpy as np
6
  import pandas as pd
7
  import librosa
8
+ from pydub import AudioSegment
9
 
10
  warnings.filterwarnings("ignore", category=UserWarning)
11
  warnings.filterwarnings("ignore", category=FutureWarning)
12
 
# ---------- Key profiles ----------
# Krumhansl-Schmuckler weights, one value per pitch class.
KS_MAJOR = np.array(
    [6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88],
    dtype=float,
)
KS_MINOR = np.array(
    [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17],
    dtype=float,
)

# Second profile set, multiplied by 10.
TP_MAJOR = 10 * np.array(
    [0.748, 0.060, 0.488, 0.082, 0.670, 0.460, 0.096, 0.715, 0.104, 0.366, 0.057, 0.400],
    dtype=float,
)
TP_MINOR = 10 * np.array(
    [0.712, 0.084, 0.474, 0.618, 0.049, 0.460, 0.105, 0.670, 0.461, 0.044, 0.373, 0.330],
    dtype=float,
)

# Pitch-class names (flat spellings), index 0 = C.
PITCHES_FLAT = [
    'C', 'Db', 'D', 'Eb', 'E', 'F',
    'Gb', 'G', 'Ab', 'A', 'Bb', 'B',
]

# Camelot wheel codes keyed by tonic name; enharmonic spellings share a code.
CAMELOT_MAJOR = {
    'B': '1B', 'F#': '2B', 'Gb': '2B', 'Db': '3B', 'C#': '3B',
    'Ab': '4B', 'Eb': '5B', 'Bb': '6B', 'F': '7B', 'C': '8B',
    'G': '9B', 'D': '10B', 'A': '11B', 'E': '12B',
}
CAMELOT_MINOR = {
    'Ab': '1A', 'G#': '1A', 'Eb': '2A', 'D#': '2A', 'Bb': '3A', 'A#': '3A',
    'F': '4A', 'C': '5A', 'G': '6A', 'D': '7A', 'A': '8A', 'E': '9A',
    'B': '10A', 'F#': '11A', 'Gb': '11A', 'Db': '12A', 'C#': '12A',
}
23
 
def roll(a, k):
    """Circularly shift array ``a`` by ``k`` positions (thin wrapper over ``np.roll``)."""
    return np.roll(a, k)
def norm(v):
    """Divide ``v`` by its ``np.linalg.norm``; the epsilon keeps an all-zero input finite."""
    magnitude = np.linalg.norm(v) + 1e-12
    return v / magnitude
def tonic_from_index(i: int) -> str:
    """Return the ``PITCHES_FLAT`` entry for index ``i``, wrapping modulo 12."""
    pitch_class = i % 12
    return PITCHES_FLAT[pitch_class]
def camelot(tonic: str, mode: str) -> str:
    """
    Look up the Camelot code for a key.

    ``mode == "major"`` selects ``CAMELOT_MAJOR``; anything else selects
    ``CAMELOT_MINOR``. A tonic missing from the table yields "".
    """
    if mode == "major":
        table = CAMELOT_MAJOR
    else:
        table = CAMELOT_MINOR
    return table.get(tonic, "")
 
 
 
 
 
 
 
 
28
 
29
+ # ---------- Robust audio loader (fixes “unsupported type/codec”) ----------
def load_audio_any(path: str, sr: int = 22050, duration: float = 300.0):
    """
    Decode an audio file to a mono signal at the requested sample rate.

    librosa is tried first. If it raises, the file is decoded with pydub
    (which needs ffmpeg), cut to the first ``duration`` seconds when
    ``duration`` is truthy, exported as WAV into an in-memory buffer, and
    that buffer is loaded with librosa instead.

    Returns:
        The ``(samples, sample_rate)`` pair produced by ``librosa.load``.
    """
    try:
        samples, rate = librosa.load(path, sr=sr, mono=True, duration=duration)
        return samples, rate
    except Exception:
        # Fallback: let pydub/ffmpeg decode, then hand librosa a WAV stream.
        segment = AudioSegment.from_file(path)
        if duration:
            clip_ms = int(duration * 1000)  # pydub slices in milliseconds
            segment = segment[:clip_ms]
        wav_stream = io.BytesIO()
        segment.export(wav_stream, format="wav")
        wav_stream.seek(0)
        samples, rate = librosa.load(wav_stream, sr=sr, mono=True)
        return samples, rate
48
+
49
+ # ---------- BPM (consensus + half/double correction) ----------
def pick_best_bpm(y: np.ndarray, sr: int, hop: int = 512) -> Tuple[float, float]:
    """
    Estimate the tempo of ``y`` and return ``(bpm, confidence)``.

    Three candidates are gathered from one onset-strength envelope:
      1) the strongest autocorrelation lag inside 60..200 BPM,
      2) the most frequent per-frame tempo estimate inside 60..200 BPM,
      3) the tempo reported by librosa's beat tracker.
    Each candidate, plus its half- and double-time variant, is scored by how
    much onset strength lies near a regular beat grid at that tempo; the
    best-scoring tempo wins.

    Args:
        y: audio samples.
        sr: sample rate of ``y``.
        hop: hop length, in samples, used for the onset envelope.

    Returns:
        ``(bpm, confidence)`` with confidence clipped to [0, 1]. When no
        candidate lies strictly between 30 and 240 BPM, the beat-tracker tempo
        (floored at 0) is returned with confidence 0.
    """
    onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop, aggregate=np.median)

    # 1) Autocorrelation peak; lag 0 is skipped and only 60..200 BPM is kept.
    ac = librosa.autocorrelate(onset_env, max_size=onset_env.size // 2)
    lags = np.arange(1, len(ac))
    bpms_ac = 60.0 * sr / (lags * hop)
    mask = (bpms_ac >= 60) & (bpms_ac <= 200)
    ac_vals = ac[1:][mask]
    bpms_ac = bpms_ac[mask]
    bpm_ac = float(bpms_ac[np.argmax(ac_vals)]) if len(bpms_ac) else 0.0

    # 2) Most frequent per-frame tempo (1-BPM histogram bins over 60..200).
    # librosa.beat.tempo is a deprecated alias of librosa.feature.tempo; use
    # the new location when this librosa version provides it.
    tempo_fn = getattr(librosa.feature, "tempo", None) or librosa.beat.tempo
    tempi = tempo_fn(onset_envelope=onset_env, sr=sr, hop_length=hop, aggregate=None)
    bpm_tg = 0.0
    if tempi is not None and len(tempi):
        in_range = tempi[(tempi >= 60) & (tempi <= 200)]
        if len(in_range):
            hist, _ = np.histogram(in_range, bins=np.arange(60, 202, 1))
            bpm_tg = float(60 + np.argmax(hist))

    # 3) Beat-tracker tempo. Depending on the librosa version this may come
    # back as a plain float or as a one-element array; np.mean collapses
    # either form to the same scalar without an implicit array->float cast.
    tempo_bt, _ = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr, hop_length=hop)
    bpm_bt = float(np.mean(tempo_bt))

    candidates = [b for b in (bpm_ac, bpm_tg, bpm_bt) if 30 < b < 240]
    if not candidates:
        return max(bpm_bt, 0.0), 0.0

    # Half/double-time variants of every candidate, kept inside 60..200 BPM.
    expanded = [b for x in candidates for b in (x / 2, x, x * 2) if 60 <= b <= 200]

    n_frames = len(onset_env)
    start = int(np.argmax(onset_env))  # anchor the beat grid on the strongest onset

    def align_score(bpm_val: float) -> float:
        """Mean of the peak onset strength in a +/-2 frame window around each predicted beat."""
        period = (60.0 / bpm_val) * sr / hop  # beat period in frames
        frames = np.round(np.arange(start, n_frames, period)).astype(int)
        frames = frames[frames < n_frames]
        total = 0.0
        for f in frames:
            lo = max(0, f - 2)
            hi = min(n_frames, f + 3)
            total += float(np.max(onset_env[lo:hi]))
        return total / (len(frames) + 1e-12)

    scored = [(b, align_score(b)) for b in expanded]
    best_bpm, best_s = max(scored, key=lambda item: item[1])

    # Confidence blends grid alignment with agreement among the three methods
    # (the ratio is 1 when a candidate equals the chosen tempo).
    agree = np.mean([min(best_bpm, c) / max(best_bpm, c) for c in candidates])
    conf = float(np.clip(0.7 * (best_s / (np.max(onset_env) + 1e-12)) + 0.3 * agree, 0.0, 1.0))
    return float(best_bpm), conf
94
+
95
+ # ---------- Key (beat-sync CQT+CENS, dual-profile vote) ----------
def beat_sync_pcp(y: np.ndarray, sr: int, hop: int = 512) -> np.ndarray:
    """
    Build one pitch-class profile for the whole signal.

    The harmonic component of ``y`` is turned into CQT chroma and CENS chroma,
    blended 0.65 / 0.35, averaged between tracked beats when more than two
    beats are found, L2-normalized per column, and finally averaged over time.
    """
    harmonic, _ = librosa.effects.hpss(y)

    chroma_cqt = librosa.feature.chroma_cqt(
        y=harmonic, sr=sr, hop_length=hop, bins_per_octave=36, cqt_mode="full"
    )
    chroma_cens = librosa.feature.chroma_cens(y=harmonic, sr=sr, hop_length=hop)
    blended = norm(0.65 * chroma_cqt + 0.35 * chroma_cens)

    _, beat_frames = librosa.beat.beat_track(y=harmonic, sr=sr, hop_length=hop)
    enough_beats = beat_frames is not None and len(beat_frames) > 2
    if enough_beats:
        blended = librosa.util.sync(blended, beat_frames, aggregate=np.mean)

    # Unit-length columns so every frame/beat contributes equally to the mean.
    column_norms = np.linalg.norm(blended, axis=0, keepdims=True) + 1e-12
    blended = blended / column_norms
    return np.mean(blended, axis=1)
107
+
def score_key(pcp: np.ndarray, prof_major: np.ndarray, prof_minor: np.ndarray):
    """
    Match a pitch-class profile against all 24 rotated major/minor key profiles.

    Args:
        pcp: 12-element pitch-class profile.
        prof_major: 12-element major-key profile whose index 0 is the tonic weight.
        prof_minor: 12-element minor-key profile whose index 0 is the tonic weight.

    Returns:
        ``(mode, tonic_index, confidence)``. ``mode`` is ``"major"`` or
        ``"minor"``, ``tonic_index`` is the winning pitch-class index (0..11),
        and ``confidence`` is the gap between the best and second-best score
        relative to the best score, clipped to [0, 1].
    """
    def _unit(v):
        return v / (np.linalg.norm(v) + 1e-12)

    pcp = _unit(np.asarray(pcp, dtype=float))
    best_score, best_mode, best_tonic = -1.0, "major", 0
    all_scores = []
    for i in range(12):
        # np.roll(profile, i) moves the tonic weight from index 0 to pitch
        # class i, which is the profile of the key whose tonic is i. (Rolling
        # by -i would instead score the key whose tonic is -i mod 12.)
        s_major = float(np.dot(pcp, _unit(np.roll(prof_major, i))))
        s_minor = float(np.dot(pcp, _unit(np.roll(prof_minor, i))))
        all_scores += [s_major, s_minor]
        if s_major > best_score:
            best_score, best_mode, best_tonic = s_major, "major", i
        if s_minor > best_score:
            best_score, best_mode, best_tonic = s_minor, "minor", i

    # Confidence = margin between the best and the runner-up score.
    ranked = np.sort(np.array(all_scores))
    margin = (ranked[-1] - ranked[-2]) / (ranked[-1] + 1e-12)
    confidence = float(np.clip(margin, 0.0, 1.0))
    return best_mode, best_tonic, confidence
122
 
def estimate_key(y: np.ndarray, sr: int):
    """
    Pick a key by scoring the signal against two profile sets.

    When both sets agree on mode and tonic, that key is returned with the
    mean confidence plus 0.3 (clipped to [0, 1]); otherwise the result of the
    set with the higher confidence is used (the first set wins ties).

    Returns ``(key_name, mode, confidence, tonic_index)`` where ``key_name``
    is ``"<tonic> <mode>"``.
    """
    profile = beat_sync_pcp(y, sr)
    ks_mode, ks_tonic, ks_conf = score_key(profile, KS_MAJOR, KS_MINOR)
    tp_mode, tp_tonic, tp_conf = score_key(profile, TP_MAJOR, TP_MINOR)

    same_key = ks_mode == tp_mode and ks_tonic == tp_tonic
    if same_key:
        mode = ks_mode
        tonic = ks_tonic
        conf = float(np.clip(0.5 * (ks_conf + tp_conf) + 0.3, 0.0, 1.0))
    elif ks_conf >= tp_conf:
        mode, tonic, conf = ks_mode, ks_tonic, ks_conf
    else:
        mode, tonic, conf = tp_mode, tp_tonic, tp_conf

    name = f"{tonic_from_index(tonic)} {mode}"
    return name, mode, conf, tonic
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
+ # ---------- Extras ----------
def robust_scale(x, lo, hi):
    """Map ``x`` linearly from ``[lo, hi]`` onto ``[0, 1]``, clipping values that fall outside."""
    span = hi - lo + 1e-12  # epsilon guards against lo == hi
    scaled = (x - lo) / span
    return float(np.clip(scaled, 0.0, 1.0))
def estimate_extras(y: np.ndarray, sr: int, bpm: float, mode: str) -> Dict[str, float]:
    """
    Compute heuristic Energy, Danceability and Happiness scores (0-100, one decimal).

    * Energy: mean RMS rescaled from the 0.01..0.2 range.
    * Danceability: 0.6 * a Gaussian-shaped preference centred on 118 BPM
      plus 0.4 * the mean predominant-local-pulse value (0.5 if PLP fails).
    * Happiness: 0.5 * spectral-centroid brightness, plus 0.3 * a preference
      centred on 120 BPM, plus 0.2 when ``mode`` is ``"major"``.
    """
    frame_rms = librosa.feature.rms(y=y, frame_length=2048, hop_length=512).squeeze()
    energy = robust_scale(float(np.mean(frame_rms)), 0.01, 0.2)

    try:
        pulse_curve = librosa.beat.plp(y=y, sr=sr)
        pulse = float(np.mean(pulse_curve))
    except Exception:
        # Best-effort feature: fall back to a neutral pulse value.
        pulse = 0.5

    tempo_pref = math.exp(-(((bpm - 118.0) / 50.0) ** 2))
    dance = 0.6 * tempo_pref + 0.4 * pulse

    centroid = librosa.feature.spectral_centroid(y=y, sr=sr).squeeze()
    # Mean centroid expressed as a fraction of the Nyquist frequency.
    bright = np.clip(float(np.mean(centroid)) / (sr / 2.0 + 1e-12), 0, 1)
    mode_bonus = 0.2 if mode == "major" else 0.0
    happy = 0.5 * bright + 0.3 * math.exp(-(((bpm - 120.0) / 60.0) ** 2)) + mode_bonus

    return {
        "Energy": round(energy * 100, 1),
        "Danceability": round(np.clip(dance, 0, 1) * 100, 1),
        "Happiness": round(np.clip(happy, 0, 1) * 100, 1),
    }
 
 
 
 
 
 
 
 
 
 
151
 
152
+ # ---------- Core ----------
def analyze_one(path: str, max_duration_s: float = 300.0) -> Dict[str, str]:
    """
    Analyze one audio file and return a result row for the table.

    The file is loaded through ``load_audio_any`` (mono, 22.05 kHz, at most
    ``max_duration_s`` seconds). A load failure produces a row whose "Key"
    column carries the error text. After silence trimming, an empty signal
    produces a row of "N/A" values. Otherwise the row holds the key name, its
    Camelot code ("Alt Key"), the rounded BPM and the three heuristic scores.
    """
    file_name = os.path.basename(path)
    metric_columns = ("BPM", "Energy", "Danceability", "Happiness")

    try:
        signal, sr = load_audio_any(path, sr=22050, duration=max_duration_s)
    except Exception as e:
        row = {"File Name": file_name, "Key": f"Error: {e}", "Alt Key": ""}
        row.update(dict.fromkeys(metric_columns, ""))
        return row

    signal, _ = librosa.effects.trim(signal, top_db=40)
    if signal.size == 0:
        row = {"File Name": file_name, "Key": "N/A", "Alt Key": ""}
        row.update(dict.fromkeys(metric_columns, "N/A"))
        return row

    # Tempo (confidence is not surfaced in the table).
    bpm_val, _ = pick_best_bpm(signal, sr, hop=512)
    has_bpm = bpm_val > 0
    bpm_disp = int(round(bpm_val)) if has_bpm else "N/A"

    # Key and Camelot code.
    key_name, mode, _, tonic = estimate_key(signal, sr)
    camelot_code = camelot(tonic_from_index(tonic), mode)

    # Extras use 120 BPM as a stand-in when no tempo was found.
    extras = estimate_extras(signal, sr, bpm_val if has_bpm else 120.0, mode)

    row = {"File Name": file_name, "Key": key_name, "Alt Key": camelot_code, "BPM": bpm_disp}
    for metric in ("Energy", "Danceability", "Happiness"):
        row[metric] = extras[metric]
    return row
 
 
 
 
 
 
 
174
 
175
  def analyze_batch(files: List[str], save_results: bool, search: str):
176
  if not files:
177
  return pd.DataFrame(columns=["File Name","Key","Alt Key","BPM","Energy","Danceability","Happiness"]), None
 
178
  rows = []
179
  for f in files:
180
  try:
181
  rows.append(analyze_one(f))
182
  except Exception as e:
183
+ rows.append({"File Name": os.path.basename(f), "Key": f"Error: {e}", "Alt Key": "", "BPM": "", "Energy": "", "Danceability": "", "Happiness": ""})
 
 
184
  df = pd.DataFrame(rows, columns=["File Name","Key","Alt Key","BPM","Energy","Danceability","Happiness"])
 
185
  if search and search.strip():
186
  mask = df.apply(lambda col: col.astype(str).str.contains(search.strip(), case=False, na=False))
187
  df = df[mask.any(axis=1)]
 
191
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
192
  df.to_csv(tmp.name, index=False, encoding="utf-8")
193
  csv_file = tmp.name
 
194
  return df, csv_file
195
 
196
+ # ---------- UI ----------
 
 
 
197
  CSS = """
198
  #app-title { font-weight: 700; font-size: 28px; }
199
  .small-note { font-size: 12px; opacity: 0.8; }
200
  th, td { text-align: left !important; }
201
  """
 
202
  with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
203
+ gr.Markdown("<div id='app-title'>Audio Key & BPM Finder — Robust Loader</div>")
204
+ gr.Markdown("Upload MP3/WAV/M4A, etc. This Space installs **FFmpeg** and falls back to pydub if needed. "
205
+ "Outputs **Key**, **Camelot (Alt Key)**, **BPM**, plus **Energy/Danceability/Happiness**.")
 
 
 
206
 
207
  with gr.Row():
208
  files = gr.File(label="Audio Files", file_count="multiple", type="filepath")
 
215
  interactive=False, wrap=True, label="Results")
216
  out_csv = gr.File(label="Download CSV", visible=True)
217
 
218
+ run.click(analyze_batch, inputs=[files, save, search], outputs=[out_df, out_csv])
219
 
220
  if __name__ == "__main__":
221
  demo.launch()