Diggz10 committed on
Commit
224fd2d
·
verified ·
1 Parent(s): b14d274

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +238 -176
app.py CHANGED
@@ -1,252 +1,318 @@
1
  import os
2
- import io
3
  import math
4
  import tempfile
5
  import warnings
6
- from typing import List, Tuple, Dict
7
 
8
  import gradio as gr
9
  import numpy as np
10
  import pandas as pd
11
  import librosa
12
- import soundfile as sf
13
 
14
  warnings.filterwarnings("ignore", category=UserWarning)
15
  warnings.filterwarnings("ignore", category=FutureWarning)
16
 
17
- # ------------------------------
18
- # Key detection (Krumhansl-Schmuckler)
19
- # ------------------------------
20
 
21
- MAJOR_PROFILE = np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88])
22
- MINOR_PROFILE = np.array([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17])
 
 
 
 
 
23
 
24
- # Pitch-class order used across the app
25
  PITCHES_FLAT = ['C', 'Db', 'D', 'Eb', 'E', 'F', 'Gb', 'G', 'Ab', 'A', 'Bb', 'B']
26
 
27
- # Prefer flat spellings to align with common Camelot mappings
28
- MAJOR_NAMES = [f"{p} major" for p in PITCHES_FLAT]
29
- MINOR_NAMES = [f"{p} minor" for p in PITCHES_FLAT]
30
 
31
- # Camelot mapping (tonic -> code)
32
- CAMELOT_MAJOR = {
33
- 'B': '1B', 'F#': '2B', 'Gb': '2B', 'Db': '3B', 'C#': '3B', 'Ab': '4B', 'Eb': '5B',
34
- 'Bb': '6B', 'F': '7B', 'C': '8B', 'G': '9B', 'D': '10B', 'A': '11B', 'E': '12B'
35
- }
36
- CAMELOT_MINOR = {
37
- 'Ab': '1A', 'G#': '1A', 'Eb': '2A', 'D#': '2A', 'Bb': '3A', 'A#': '3A', 'F': '4A',
38
- 'C': '5A', 'G': '6A', 'D': '7A', 'A': '8A', 'E': '9A', 'B': '10A', 'F#': '11A',
39
- 'Gb': '11A', 'Db': '12A', 'C#': '12A'
40
- }
41
 
 
 
42
 
43
- def rotate_profile(profile: np.ndarray, steps: int) -> np.ndarray:
44
- return np.roll(profile, steps)
45
 
 
 
46
 
47
- def _tonic_name_from_index(idx: int) -> str:
48
- # idx 0..11 in the PITCHES_FLAT order
49
- return PITCHES_FLAT[idx % 12]
50
 
 
 
 
51
 
52
- def estimate_key(y: np.ndarray, sr: int) -> Tuple[str, str, int]:
53
  """
54
- Returns (key_name, mode, tonic_index)
55
- mode in {"major","minor"}
56
- tonic_index: 0..11 where 0=C, 1=Db, ..., 11=B in PITCHES_FLAT
 
 
 
57
  """
58
- # Use harmonic component for stability
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  y_harm, _ = librosa.effects.hpss(y)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
- # Chroma
62
- chroma = librosa.feature.chroma_cqt(y=y_harm, sr=sr, n_chroma=12)
63
- chroma_mean = chroma.mean(axis=1)
64
- if np.allclose(chroma_mean.sum(), 0):
65
- # Fallback to avoid divide-by-zero if silence
66
- chroma_mean = np.ones(12)
67
 
68
- # Normalize
69
- chroma_mean = chroma_mean / (np.linalg.norm(chroma_mean) + 1e-9)
 
70
 
71
- # Try all 12 rotations for major & minor
72
- best_score = -1
73
  best_mode = "major"
74
  best_tonic = 0
75
 
76
  for i in range(12):
77
- major_score = np.dot(chroma_mean, rotate_profile(MAJOR_PROFILE, -i))
78
- minor_score = np.dot(chroma_mean, rotate_profile(MINOR_PROFILE, -i))
79
-
80
- if major_score > best_score:
81
- best_score = major_score
82
- best_mode = "major"
83
- best_tonic = i
84
-
85
- if minor_score > best_score:
86
- best_score = minor_score
87
- best_mode = "minor"
88
- best_tonic = i
89
-
90
- # Build name
91
- tonic_name = _tonic_name_from_index(best_tonic)
92
- if best_mode == "major":
93
- key_name = f"{tonic_name} major"
94
- else:
95
- key_name = f"{tonic_name} minor"
96
-
97
- return key_name, best_mode, best_tonic
98
 
 
 
 
99
 
100
- def camelot_from_key(tonic: str, mode: str) -> str:
101
- if mode == "major":
102
- return CAMELOT_MAJOR.get(tonic, "")
 
 
 
 
 
 
 
 
 
 
 
 
103
  else:
104
- return CAMELOT_MINOR.get(tonic, "")
105
-
106
-
107
- # ------------------------------
108
- # Feature engineering (Energy, Danceability, Happiness)
109
- # ------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
  def robust_scale(x: float, lo: float, hi: float) -> float:
112
- """Clamp and scale x∈[lo,hi] to [0,1]."""
113
- return float(np.clip((x - lo) / (hi - lo + 1e-9), 0.0, 1.0))
114
-
115
 
116
- def estimate_features(y: np.ndarray, sr: int, tempo_bpm: float, mode: str) -> Dict[str, float]:
117
- """
118
- Lightweight proxies inspired by common MIR features.
119
- Returns values in [0, 100].
120
- """
121
- # Energy: mean RMS, robust-scaled
122
  rms = librosa.feature.rms(y=y, frame_length=2048, hop_length=512).squeeze()
123
- energy_raw = float(np.mean(rms))
124
- energy_score = robust_scale(energy_raw, lo=0.01, hi=0.2)
125
 
126
- # Rhythm pulse (0..1): average PLP magnitude
127
  try:
128
  plp = librosa.beat.plp(y=y, sr=sr)
129
  pulse = float(np.mean(plp))
130
  except Exception:
131
  pulse = 0.5
132
 
133
- # Tempo preference for dancing: bell centered ~118 BPM
134
- tempo_pref = math.exp(-((tempo_bpm - 118.0) / 50.0) ** 2) # 1 at ~118, smooth drop-off
135
-
136
- # Danceability combines pulse & tempo preference
137
  danceability = 0.6 * tempo_pref + 0.4 * pulse
138
 
139
- # Brightness proxy: spectral centroid / (sr/2)
140
  centroid = librosa.feature.spectral_centroid(y=y, sr=sr).squeeze()
141
- brightness = float(np.mean(centroid)) / (sr / 2.0 + 1e-9)
142
  brightness = np.clip(brightness, 0.0, 1.0)
143
-
144
- # Mode bonus (major tends to "happier" valence)
145
- mode_bonus = 0.15 if mode == "major" else 0.0
146
-
147
- # Tempo influence on "happiness" (moderate-faster feels brighter)
148
- tempo_valence = math.exp(-((tempo_bpm - 120.0) / 60.0) ** 2)
149
-
150
- happiness = 0.5 * brightness + 0.3 * tempo_valence + 0.2 * mode_bonus
151
 
152
  return {
153
- "Energy": round(energy_score * 100, 1),
154
- "Danceability": round(danceability * 100, 1),
155
  "Happiness": round(np.clip(happiness, 0.0, 1.0) * 100, 1),
156
  }
157
 
 
 
 
158
 
159
- # ------------------------------
160
- # Core analysis
161
- # ------------------------------
162
-
163
- def analyze_single(path: str, max_duration_s: float = 240.0) -> Dict[str, str]:
164
- """
165
- Analyze a single audio file and return a row dict.
166
- To keep Spaces snappy, we optionally cap analysis to the first N seconds.
167
- """
168
- filename = os.path.basename(path)
169
-
170
- # Load mono at 22.05k for speed; trim leading/trailing silence
171
  y, sr = librosa.load(path, sr=22050, mono=True, duration=max_duration_s)
172
  y, _ = librosa.effects.trim(y, top_db=40)
173
 
174
- if len(y) == 0:
175
- return {
176
- "File Name": filename,
177
- "Key": "N/A",
178
- "Alt Key": "",
179
- "BPM": "N/A",
180
- "Energy": "N/A",
181
- "Danceability": "N/A",
182
- "Happiness": "N/A",
183
- }
184
-
185
- # Tempo / BPM
186
- try:
187
- tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
188
- bpm = float(tempo)
189
- except Exception:
190
- bpm = float(librosa.beat.tempo(y=y, sr=sr))
191
- bpm_display = int(round(bpm))
192
 
193
- # Key
194
- key_name, mode, tonic_idx = estimate_key(y, sr)
195
- tonic_name = _tonic_name_from_index(tonic_idx)
196
- camelot = camelot_from_key(tonic_name, mode)
197
 
198
- # Extra features
199
- feats = estimate_features(y, sr, bpm, mode)
200
 
201
  return {
202
- "File Name": filename,
203
- "Key": key_name,
204
- "Alt Key": camelot,
205
- "BPM": bpm_display,
206
- "Energy": feats["Energy"],
207
- "Danceability": feats["Danceability"],
208
- "Happiness": feats["Happiness"],
209
  }
210
 
211
-
212
  def analyze_batch(files: List[str], save_results: bool, search: str):
213
- if not files or len(files) == 0:
214
- return pd.DataFrame(columns=["File Name", "Key", "Alt Key", "BPM", "Energy", "Danceability", "Happiness"]), None
215
 
216
  rows = []
217
  for f in files:
218
  try:
219
- rows.append(analyze_single(f))
220
  except Exception as e:
221
- rows.append({
222
- "File Name": os.path.basename(f),
223
- "Key": f"Error: {e}",
224
- "Alt Key": "",
225
- "BPM": "",
226
- "Energy": "",
227
- "Danceability": "",
228
- "Happiness": "",
229
- })
230
-
231
- df = pd.DataFrame(rows, columns=["File Name", "Key", "Alt Key", "BPM", "Energy", "Danceability", "Happiness"])
232
-
233
- # Optional search filter (case-insensitive)
234
- if search and isinstance(search, str) and search.strip():
235
  mask = df.apply(lambda col: col.astype(str).str.contains(search.strip(), case=False, na=False))
236
  df = df[mask.any(axis=1)]
237
 
238
  csv_file = None
239
- if save_results and len(df) > 0:
240
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
241
  df.to_csv(tmp.name, index=False, encoding="utf-8")
242
  csv_file = tmp.name
243
 
244
  return df, csv_file
245
 
246
-
247
- # ------------------------------
248
  # UI
249
- # ------------------------------
250
 
251
  CSS = """
252
  #app-title { font-weight: 700; font-size: 28px; }
@@ -255,26 +321,22 @@ th, td { text-align: left !important; }
255
  """
256
 
257
  with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
258
- gr.Markdown("<div id='app-title'>Audio Key & BPM Finder — with Energy, Danceability, Happiness</div>")
259
  gr.Markdown(
260
- "Upload one or more audio files (mp3/wav/m4a/etc.). "
261
- "The tool estimates musical **Key**, **Camelot (Alt Key)**, **BPM**, and proxy scores for **Energy**, **Danceability**, and **Happiness**."
262
- "<br><span class='small-note'>Note: Energy/Danceability/Happiness are heuristic estimates for quick analysis.</span>"
263
  )
264
 
265
  with gr.Row():
266
  files = gr.File(label="Audio Files", file_count="multiple", type="filepath")
267
  with gr.Row():
268
- search = gr.Textbox(label="Search (filter by file name or any column)", placeholder="Type to filter…", scale=3)
269
  save = gr.Checkbox(label="Save results as CSV", value=False, scale=1)
270
  run = gr.Button("Analyze", variant="primary", scale=1)
271
 
272
- out_df = gr.Dataframe(
273
- headers=["File Name", "Key", "Alt Key", "BPM", "Energy", "Danceability", "Happiness"],
274
- interactive=False,
275
- wrap=True,
276
- label="Results"
277
- )
278
  out_csv = gr.File(label="Download CSV", visible=True)
279
 
280
  run.click(fn=analyze_batch, inputs=[files, save, search], outputs=[out_df, out_csv])
 
1
  import os
 
2
  import math
3
  import tempfile
4
  import warnings
5
+ from typing import Dict, List, Tuple
6
 
7
  import gradio as gr
8
  import numpy as np
9
  import pandas as pd
10
  import librosa
 
11
 
12
  warnings.filterwarnings("ignore", category=UserWarning)
13
  warnings.filterwarnings("ignore", category=FutureWarning)
14
 
15
# =========================================================
# Key detection profiles (two well-known sets) for voting
# =========================================================

# Krumhansl-Schmuckler key profiles (Harte's numbers): relative salience of
# each pitch class when the tonic is at index 0; rotated to test all 12 keys.
KS_MAJOR = np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88], dtype=float)
KS_MINOR = np.array([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17], dtype=float)

# Temperley / Kostka–Payne (scaled roughly to similar ranges)
TP_MAJOR = np.array([0.748, 0.060, 0.488, 0.082, 0.670, 0.460, 0.096, 0.715, 0.104, 0.366, 0.057, 0.400], dtype=float) * 10
TP_MINOR = np.array([0.712, 0.084, 0.474, 0.618, 0.049, 0.460, 0.105, 0.670, 0.461, 0.044, 0.373, 0.330], dtype=float) * 10

# Pitch-class names in flat spelling; index 0 = C, 11 = B.
PITCHES_FLAT = ['C', 'Db', 'D', 'Eb', 'E', 'F', 'Gb', 'G', 'Ab', 'A', 'Bb', 'B']

# Camelot wheel codes keyed by tonic name; enharmonic spellings share a code.
CAMELOT_MAJOR = {'B':'1B','F#':'2B','Gb':'2B','Db':'3B','C#':'3B','Ab':'4B','Eb':'5B','Bb':'6B','F':'7B','C':'8B','G':'9B','D':'10B','A':'11B','E':'12B'}
CAMELOT_MINOR = {'Ab':'1A','G#':'1A','Eb':'2A','D#':'2A','Bb':'3A','A#':'3A','F':'4A','C':'5A','G':'6A','D':'7A','A':'8A','E':'9A','B':'10A','F#':'11A','Gb':'11A','Db':'12A','C#':'12A'}

# =========================================================
# Utility helpers
# =========================================================
 
 
 
 
 
 
 
35
 
36
def roll(arr: np.ndarray, steps: int) -> np.ndarray:
    """Circularly shift *arr* by *steps* positions (thin wrapper over np.roll)."""
    return np.roll(arr, steps)
38
 
39
def tonic_from_index(idx: int) -> str:
    """Map a pitch-class index (any integer) to its flat-spelled note name."""
    wrapped = int(idx) % 12  # wrap so out-of-range indices stay valid
    return PITCHES_FLAT[wrapped]
41
 
42
def camelot(tonic: str, mode: str) -> str:
    """Look up the Camelot wheel code for (tonic, mode); empty string if unknown."""
    table = CAMELOT_MAJOR if mode == "major" else CAMELOT_MINOR
    return table.get(tonic, "")
44
 
45
def normalize(v: np.ndarray) -> np.ndarray:
    """Scale *v* to unit L2 norm; the epsilon guards against all-zero input."""
    return v / (np.linalg.norm(v) + 1e-12)
48
 
49
+ # =========================================================
50
+ # Improved BPM estimation (multi-method consensus)
51
+ # =========================================================
52
 
53
def pick_best_bpm(y: np.ndarray, sr: int, hop: int = 512) -> Tuple[float, float]:
    """
    Estimate tempo by consensus of several methods.

    Parameters
    ----------
    y : mono audio signal
    sr : sample rate in Hz
    hop : hop length (samples) for the onset envelope

    Returns
    -------
    (bpm, confidence) with confidence clipped to [0, 1].

    Strategy:
      1) Onset-envelope autocorrelation peak
      2) Mode of frame-wise tempo estimates
      3) librosa beat-tracker tempo
    Candidates plus their half/double variants are scored by how well the
    predicted beat grid aligns with the onset envelope.
    """
    onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop, aggregate=np.median)

    # 1) Autocorrelation peak, converting lag (frames) to BPM; lag 0 excluded
    # because it is a trivial maximum.
    ac = librosa.autocorrelate(onset_env, max_size=onset_env.size // 2)
    lags = np.arange(1, len(ac))
    bpms_ac = 60.0 * sr / (lags * hop)
    plausible = (bpms_ac >= 60) & (bpms_ac <= 200)
    bpms_ac = bpms_ac[plausible]
    ac_vals = ac[1:][plausible]
    bpm_ac = float(bpms_ac[np.argmax(ac_vals)]) if len(bpms_ac) else 0.0

    # 2) Most frequent frame-wise tempo within the plausible range.
    # NOTE(review): librosa.beat.tempo is deprecated in librosa >= 0.10
    # (moved to librosa.feature.rhythm.tempo) — confirm the pinned version.
    tempi = librosa.beat.tempo(onset_envelope=onset_env, sr=sr, hop_length=hop, aggregate=None)
    bpm_tg = 0.0
    if tempi is not None and len(tempi):
        t = tempi[(tempi >= 60) & (tempi <= 200)]
        if len(t):
            hist, _ = np.histogram(t, bins=np.arange(60, 202, 1))
            bpm_tg = float(60 + np.argmax(hist))

    # 3) Global beat-tracker tempo.
    tempo_bt, beats = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr, hop_length=hop)
    bpm_bt = float(tempo_bt)

    candidates = [bpm for bpm in [bpm_ac, bpm_tg, bpm_bt] if 30 < bpm < 240]
    if not candidates:
        return max(bpm_bt, 0.0), 0.0

    # Expand with half/double variants to correct octave errors, then keep
    # only tempos in a sane dance-music range.
    expanded = []
    for bpm in candidates:
        expanded += [bpm / 2, bpm, bpm * 2]
    expanded = [b for b in expanded if 60 <= b <= 200]

    def alignment_score(bpm_val: float) -> float:
        """Mean onset strength near beat positions predicted from bpm_val."""
        period = (60.0 / bpm_val) * sr / hop  # beat period in onset frames
        # Anchor the predicted beat grid at the strongest onset.
        start = int(np.argmax(onset_env))
        beat_frames = np.round(np.arange(start, len(onset_env), period)).astype(int)
        beat_frames = beat_frames[beat_frames < len(onset_env)]
        # Take the max in a +/-2 frame window around each predicted beat.
        s = 0.0
        for f in beat_frames:
            lo = max(0, f - 2)
            hi = min(len(onset_env), f + 3)
            s += float(np.max(onset_env[lo:hi]))
        return s / (len(beat_frames) + 1e-12)

    scored = [(b, alignment_score(b)) for b in expanded]
    best_bpm, best_score = max(scored, key=lambda x: x[1])

    # Confidence blends alignment quality with inter-method agreement
    # (the min/max ratio is 1.0 when a candidate equals the winner).
    agree = np.mean([min(best_bpm, c) / max(best_bpm, c) for c in candidates])
    confidence = float(0.7 * (best_score / (np.max(onset_env) + 1e-12)) + 0.3 * agree)
    confidence = float(np.clip(confidence, 0.0, 1.0))

    return best_bpm, confidence
132
+
133
+ # =========================================================
134
+ # Improved Key estimation
135
+ # =========================================================
136
+
137
def beat_sync_chroma(y: np.ndarray, sr: int, hop: int = 512) -> np.ndarray:
    """
    Build an averaged 12-bin pitch-class profile from beat-synchronous chroma.

    Blends high-resolution CQT chroma (pitch detail) with CENS chroma
    (timbre robustness) on the harmonic component, beat-synchronizes the
    result, and averages the unit-normalized columns.
    """
    # Keep only the harmonic component so drums don't pollute the chroma.
    harmonic = librosa.effects.hpss(y)[0]

    # High-resolution, tuned CQT chroma.
    fine = librosa.feature.chroma_cqt(
        y=harmonic, sr=sr, hop_length=hop, bins_per_octave=36, window='hann', cqt_mode='full'
    )
    # Timbre-robust CENS chroma.
    smooth = librosa.feature.chroma_cens(y=harmonic, sr=sr, hop_length=hop)
    # Weighted blend of the two representations.
    blended = normalize(0.65 * fine + 0.35 * smooth)

    # Beat-synchronize to reduce local key shifts / percussive bias.
    _, beat_frames = librosa.beat.beat_track(y=harmonic, sr=sr, hop_length=hop)
    if beat_frames is not None and len(beat_frames) > 2:
        synced = librosa.util.sync(blended, beat_frames, aggregate=np.mean)
    else:
        synced = blended

    # Unit-normalize each column, then collapse to one pitch-class profile.
    col_norms = np.linalg.norm(synced, axis=0, keepdims=True) + 1e-12
    return np.mean(synced / col_norms, axis=1)
 
 
 
159
 
160
def score_key(pcp: np.ndarray, profiles: Tuple[np.ndarray, np.ndarray]) -> Tuple[str, str, float, int]:
    """
    Correlate a pitch-class profile against all 24 rotated key templates.

    Parameters
    ----------
    pcp : length-12 pitch-class profile (any scale; normalized internally)
    profiles : (major_profile, minor_profile) 12-bin templates

    Returns
    -------
    (key_name, mode, confidence, tonic_index)
      key_name : e.g. "Bb minor"
      mode : "major" or "minor"
      confidence : normalized margin between best and runner-up, in [0, 1]
      tonic_index : 0..11 in PITCHES_FLAT order

    Note: the original annotated a 3-tuple return but produced 4 values;
    the annotation is corrected here. The 24 template correlations are now
    computed once instead of twice (argmax pass + margin pass).
    """
    maj_prof, min_prof = profiles
    pcp = normalize(pcp)

    # One (score, mode, tonic) entry per rotation of each template.
    scores = []
    for i in range(12):
        scores.append((float(np.dot(pcp, normalize(roll(maj_prof, -i)))), "major", i))
        scores.append((float(np.dot(pcp, normalize(roll(min_prof, -i)))), "minor", i))

    # max() keeps the first occurrence on ties — same tie-break as the
    # original strict-greater-than scan in (major i, minor i) order.
    best_score, best_mode, best_tonic = max(scores, key=lambda s: s[0])

    # Confidence = margin between best and runner-up, normalized by the max.
    vals = np.sort(np.array([s[0] for s in scores], dtype=float))
    margin = (vals[-1] - vals[-2]) / (np.max(vals) + 1e-12)
    confidence = float(np.clip(margin, 0.0, 1.0))

    tonic = tonic_from_index(best_tonic)
    key_name = f"{tonic} {best_mode}"
    return key_name, best_mode, confidence, best_tonic
188
 
189
def estimate_key(y: np.ndarray, sr: int) -> Tuple[str, str, float, int]:
    """
    Dual-profile voting: Krumhansl + Temperley.
    We average their confidences and pick the agreement (or strongest if tie).

    Returns (key_name, mode, confidence in [0,1], tonic_index 0..11).
    """
    # One pitch-class profile, scored against both template families.
    pcp = beat_sync_chroma(y, sr)
    k_key, k_mode, k_conf, k_tonic = score_key(pcp, (KS_MAJOR, KS_MINOR))
    t_key, t_mode, t_conf, t_tonic = score_key(pcp, (TP_MAJOR, TP_MINOR))

    # If both agree on tonic & mode, boost confidence
    if (k_mode == t_mode) and (k_tonic == t_tonic):
        mode = k_mode
        tonic_idx = k_tonic
        name = k_key  # same as t_key
        conf = float(np.clip(0.5 * (k_conf + t_conf) + 0.3, 0.0, 1.0))
    else:
        # Choose the one with higher confidence, but allow close-call fallback
        if (k_conf >= t_conf + 0.05):
            name, mode, tonic_idx, conf = k_key, k_mode, k_tonic, k_conf * 0.9
        elif (t_conf >= k_conf + 0.05):
            name, mode, tonic_idx, conf = t_key, t_mode, t_tonic, t_conf * 0.9
        else:
            # disagree slightly: pick by proximity to major/minor brightness
            # NOTE(review): normalized spectral centroid rarely exceeds 0.5
            # for typical music, so this tie-break almost always selects the
            # ("minor", k_tonic) pairing — confirm this heuristic is intended.
            brightness = float(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))) / (sr/2.0 + 1e-12)
            pick_t = (k_tonic, t_tonic)[int(brightness > 0.5)]
            pick_m = ("minor", "major")[int(brightness > 0.5)]
            if pick_m == k_mode and pick_t == k_tonic:
                name, mode, tonic_idx, conf = k_key, k_mode, k_tonic, (k_conf+t_conf)/2
            else:
                name, mode, tonic_idx, conf = t_key, t_mode, t_tonic, (k_conf+t_conf)/2

    return name, mode, float(np.clip(conf, 0.0, 1.0)), int(tonic_idx)
221
+
222
+ # =========================================================
223
+ # Extra features
224
+ # =========================================================
225
 
226
def robust_scale(x: float, lo: float, hi: float) -> float:
    """Linearly map x from [lo, hi] onto [0, 1], clamping values outside."""
    span = hi - lo + 1e-12  # epsilon guards against lo == hi
    fraction = (x - lo) / span
    return float(np.clip(fraction, 0.0, 1.0))
 
 
228
 
229
def estimate_extras(y: np.ndarray, sr: int, bpm: float, mode: str) -> Dict[str, float]:
    """
    Heuristic Energy / Danceability / Happiness scores, each in [0, 100].

    Energy: robust-scaled mean RMS. Danceability: rhythm pulse blended with
    a tempo-preference bell near 118 BPM. Happiness: brightness plus a
    tempo bell near 120 BPM and a flat bonus for major mode.
    """
    # Loudness proxy from the RMS envelope.
    rms_env = librosa.feature.rms(y=y, frame_length=2048, hop_length=512).squeeze()
    energy = robust_scale(float(np.mean(rms_env)), lo=0.01, hi=0.2)

    # Rhythm pulse strength; neutral 0.5 fallback if PLP fails.
    try:
        pulse = float(np.mean(librosa.beat.plp(y=y, sr=sr)))
    except Exception:
        pulse = 0.5

    # Bell curve peaking near 118 BPM — the "danceable" tempo zone.
    tempo_pref = math.exp(-((bpm - 118.0) / 50.0) ** 2)
    danceability = 0.6 * tempo_pref + 0.4 * pulse

    # Brightness: mean spectral centroid as a fraction of Nyquist, clipped.
    centroid_env = librosa.feature.spectral_centroid(y=y, sr=sr).squeeze()
    brightness = np.clip(float(np.mean(centroid_env)) / (sr/2.0 + 1e-12), 0.0, 1.0)

    # Valence proxy: brightness + 120 BPM tempo bell + major-mode bonus.
    happiness = 0.5 * brightness + 0.3 * math.exp(-((bpm - 120.0) / 60.0) ** 2) + (0.2 if mode == "major" else 0.0)

    return {
        "Energy": round(energy * 100, 1),
        "Danceability": round(np.clip(danceability, 0.0, 1.0) * 100, 1),
        "Happiness": round(np.clip(happiness, 0.0, 1.0) * 100, 1),
    }
252
 
253
+ # =========================================================
254
+ # Core analyzer
255
+ # =========================================================
256
 
257
def analyze_one(path: str, max_duration_s: float = 300.0) -> Dict[str, str]:
    """
    Analyze one audio file and return a row dict for the results table.

    Loads at most *max_duration_s* seconds as 22.05 kHz mono (for speed),
    trims leading/trailing silence, then estimates BPM, key + Camelot code,
    and the heuristic extras. On fully-silent input, returns a placeholder
    row with "N/A" fields. Keys of the returned dict match the UI columns.
    """
    fn = os.path.basename(path)

    # Mono 22.05k for speed; trim silence.
    y, sr = librosa.load(path, sr=22050, mono=True, duration=max_duration_s)
    y, _ = librosa.effects.trim(y, top_db=40)

    if y.size == 0:
        return {"File Name": fn, "Key": "N/A", "Alt Key": "", "BPM": "N/A",
                "Energy": "N/A", "Danceability": "N/A", "Happiness": "N/A"}

    # BPM; the confidence is currently unused by the UI.
    bpm_val, _bpm_conf = pick_best_bpm(y, sr, hop=512)
    bpm_disp = int(round(bpm_val)) if bpm_val > 0 else "N/A"

    # Key; the confidence is currently unused by the UI.
    key_name, mode, _key_conf, tonic_idx = estimate_key(y, sr)
    camelot_code = camelot(PITCHES_FLAT[tonic_idx], mode)

    # Extras; substitute a neutral 120 BPM when tempo detection failed.
    extras = estimate_extras(y, sr, bpm_val if bpm_val > 0 else 120.0, mode)

    return {
        "File Name": fn,
        "Key": key_name,          # e.g., "Bb minor"
        "Alt Key": camelot_code,  # e.g., "3A"
        "BPM": bpm_disp,
        "Energy": extras["Energy"],
        "Danceability": extras["Danceability"],
        "Happiness": extras["Happiness"],
    }
286
 
 
287
def analyze_batch(files: List[str], save_results: bool, search: str):
    """
    Analyze a batch of audio files into a results DataFrame.

    Parameters
    ----------
    files : file paths from the Gradio File component (may be None/empty)
    save_results : when True, also write the (filtered) table to a temp CSV
    search : optional case-insensitive substring filter applied to all columns

    Returns
    -------
    (DataFrame, csv_path_or_None)
    """
    columns = ["File Name", "Key", "Alt Key", "BPM", "Energy", "Danceability", "Happiness"]
    if not files:
        return pd.DataFrame(columns=columns), None

    rows = []
    for f in files:
        try:
            rows.append(analyze_one(f))
        except Exception as e:
            # Keep the batch going: surface the error inside the row itself.
            rows.append({"File Name": os.path.basename(f), "Key": f"Error: {e}", "Alt Key": "", "BPM": "",
                         "Energy": "", "Danceability": "", "Happiness": ""})

    df = pd.DataFrame(rows, columns=columns)

    # Case-insensitive substring filter across every column.
    if search and search.strip():
        mask = df.apply(lambda col: col.astype(str).str.contains(search.strip(), case=False, na=False))
        df = df[mask.any(axis=1)]

    csv_file = None
    if save_results and len(df):
        # mkstemp + close instead of NamedTemporaryFile(delete=False):
        # the original never closed the handle (leak; locks the file on Windows).
        fd, csv_file = tempfile.mkstemp(suffix=".csv")
        os.close(fd)
        df.to_csv(csv_file, index=False, encoding="utf-8")

    return df, csv_file
312
 
313
+ # =========================================================
 
314
  # UI
315
+ # =========================================================
316
 
317
  CSS = """
318
  #app-title { font-weight: 700; font-size: 28px; }
 
321
  """
322
 
323
with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
    # Title and usage notes.
    gr.Markdown("<div id='app-title'>Audio Key & BPM Finder — Accurate Mode</div>")
    gr.Markdown(
        "Upload audio (mp3/wav/m4a). The app estimates **Key**, **Camelot (Alt Key)**, and **BPM** using consensus methods, "
        "plus heuristic **Energy**, **Danceability**, **Happiness**."
        "<br><span class='small-note'>Tip: Longer clips (30–120s) improve accuracy. Results are global track estimates.</span>"
    )

    # Inputs: multi-file upload, text filter, CSV toggle, and run button.
    with gr.Row():
        files = gr.File(label="Audio Files", file_count="multiple", type="filepath")
    with gr.Row():
        search = gr.Textbox(label="Search (filter any column)", placeholder="Type to filter…", scale=3)
        save = gr.Checkbox(label="Save results as CSV", value=False, scale=1)
        run = gr.Button("Analyze", variant="primary", scale=1)

    # Outputs: read-only results table and optional CSV download.
    out_df = gr.Dataframe(headers=["File Name","Key","Alt Key","BPM","Energy","Danceability","Happiness"],
                          interactive=False, wrap=True, label="Results")
    out_csv = gr.File(label="Download CSV", visible=True)

    # Wire the button to the batch analyzer.
    run.click(fn=analyze_batch, inputs=[files, save, search], outputs=[out_df, out_csv])