File size: 10,551 Bytes
7f608b7 224fd2d b14d274 7f608b7 b14d274 7f608b7 b14d274 7f608b7 b14d274 7f608b7 224fd2d b14d274 7f608b7 b14d274 7f608b7 b14d274 7f608b7 b14d274 7f608b7 224fd2d 7f608b7 224fd2d 7f608b7 224fd2d 7f608b7 224fd2d 7f608b7 224fd2d 7f608b7 224fd2d 7f608b7 b14d274 7f608b7 b14d274 7f608b7 b14d274 7f608b7 224fd2d 7f608b7 224fd2d 7f608b7 b14d274 7f608b7 b14d274 7f608b7 b14d274 7f608b7 b14d274 7f608b7 224fd2d b14d274 7f608b7 b14d274 7f608b7 b14d274 7f608b7 b14d274 7f608b7 b14d274 7f608b7 224fd2d 7f608b7 b14d274 7f608b7 224fd2d 7f608b7 224fd2d 7f608b7 b14d274 7f608b7 b14d274 7f608b7 b14d274 7f608b7 b14d274 224fd2d b14d274 224fd2d b14d274 7f608b7 224fd2d b14d274 224fd2d b14d274 7f608b7 b14d274 7f608b7 b14d274 224fd2d b14d274 224fd2d b14d274 7f608b7 b14d274 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 | import os, io, math, tempfile, warnings
from typing import Dict, List, Tuple
import gradio as gr
import numpy as np
import pandas as pd
import librosa
from pydub import AudioSegment
# Suppress every UserWarning and FutureWarning for the whole process.
# NOTE(review): presumably this hides noisy notices emitted by the audio
# libraries imported above; confirm no actionable warnings are being lost.
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
# ---------- Key profiles ----------
# Twelve-element pitch-class weight vectors used for key matching.
# NOTE(review): the KS_* / TP_* names suggest the Krumhansl-Schmuckler and
# Temperley key profiles with index 0 = tonic; verify against the source
# the numbers were taken from.
KS_MAJOR = np.array(
    [6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88],
    dtype=float,
)
KS_MINOR = np.array(
    [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17],
    dtype=float,
)
# The TP_* weights are scaled by 10 after construction.
TP_MAJOR = np.array(
    [0.748, 0.060, 0.488, 0.082, 0.670, 0.460, 0.096, 0.715, 0.104, 0.366, 0.057, 0.400],
    dtype=float,
) * 10
TP_MINOR = np.array(
    [0.712, 0.084, 0.474, 0.618, 0.049, 0.460, 0.105, 0.670, 0.461, 0.044, 0.373, 0.330],
    dtype=float,
) * 10

# Note names for pitch classes 0..11, spelled with flats.
PITCHES_FLAT = [
    'C', 'Db', 'D', 'Eb', 'E', 'F',
    'Gb', 'G', 'Ab', 'A', 'Bb', 'B',
]

# Tonic name -> Camelot wheel code; enharmonic spellings share a code.
CAMELOT_MAJOR = {
    'B': '1B',
    'F#': '2B', 'Gb': '2B',
    'Db': '3B', 'C#': '3B',
    'Ab': '4B',
    'Eb': '5B',
    'Bb': '6B',
    'F': '7B',
    'C': '8B',
    'G': '9B',
    'D': '10B',
    'A': '11B',
    'E': '12B',
}
CAMELOT_MINOR = {
    'Ab': '1A', 'G#': '1A',
    'Eb': '2A', 'D#': '2A',
    'Bb': '3A', 'A#': '3A',
    'F': '4A',
    'C': '5A',
    'G': '6A',
    'D': '7A',
    'A': '8A',
    'E': '9A',
    'B': '10A',
    'F#': '11A', 'Gb': '11A',
    'Db': '12A', 'C#': '12A',
}
def roll(a, k):
    """Return ``a`` circularly shifted by ``k`` positions via ``np.roll``."""
    shifted = np.roll(a, k)
    return shifted
def norm(v):
    """Scale ``v`` by its overall L2 norm.

    A tiny epsilon is added to the denominator so an all-zero input yields
    zeros instead of a division-by-zero.
    """
    magnitude = np.linalg.norm(v)
    return v / (magnitude + 1e-12)
def tonic_from_index(i: int) -> str:
    """Return the flat-spelled note name for pitch-class index ``i`` (wraps modulo 12)."""
    pitch_class = i % 12
    return PITCHES_FLAT[pitch_class]
def camelot(tonic: str, mode: str) -> str:
    """Look up the Camelot code for ``tonic`` in the given ``mode``.

    Any ``mode`` other than ``"major"`` uses the minor table. An unknown
    tonic spelling yields an empty string.
    """
    if mode == "major":
        table = CAMELOT_MAJOR
    else:
        table = CAMELOT_MINOR
    return table.get(tonic, "")
# ---------- Robust audio loader (fixes “unsupported type/codec”) ----------
def load_audio_any(path: str, sr: int = 22050, duration: float = 300.0):
    """Load audio as a mono signal, with a pydub-based fallback decoder.

    The file is first handed to ``librosa.load``. If that raises for any
    reason (e.g. an unsupported container or codec), the file is decoded
    with pydub, truncated to ``duration`` seconds when ``duration`` is
    truthy, exported as WAV into memory, and loaded from that buffer.

    Args:
        path: Location of the audio file.
        sr: Sample rate requested from ``librosa.load``.
        duration: Maximum number of seconds to load.

    Returns:
        The ``(samples, sample_rate)`` pair produced by ``librosa.load``.
    """
    try:
        return librosa.load(path, sr=sr, mono=True, duration=duration)
    except Exception:
        # Fallback path: pydub decodes the file (needs ffmpeg, installed via apt.txt).
        clip = AudioSegment.from_file(path)
        if duration:
            # pydub segments are sliced in milliseconds.
            clip = clip[: int(duration * 1000)]
        wav_bytes = io.BytesIO()
        clip.export(wav_bytes, format="wav")
        wav_bytes.seek(0)
        samples, rate = librosa.load(wav_bytes, sr=sr, mono=True)
        return samples, rate
# ---------- BPM (consensus + half/double correction) ----------
def pick_best_bpm(y: np.ndarray, sr: int, hop: int = 512) -> Tuple[float, float]:
    """Estimate tempo from three estimators with half/double-time correction.

    Three candidate tempi are computed from one onset-strength envelope:
    the strongest autocorrelation lag in the 60-200 BPM range, the most
    frequent per-frame tempo estimate in that range, and the beat tracker's
    global tempo. Each plausible candidate is expanded with its half and
    double, and the value whose beat grid best lines up with onset peaks
    is returned.

    Args:
        y: Audio samples.
        sr: Sample rate of ``y``.
        hop: Hop length (in samples) used for every frame-based feature.

    Returns:
        ``(bpm, confidence)``. ``confidence`` lies in [0, 1] and blends the
        winner's normalized alignment score (weight 0.7) with its agreement
        with the raw candidates (weight 0.3). When no candidate lies in
        (30, 240) BPM the beat tracker's tempo is returned with confidence 0.
    """
    onset_env = librosa.onset.onset_strength(
        y=y, sr=sr, hop_length=hop, aggregate=np.median
    )

    # Candidate 1: strongest autocorrelation lag inside 60-200 BPM.
    ac = librosa.autocorrelate(onset_env, max_size=onset_env.size // 2)
    lags = np.arange(1, len(ac))
    bpms_ac = 60.0 * sr / (lags * hop)
    in_range = (bpms_ac >= 60) & (bpms_ac <= 200)
    ac_vals = ac[1:][in_range]
    bpms_ac = bpms_ac[in_range]
    bpm_ac = float(bpms_ac[np.argmax(ac_vals)]) if len(bpms_ac) else 0.0

    # Candidate 2: mode of the per-frame tempo estimates. Newer librosa
    # exposes the estimator as ``librosa.feature.tempo`` and deprecates
    # ``librosa.beat.tempo``; use whichever this installation provides.
    tempo_fn = getattr(librosa.feature, "tempo", None)
    if tempo_fn is None:
        tempo_fn = librosa.beat.tempo
    tempi = tempo_fn(onset_envelope=onset_env, sr=sr, hop_length=hop, aggregate=None)
    bpm_tg = 0.0
    if tempi is not None and len(tempi):
        t = tempi[(tempi >= 60) & (tempi <= 200)]
        if len(t):
            hist, _ = np.histogram(t, bins=np.arange(60, 202, 1))
            bpm_tg = float(60 + np.argmax(hist))

    # Candidate 3: global tempo from the beat tracker. Depending on the
    # librosa version this is a scalar or a 1-element array, so take the
    # first element explicitly rather than relying on implicit conversion.
    tempo_bt, _ = librosa.beat.beat_track(
        onset_envelope=onset_env, sr=sr, hop_length=hop
    )
    bpm_bt = float(np.atleast_1d(tempo_bt).ravel()[0])

    candidates = [b for b in (bpm_ac, bpm_tg, bpm_bt) if 30 < b < 240]
    if not candidates:
        return max(bpm_bt, 0.0), 0.0

    # Half/double-time correction: also consider x/2 and 2x of each candidate.
    expanded = [b for x in candidates for b in (x / 2, x, x * 2) if 60 <= b <= 200]

    n_frames = len(onset_env)
    # The beat grid is anchored at the strongest onset; same for every candidate.
    start = int(np.argmax(onset_env))

    def align_score(bpm_val: float) -> float:
        """Mean onset strength found near the beat grid implied by ``bpm_val``."""
        period = (60.0 / bpm_val) * sr / hop
        frames = np.round(np.arange(start, n_frames, period)).astype(int)
        frames = frames[frames < n_frames]
        total = 0.0
        for f in frames:
            # Allow +/-2 frames of slack around each predicted beat.
            lo = max(0, f - 2)
            hi = min(n_frames, f + 3)
            total += float(np.max(onset_env[lo:hi]))
        return total / (len(frames) + 1e-12)

    scored = [(b, align_score(b)) for b in expanded]
    best_bpm, best_s = max(scored, key=lambda pair: pair[1])
    agree = np.mean([min(best_bpm, c) / max(best_bpm, c) for c in candidates])
    conf = float(
        np.clip(0.7 * (best_s / (np.max(onset_env) + 1e-12)) + 0.3 * agree, 0.0, 1.0)
    )
    return best_bpm, conf
# ---------- Key (beat-sync CQT+CENS, dual-profile vote) ----------
def beat_sync_pcp(y: np.ndarray, sr: int, hop: int = 512) -> np.ndarray:
    """Compute one averaged chroma vector from the harmonic part of ``y``.

    The harmonic component is extracted with HPSS, then a CQT chroma and a
    CENS chroma are blended 65/35. When the beat tracker finds more than
    two beats, the chroma is averaged between consecutive beats. Each
    column is then L2-normalized and the columns are averaged.

    Args:
        y: Audio samples.
        sr: Sample rate of ``y``.
        hop: Hop length used for the chroma features and beat tracking.

    Returns:
        The mean of the normalized chroma columns (one value per chroma bin).
    """
    harmonic, _ = librosa.effects.hpss(y)
    chroma_cqt = librosa.feature.chroma_cqt(
        y=harmonic, sr=sr, hop_length=hop, bins_per_octave=36, cqt_mode="full"
    )
    chroma_cens = librosa.feature.chroma_cens(y=harmonic, sr=sr, hop_length=hop)
    blended = norm(0.65 * chroma_cqt + 0.35 * chroma_cens)

    _, beat_frames = librosa.beat.beat_track(y=harmonic, sr=sr, hop_length=hop)
    enough_beats = beat_frames is not None and len(beat_frames) > 2
    if enough_beats:
        blended = librosa.util.sync(blended, beat_frames, aggregate=np.mean)

    # Per-column normalization; the epsilon guards against silent columns.
    column_norms = np.linalg.norm(blended, axis=0, keepdims=True)
    blended = blended / (column_norms + 1e-12)
    return np.mean(blended, axis=1)
def score_key(pcp: np.ndarray, prof_major: np.ndarray, prof_minor: np.ndarray):
    """Match a pitch-class profile against all 24 major/minor key templates.

    ``prof_major`` and ``prof_minor`` are 12-element templates whose index 0
    is the tonic weight. For each candidate tonic ``i`` the template is
    rotated so that its tonic weight lands on pitch class ``i``, and its
    cosine similarity with ``pcp`` is computed.

    Args:
        pcp: 12-element pitch-class profile of the audio.
        prof_major: Major-key template (index 0 = tonic).
        prof_minor: Minor-key template (index 0 = tonic).

    Returns:
        ``(mode, tonic, confidence)``. ``mode`` is ``"major"`` or
        ``"minor"``, ``tonic`` is the pitch-class index of the best key, and
        ``confidence`` is the gap between the best and second-best scores
        relative to the best score, clipped to [0, 1].
    """
    def _unit(v):
        # L2-normalize; the epsilon keeps an all-zero vector finite.
        v = np.asarray(v, dtype=float)
        return v / (np.linalg.norm(v) + 1e-12)

    pcp = _unit(pcp)
    unit_major = _unit(prof_major)
    unit_minor = _unit(prof_minor)

    best_score, best_mode, best_tonic = -1.0, "major", 0
    all_scores = []
    for i in range(12):
        # np.roll(template, i) moves element 0 (the tonic weight) to index i,
        # which is the template for tonic i. Rolling by -i would instead put
        # the tonic at (12 - i) % 12 and report mirrored keys.
        s_major = float(np.dot(pcp, np.roll(unit_major, i)))
        s_minor = float(np.dot(pcp, np.roll(unit_minor, i)))
        all_scores += [s_major, s_minor]
        if s_major > best_score:
            best_score, best_mode, best_tonic = s_major, "major", i
        if s_minor > best_score:
            best_score, best_mode, best_tonic = s_minor, "minor", i

    runner_up, top = np.sort(np.array(all_scores))[-2:]
    margin = (top - runner_up) / (top + 1e-12)
    confidence = float(np.clip(margin, 0.0, 1.0))
    return best_mode, best_tonic, confidence
def estimate_key(y: np.ndarray, sr: int):
    """Estimate the musical key by voting between two template families.

    The beat-synchronous pitch-class profile is scored against the KS and
    TP templates. If both agree on mode and tonic, their confidences are
    averaged and boosted by 0.3 (clipped to [0, 1]). Otherwise the result
    with the higher confidence wins, with ties going to KS.

    Returns:
        ``(name, mode, confidence, tonic)`` where ``name`` is e.g.
        ``"Ab minor"`` and ``tonic`` is the pitch-class index.
    """
    profile = beat_sync_pcp(y, sr)
    ks_mode, ks_tonic, ks_conf = score_key(profile, KS_MAJOR, KS_MINOR)
    tp_mode, tp_tonic, tp_conf = score_key(profile, TP_MAJOR, TP_MINOR)

    if (ks_mode, ks_tonic) == (tp_mode, tp_tonic):
        mode, tonic = ks_mode, ks_tonic
        conf = float(np.clip(0.5 * (ks_conf + tp_conf) + 0.3, 0.0, 1.0))
    elif ks_conf >= tp_conf:
        mode, tonic, conf = ks_mode, ks_tonic, ks_conf
    else:
        mode, tonic, conf = tp_mode, tp_tonic, tp_conf

    name = f"{tonic_from_index(tonic)} {mode}"
    return name, mode, conf, tonic
# ---------- Extras ----------
def robust_scale(x, lo, hi):
    """Linearly map ``x`` from the range [``lo``, ``hi``] onto [0, 1], clipping outside values.

    A tiny epsilon in the denominator avoids division by zero when
    ``lo == hi``.
    """
    span = hi - lo + 1e-12
    fraction = (x - lo) / span
    return float(np.clip(fraction, 0.0, 1.0))
def estimate_extras(y: np.ndarray, sr: int, bpm: float, mode: str) -> Dict[str, float]:
    """Derive heuristic Energy / Danceability / Happiness scores (0-100).

    * Energy: mean RMS rescaled from [0.01, 0.2] to [0, 1].
    * Danceability: 0.6 x a Gaussian tempo preference centred on 118 BPM
      plus 0.4 x the mean predominant-local-pulse value (0.5 if the pulse
      computation fails).
    * Happiness: 0.5 x brightness (mean spectral centroid relative to
      Nyquist), plus 0.3 x a Gaussian tempo preference centred on 120 BPM,
      plus a 0.2 bonus when ``mode`` is ``"major"``.

    Args:
        y: Audio samples.
        sr: Sample rate of ``y``.
        bpm: Tempo used for the tempo-preference terms.
        mode: ``"major"`` earns the happiness bonus; anything else does not.

    Returns:
        A dict with keys ``"Energy"``, ``"Danceability"`` and
        ``"Happiness"``, each rounded to one decimal place.
    """
    # Energy from loudness.
    frame_rms = librosa.feature.rms(y=y, frame_length=2048, hop_length=512).squeeze()
    energy = robust_scale(float(np.mean(frame_rms)), 0.01, 0.2)

    # Pulse clarity; best-effort, falling back to a neutral 0.5.
    try:
        pulse_curve = librosa.beat.plp(y=y, sr=sr)
        pulse = float(np.mean(pulse_curve))
    except Exception:
        pulse = 0.5

    dance_tempo_term = math.exp(-((bpm - 118.0) / 50.0) ** 2)
    dance = 0.6 * dance_tempo_term + 0.4 * pulse

    # Brightness: spectral centroid as a fraction of the Nyquist frequency.
    centroid = librosa.feature.spectral_centroid(y=y, sr=sr).squeeze()
    bright = float(np.mean(centroid)) / (sr / 2.0 + 1e-12)
    bright = np.clip(bright, 0, 1)

    happy_tempo_term = math.exp(-((bpm - 120.0) / 60.0) ** 2)
    mode_bonus = 0.2 if mode == "major" else 0.0
    happy = 0.5 * bright + 0.3 * happy_tempo_term + mode_bonus

    return {
        "Energy": round(energy * 100, 1),
        "Danceability": round(np.clip(dance, 0, 1) * 100, 1),
        "Happiness": round(np.clip(happy, 0, 1) * 100, 1),
    }
# ---------- Core ----------
def analyze_one(path: str, max_duration_s: float = 300.0) -> Dict[str, str]:
    """Analyze one audio file and return a single result row.

    The file is loaded at 22050 Hz (at most ``max_duration_s`` seconds),
    trimmed of leading/trailing audio below 40 dB, and then passed through
    tempo, key and extras estimation.

    Args:
        path: Location of the audio file.
        max_duration_s: Maximum number of seconds to load.

    Returns:
        A dict with keys ``File Name``, ``Key``, ``Alt Key`` (Camelot code),
        ``BPM``, ``Energy``, ``Danceability`` and ``Happiness``. If loading
        fails, ``Key`` holds ``"Error: ..."`` and the other fields are
        empty; if nothing remains after trimming, the fields are ``"N/A"``.
    """
    fn = os.path.basename(path)

    try:
        audio, rate = load_audio_any(path, sr=22050, duration=max_duration_s)
    except Exception as e:
        return {
            "File Name": fn,
            "Key": f"Error: {e}",
            "Alt Key": "",
            "BPM": "",
            "Energy": "",
            "Danceability": "",
            "Happiness": "",
        }

    audio, _ = librosa.effects.trim(audio, top_db=40)
    if audio.size == 0:
        return {
            "File Name": fn,
            "Key": "N/A",
            "Alt Key": "",
            "BPM": "N/A",
            "Energy": "N/A",
            "Danceability": "N/A",
            "Happiness": "N/A",
        }

    bpm_val, _ = pick_best_bpm(audio, rate, hop=512)
    has_tempo = bpm_val > 0
    bpm_disp = int(round(bpm_val)) if has_tempo else "N/A"

    key_name, mode, _, tonic = estimate_key(audio, rate)
    camelot_code = camelot(tonic_from_index(tonic), mode)

    # Without a usable tempo, the extras are computed as if at 120 BPM.
    extras = estimate_extras(audio, rate, bpm_val if has_tempo else 120.0, mode)

    return {
        "File Name": fn,
        "Key": key_name,
        "Alt Key": camelot_code,
        "BPM": bpm_disp,
        "Energy": extras["Energy"],
        "Danceability": extras["Danceability"],
        "Happiness": extras["Happiness"],
    }
def analyze_batch(files: List[str], save_results: bool, search: str):
    """Analyze several audio files and return a results table.

    Each file is analyzed independently; a failure in one file becomes an
    ``"Error: ..."`` row instead of aborting the batch.

    Args:
        files: Paths of the audio files to analyze. Empty or ``None``
            yields an empty table.
        save_results: When true and the (filtered) table is non-empty, the
            table is also written to a temporary CSV file.
        search: Optional filter text. Rows are kept when any column
            contains it, case-insensitively. The text is matched literally,
            so characters such as ``(``, ``+`` or ``[`` are safe to type.

    Returns:
        ``(dataframe, csv_path)`` where ``csv_path`` is ``None`` unless a
        CSV was written.
    """
    columns = ["File Name", "Key", "Alt Key", "BPM", "Energy", "Danceability", "Happiness"]
    if not files:
        return pd.DataFrame(columns=columns), None

    rows = []
    for f in files:
        try:
            rows.append(analyze_one(f))
        except Exception as e:
            # Best-effort batch: record the failure and continue with the rest.
            rows.append({
                "File Name": os.path.basename(f),
                "Key": f"Error: {e}",
                "Alt Key": "",
                "BPM": "",
                "Energy": "",
                "Danceability": "",
                "Happiness": "",
            })
    df = pd.DataFrame(rows, columns=columns)

    needle = search.strip() if search else ""
    if needle:
        # regex=False: the filter is user-typed text, not a pattern. Treating
        # it as a regex makes input like "a(1" raise and "." match anything.
        hits = df.apply(
            lambda col: col.astype(str).str.contains(
                needle, case=False, na=False, regex=False
            )
        )
        df = df[hits.any(axis=1)]

    csv_file = None
    if save_results and len(df):
        # Reserve a unique path, then close the handle before pandas reopens
        # the file; keeping it open leaks the descriptor and blocks the
        # reopen on Windows.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
            csv_file = tmp.name
        df.to_csv(csv_file, index=False, encoding="utf-8")
    return df, csv_file
# ---------- UI ----------
# Custom stylesheet handed to gr.Blocks below.
CSS = """
#app-title { font-weight: 700; font-size: 28px; }
.small-note { font-size: 12px; opacity: 0.8; }
th, td { text-align: left !important; }
"""

# Page layout: title and intro, a file-upload row, a controls row, then the
# results table and CSV download. The Analyze button runs analyze_batch.
with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
    gr.Markdown("<div id='app-title'>Audio Key & BPM Finder — Robust Loader</div>")
    gr.Markdown(
        "Upload MP3/WAV/M4A, etc. This Space installs **FFmpeg** and falls back to pydub if needed. "
        "Outputs **Key**, **Camelot (Alt Key)**, **BPM**, plus **Energy/Danceability/Happiness**."
    )

    with gr.Row():
        files = gr.File(
            label="Audio Files",
            file_count="multiple",
            type="filepath",
        )

    with gr.Row():
        search = gr.Textbox(
            label="Search (filter any column)",
            placeholder="Type to filter…",
            scale=3,
        )
        save = gr.Checkbox(label="Save results as CSV", value=False, scale=1)
        run = gr.Button("Analyze", variant="primary", scale=1)

    out_df = gr.Dataframe(
        headers=["File Name", "Key", "Alt Key", "BPM", "Energy", "Danceability", "Happiness"],
        interactive=False,
        wrap=True,
        label="Results",
    )
    out_csv = gr.File(label="Download CSV", visible=True)

    # Input order matches analyze_batch(files, save_results, search).
    run.click(
        fn=analyze_batch,
        inputs=[files, save, search],
        outputs=[out_df, out_csv],
    )

if __name__ == "__main__":
    demo.launch()
|