|
|
import os, io, math, tempfile, warnings |
|
|
from typing import Dict, List, Tuple |
|
|
|
|
|
import gradio as gr |
|
|
import numpy as np |
|
|
import pandas as pd |
|
|
import librosa |
|
|
from pydub import AudioSegment |
|
|
|
|
|
# Suppress UserWarning and FutureWarning globally so they do not clutter the app's output.
for _category in (UserWarning, FutureWarning):
    warnings.filterwarnings("ignore", category=_category)
|
|
|
|
|
|
|
|
# Key-profile weight vectors: 12 floats each, one per semitone.
# score_key() rotates them to test each of the 12 candidate tonics.
# NOTE(review): the KS_* values look like the Krumhansl-Kessler probe-tone
# ratings and the TP_* values like the Temperley profiles - confirm provenance.
KS_MAJOR = np.array(
    [6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88],
    dtype=float,
)
KS_MINOR = np.array(
    [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17],
    dtype=float,
)

# The second profile set is stored as fractions and scaled by 10 here.
TP_MAJOR = np.array(
    [0.748, 0.060, 0.488, 0.082, 0.670, 0.460, 0.096, 0.715, 0.104, 0.366, 0.057, 0.400],
    dtype=float,
) * 10
TP_MINOR = np.array(
    [0.712, 0.084, 0.474, 0.618, 0.049, 0.460, 0.105, 0.670, 0.461, 0.044, 0.373, 0.330],
    dtype=float,
) * 10
|
|
|
|
|
# Pitch-class names indexed 0..11, spelled with flats.
PITCHES_FLAT = [
    'C', 'Db', 'D', 'Eb', 'E', 'F',
    'Gb', 'G', 'Ab', 'A', 'Bb', 'B',
]

# Tonic name -> Camelot code for major keys ("B" codes).
# Sharp and flat spellings of the same pitch share one code.
CAMELOT_MAJOR = {
    'B': '1B',
    'F#': '2B', 'Gb': '2B',
    'Db': '3B', 'C#': '3B',
    'Ab': '4B',
    'Eb': '5B',
    'Bb': '6B',
    'F': '7B',
    'C': '8B',
    'G': '9B',
    'D': '10B',
    'A': '11B',
    'E': '12B',
}

# Tonic name -> Camelot code for minor keys ("A" codes).
CAMELOT_MINOR = {
    'Ab': '1A', 'G#': '1A',
    'Eb': '2A', 'D#': '2A',
    'Bb': '3A', 'A#': '3A',
    'F': '4A',
    'C': '5A',
    'G': '6A',
    'D': '7A',
    'A': '8A',
    'E': '9A',
    'B': '10A',
    'F#': '11A', 'Gb': '11A',
    'Db': '12A', 'C#': '12A',
}
|
|
|
|
|
def roll(a, k):
    """Return ``a`` circularly shifted by ``k`` positions (wrapper around ``np.roll``)."""
    shifted = np.roll(a, k)
    return shifted
|
|
def norm(v):
    """Scale ``v`` by its L2 norm (Frobenius norm for 2-D input).

    A small epsilon is added to the denominator so an all-zero input yields
    zeros instead of dividing by zero.
    """
    magnitude = np.linalg.norm(v)
    return v / (magnitude + 1e-12)
|
|
def tonic_from_index(i: int) -> str:
    """Return the flat-spelled pitch name for index ``i``, wrapping modulo 12."""
    pitch_class = i % 12
    return PITCHES_FLAT[pitch_class]
|
|
def camelot(tonic: str, mode: str) -> str:
    """Look up the Camelot code for ``tonic`` in the given ``mode``.

    ``mode == "major"`` selects the major table; any other value selects the
    minor table. Returns an empty string when the tonic is not in the table.
    """
    if mode == "major":
        table = CAMELOT_MAJOR
    else:
        table = CAMELOT_MINOR
    return table.get(tonic, "")
|
|
|
|
|
|
|
|
def load_audio_any(path: str, sr: int = 22050, duration: float = 300.0):
    """Load an audio file as a mono signal, with a pydub fallback.

    First tries ``librosa.load`` directly. If that raises for any reason
    (for example an unsupported container or codec), the file is decoded with
    pydub, cut to ``duration`` seconds, exported as WAV into an in-memory
    buffer, and that buffer is loaded with librosa instead.

    Args:
        path: Path of the audio file to read.
        sr: Sample rate requested from librosa.
        duration: Maximum number of seconds to load; a falsy value disables
            truncation in the fallback path.

    Returns:
        The ``(samples, sample_rate)`` tuple produced by ``librosa.load``.
    """
    try:
        samples, rate = librosa.load(path, sr=sr, mono=True, duration=duration)
        return samples, rate
    except Exception:
        # Best-effort fallback: let pydub decode whatever librosa could not.
        segment = AudioSegment.from_file(path)
        if duration:
            # pydub slices are expressed in milliseconds.
            limit_ms = int(duration * 1000)
            segment = segment[:limit_ms]
        wav_buffer = io.BytesIO()
        segment.export(wav_buffer, format="wav")
        wav_buffer.seek(0)
        samples, rate = librosa.load(wav_buffer, sr=sr, mono=True)
        return samples, rate
|
|
|
|
|
|
|
|
def pick_best_bpm(y: np.ndarray, sr: int, hop: int = 512) -> Tuple[float, float]:
    """Estimate tempo by combining three estimators and an onset-alignment vote.

    Three tempo candidates are computed from the same onset-strength envelope:
    the autocorrelation peak, the mode of the per-frame tempo estimates, and
    the beat tracker's tempo. Each plausible candidate is expanded with its
    half and double tempo, and the value whose beat grid lines up best with
    onset peaks wins.

    Args:
        y: Mono audio samples.
        sr: Sample rate of ``y``.
        hop: Hop length (in samples) used for the onset envelope.

    Returns:
        ``(bpm, confidence)``. ``confidence`` is in ``[0, 1]`` and mixes the
        alignment score (70%) with the agreement between the winner and the
        raw candidates (30%). If no estimator yields a value in ``(30, 240)``
        the beat-tracker tempo (floored at 0) is returned with confidence 0.
    """
    onset_env = librosa.onset.onset_strength(
        y=y, sr=sr, hop_length=hop, aggregate=np.median
    )
    n_frames = len(onset_env)

    # --- Candidate 1: strongest autocorrelation lag within 60-200 BPM.
    ac = librosa.autocorrelate(onset_env, max_size=onset_env.size // 2)
    lags = np.arange(1, len(ac))
    bpms_ac = 60.0 * sr / (lags * hop)
    mask = (bpms_ac >= 60) & (bpms_ac <= 200)
    ac_vals = ac[1:][mask]
    bpms_ac = bpms_ac[mask]
    bpm_ac = float(bpms_ac[np.argmax(ac_vals)]) if len(bpms_ac) else 0.0

    # --- Candidate 2: most frequent per-frame tempo (1-BPM histogram bins).
    # librosa 0.10 moved ``beat.tempo`` to ``feature.tempo`` and deprecated the
    # old name, so prefer the new location and fall back for older releases.
    tempo_fn = getattr(librosa.feature, "tempo", None) or librosa.beat.tempo
    tempi = tempo_fn(onset_envelope=onset_env, sr=sr, hop_length=hop, aggregate=None)
    bpm_tg = 0.0
    if tempi is not None and len(tempi):
        tempi = np.asarray(tempi, dtype=float).ravel()
        in_range = tempi[(tempi >= 60) & (tempi <= 200)]
        if len(in_range):
            hist, _ = np.histogram(in_range, bins=np.arange(60, 202, 1))
            bpm_tg = float(60 + np.argmax(hist))

    # --- Candidate 3: beat tracker tempo.
    # Newer librosa returns the tempo as a 1-element array; take the scalar
    # explicitly instead of relying on float(ndarray).
    tempo_bt, _ = librosa.beat.beat_track(
        onset_envelope=onset_env, sr=sr, hop_length=hop
    )
    bpm_bt = float(np.asarray(tempo_bt, dtype=float).ravel()[0])

    candidates = [b for b in (bpm_ac, bpm_tg, bpm_bt) if 30 < b < 240]
    if not candidates:
        return max(bpm_bt, 0.0), 0.0

    # Consider octave errors: half and double tempo of every candidate.
    expanded = [b for x in candidates for b in (x / 2, x, x * 2) if 60 <= b <= 200]

    # The beat grid is anchored at the strongest onset; this does not depend
    # on the tempo under test, so compute it once.
    anchor = int(np.argmax(onset_env))

    def align_score(bpm_val: float) -> float:
        """Mean onset strength found within +/-2 frames of each grid beat."""
        period = (60.0 / bpm_val) * sr / hop
        frames = np.round(np.arange(anchor, n_frames, period)).astype(int)
        frames = frames[frames < n_frames]
        total = 0.0
        for f in frames:
            lo = max(0, f - 2)
            hi = min(n_frames, f + 3)
            total += float(np.max(onset_env[lo:hi]))
        return total / (len(frames) + 1e-12)

    scored = [(b, align_score(b)) for b in expanded]
    best_bpm, best_s = max(scored, key=lambda pair: pair[1])

    # Agreement: ratio (<= 1) between the winner and each raw candidate.
    agree = np.mean([min(best_bpm, c) / max(best_bpm, c) for c in candidates])
    conf = float(
        np.clip(0.7 * (best_s / (np.max(onset_env) + 1e-12)) + 0.3 * agree, 0.0, 1.0)
    )
    return best_bpm, conf
|
|
|
|
|
|
|
|
def beat_sync_pcp(y: np.ndarray, sr: int, hop: int = 512) -> np.ndarray:
    """Compute an averaged pitch-class profile from the harmonic part of ``y``.

    The harmonic component is separated with HPSS, two chroma representations
    (CQT and CENS) are blended 65/35, and the result is aggregated per beat
    when the beat tracker finds more than two beats. Each column is then
    L2-normalised and the columns are averaged.

    Args:
        y: Mono audio samples.
        sr: Sample rate of ``y``.
        hop: Hop length (in samples) for the chroma features and beat tracking.

    Returns:
        The mean over columns of the normalised chroma matrix (one value per
        chroma row).
    """
    harmonic, _percussive = librosa.effects.hpss(y)

    chroma_cqt = librosa.feature.chroma_cqt(
        y=harmonic, sr=sr, hop_length=hop, bins_per_octave=36, cqt_mode="full"
    )
    chroma_cens = librosa.feature.chroma_cens(y=harmonic, sr=sr, hop_length=hop)
    blended = 0.65 * chroma_cqt + 0.35 * chroma_cens
    chroma = norm(blended)

    _tempo, beat_frames = librosa.beat.beat_track(y=harmonic, sr=sr, hop_length=hop)
    enough_beats = beat_frames is not None and len(beat_frames) > 2
    if enough_beats:
        # Replace per-frame chroma with the mean chroma of each beat interval.
        chroma = librosa.util.sync(chroma, beat_frames, aggregate=np.mean)

    # Normalise every column so loud segments do not dominate the average.
    column_norms = np.linalg.norm(chroma, axis=0, keepdims=True)
    chroma = chroma / (column_norms + 1e-12)
    return np.mean(chroma, axis=1)
|
|
|
|
|
def score_key(pcp: np.ndarray, prof_major: np.ndarray, prof_minor: np.ndarray):
    """Match a pitch-class profile against all 24 major/minor key templates.

    Each template is the major or minor profile rotated so that its first
    element (the tonic weight) lands on the candidate tonic's pitch class.
    Similarity is the cosine between the unit-normalised ``pcp`` and the
    unit-normalised template.

    Args:
        pcp: 12-element pitch-class profile; index ``j`` is pitch class ``j``.
        prof_major: 12-element major-key profile, index 0 = tonic.
        prof_minor: 12-element minor-key profile, index 0 = tonic.

    Returns:
        ``(mode, tonic, confidence)`` where ``mode`` is ``"major"`` or
        ``"minor"``, ``tonic`` is the pitch-class index (0..11) of the best
        template, and ``confidence`` is the gap between the best and
        second-best score relative to the best, clipped to ``[0, 1]``.
    """

    def _unit(vec):
        # Epsilon keeps an all-zero vector from dividing by zero.
        vec = np.asarray(vec, dtype=float)
        return vec / (np.linalg.norm(vec) + 1e-12)

    pcp = _unit(pcp)
    # Rotation does not change the norm, so normalise each profile once.
    major_unit = _unit(prof_major)
    minor_unit = _unit(prof_minor)

    best_score, best_mode, best_tonic = -1.0, "major", 0
    all_scores = []
    for tonic in range(12):
        # np.roll(profile, tonic) moves element 0 (the tonic weight) to index
        # ``tonic``. Rolling by ``-tonic`` would instead place it at
        # ``(12 - tonic) % 12`` and report mirrored keys.
        s_major = float(np.dot(pcp, np.roll(major_unit, tonic)))
        s_minor = float(np.dot(pcp, np.roll(minor_unit, tonic)))
        all_scores += [s_major, s_minor]
        if s_major > best_score:
            best_score, best_mode, best_tonic = s_major, "major", tonic
        if s_minor > best_score:
            best_score, best_mode, best_tonic = s_minor, "minor", tonic

    ranked = np.sort(np.array(all_scores))
    margin = (ranked[-1] - ranked[-2]) / (ranked[-1] + 1e-12)
    confidence = float(np.clip(margin, 0.0, 1.0))
    return best_mode, best_tonic, confidence
|
|
|
|
|
def estimate_key(y: np.ndarray, sr: int):
    """Estimate the musical key of ``y`` using two profile sets.

    The pitch-class profile is scored against both the KS and the TP
    profiles. When both agree on mode and tonic, the averaged confidence gets
    a 0.3 bonus (clipped to 1.0); otherwise the result with the higher
    confidence wins, with ties going to the KS result.

    Args:
        y: Mono audio samples.
        sr: Sample rate of ``y``.

    Returns:
        ``(name, mode, confidence, tonic)`` where ``name`` is
        ``"<pitch> <mode>"`` and ``tonic`` is the pitch-class index.
    """
    pcp = beat_sync_pcp(y, sr)
    ks_mode, ks_tonic, ks_conf = score_key(pcp, KS_MAJOR, KS_MINOR)
    tp_mode, tp_tonic, tp_conf = score_key(pcp, TP_MAJOR, TP_MINOR)

    same_key = (ks_mode == tp_mode) and (ks_tonic == tp_tonic)
    if not same_key:
        if ks_conf >= tp_conf:
            mode, tonic, conf = ks_mode, ks_tonic, ks_conf
        else:
            mode, tonic, conf = tp_mode, tp_tonic, tp_conf
    else:
        mode, tonic = ks_mode, ks_tonic
        conf = float(np.clip(0.5 * (ks_conf + tp_conf) + 0.3, 0.0, 1.0))

    name = f"{tonic_from_index(tonic)} {mode}"
    return name, mode, conf, tonic
|
|
|
|
|
|
|
|
def robust_scale(x, lo, hi):
    """Map ``x`` linearly from ``[lo, hi]`` onto ``[0, 1]``, clipping outside values.

    An epsilon in the denominator guards against ``lo == hi``.
    """
    span = hi - lo + 1e-12
    fraction = (x - lo) / span
    return float(np.clip(fraction, 0.0, 1.0))
|
|
def estimate_extras(y: np.ndarray, sr: int, bpm: float, mode: str) -> Dict[str, float]:
    """Compute heuristic Energy, Danceability and Happiness scores.

    * Energy: mean frame RMS rescaled from ``[0.01, 0.2]`` to ``[0, 1]``.
    * Danceability: 60% a Gaussian tempo preference centred on 118 BPM plus
      40% the mean predominant-local-pulse strength (0.5 if PLP fails).
    * Happiness: 50% spectral brightness (mean centroid over Nyquist), 30% a
      Gaussian tempo preference centred on 120 BPM, plus 0.2 for major mode.

    Args:
        y: Mono audio samples.
        sr: Sample rate of ``y``.
        bpm: Tempo used for the tempo-preference terms.
        mode: ``"major"`` enables the happiness bonus; anything else does not.

    Returns:
        Dict with keys ``"Energy"``, ``"Danceability"`` and ``"Happiness"``,
        each a percentage (0-100) rounded to one decimal.
    """
    # Energy from average loudness.
    frame_rms = librosa.feature.rms(y=y, frame_length=2048, hop_length=512).squeeze()
    mean_rms = float(np.mean(frame_rms))
    energy = robust_scale(mean_rms, 0.01, 0.2)

    # Pulse clarity; fall back to a neutral value if PLP cannot be computed.
    try:
        pulse_curve = librosa.beat.plp(y=y, sr=sr)
        pulse = float(np.mean(pulse_curve))
    except Exception:
        pulse = 0.5

    tempo_pref = math.exp(-((bpm - 118.0) / 50.0) ** 2)
    dance = 0.6 * tempo_pref + 0.4 * pulse

    # Brightness: mean spectral centroid as a fraction of the Nyquist frequency.
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr).squeeze()
    nyquist = sr / 2.0
    bright = float(np.mean(spectral_centroid)) / (nyquist + 1e-12)
    bright = np.clip(bright, 0, 1)

    mode_bonus = 0.2 if mode == "major" else 0.0
    happy = 0.5 * bright + 0.3 * math.exp(-((bpm - 120.0) / 60.0) ** 2) + mode_bonus

    return {
        "Energy": round(energy * 100, 1),
        "Danceability": round(np.clip(dance, 0, 1) * 100, 1),
        "Happiness": round(np.clip(happy, 0, 1) * 100, 1),
    }
|
|
|
|
|
|
|
|
def analyze_one(path: str, max_duration_s: float = 300.0) -> Dict[str, str]:
    """Analyse one audio file and return a single result row.

    Args:
        path: Path of the audio file.
        max_duration_s: Maximum number of seconds of audio to load.

    Returns:
        A dict with the keys ``File Name``, ``Key``, ``Alt Key`` (Camelot
        code), ``BPM``, ``Energy``, ``Danceability`` and ``Happiness``.
        If loading fails, ``Key`` holds ``"Error: <message>"`` and the other
        result fields are empty strings. If nothing remains after silence
        trimming, the result fields are ``"N/A"`` (``Alt Key`` is empty).
    """
    file_name = os.path.basename(path)

    try:
        y, sr = load_audio_any(path, sr=22050, duration=max_duration_s)
    except Exception as e:
        return {
            "File Name": file_name,
            "Key": f"Error: {e}",
            "Alt Key": "",
            "BPM": "",
            "Energy": "",
            "Danceability": "",
            "Happiness": "",
        }

    # Strip leading/trailing silence below -40 dB relative to peak.
    y, _ = librosa.effects.trim(y, top_db=40)
    if y.size == 0:
        return {
            "File Name": file_name,
            "Key": "N/A",
            "Alt Key": "",
            "BPM": "N/A",
            "Energy": "N/A",
            "Danceability": "N/A",
            "Happiness": "N/A",
        }

    bpm_val, _ = pick_best_bpm(y, sr, hop=512)
    has_tempo = bpm_val > 0
    bpm_disp = int(round(bpm_val)) if has_tempo else "N/A"

    key_name, mode, _, tonic = estimate_key(y, sr)
    camelot_code = camelot(tonic_from_index(tonic), mode)

    # Without a usable tempo, feed a neutral 120 BPM into the heuristics.
    extras = estimate_extras(y, sr, bpm_val if has_tempo else 120.0, mode)

    return {
        "File Name": file_name,
        "Key": key_name,
        "Alt Key": camelot_code,
        "BPM": bpm_disp,
        "Energy": extras["Energy"],
        "Danceability": extras["Danceability"],
        "Happiness": extras["Happiness"],
    }
|
|
|
|
|
def analyze_batch(files: List[str], save_results: bool, search: str):
    """Analyse several files, optionally filter the rows and export a CSV.

    Args:
        files: Paths of the audio files to analyse; may be empty or ``None``.
        save_results: When true and at least one row remains, write the table
            to a temporary ``.csv`` file.
        search: Text to filter by. A row is kept when any column contains the
            text, compared case-insensitively as a literal substring.
            Blank or ``None`` disables filtering.

    Returns:
        ``(df, csv_file)`` where ``df`` is the result ``DataFrame`` and
        ``csv_file`` is the path of the written CSV, or ``None`` if no file
        was written.
    """
    columns = ["File Name", "Key", "Alt Key", "BPM", "Energy", "Danceability", "Happiness"]
    if not files:
        return pd.DataFrame(columns=columns), None

    rows = []
    for f in files:
        # One failing file must not abort the whole batch; record the error
        # in its row instead.
        try:
            rows.append(analyze_one(f))
        except Exception as e:
            rows.append({
                "File Name": os.path.basename(f),
                "Key": f"Error: {e}",
                "Alt Key": "",
                "BPM": "",
                "Energy": "",
                "Danceability": "",
                "Happiness": "",
            })
    df = pd.DataFrame(rows, columns=columns)

    query = search.strip() if search else ""
    if query:
        # regex=False: the query is user-typed text, so characters such as
        # "(", "+" or "." must match literally instead of raising re.error or
        # acting as regex metacharacters.
        mask = df.apply(
            lambda col: col.astype(str).str.contains(
                query, case=False, na=False, regex=False
            )
        )
        df = df[mask.any(axis=1)]

    csv_file = None
    if save_results and len(df):
        # Reserve a unique path, then close the handle before pandas writes to
        # it by name; this avoids leaking the descriptor and works on
        # platforms that refuse to reopen a file that is still open.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
            csv_file = tmp.name
        df.to_csv(csv_file, index=False, encoding="utf-8")
    return df, csv_file
|
|
|
|
|
|
|
|
# Custom stylesheet handed to gr.Blocks below.
CSS = """
#app-title { font-weight: 700; font-size: 28px; }
.small-note { font-size: 12px; opacity: 0.8; }
th, td { text-align: left !important; }
"""

# Column headers of the results table, in the order analyze_batch emits them.
_RESULT_HEADERS = ["File Name", "Key", "Alt Key", "BPM", "Energy", "Danceability", "Happiness"]

with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
    # Title and short description.
    gr.Markdown("<div id='app-title'>Audio Key & BPM Finder — Robust Loader</div>")
    _intro = (
        "Upload MP3/WAV/M4A, etc. This Space installs **FFmpeg** and falls back to pydub if needed. "
        "Outputs **Key**, **Camelot (Alt Key)**, **BPM**, plus **Energy/Danceability/Happiness**."
    )
    gr.Markdown(_intro)

    # Inputs: file upload on its own row, then filter / CSV toggle / run button.
    with gr.Row():
        files = gr.File(label="Audio Files", file_count="multiple", type="filepath")
    with gr.Row():
        search = gr.Textbox(
            label="Search (filter any column)",
            placeholder="Type to filter…",
            scale=3,
        )
        save = gr.Checkbox(label="Save results as CSV", value=False, scale=1)
        run = gr.Button("Analyze", variant="primary", scale=1)

    # Outputs: results table and optional CSV download.
    out_df = gr.Dataframe(
        headers=_RESULT_HEADERS,
        interactive=False,
        wrap=True,
        label="Results",
    )
    out_csv = gr.File(label="Download CSV", visible=True)

    # Input order matches analyze_batch(files, save_results, search).
    run.click(fn=analyze_batch, inputs=[files, save, search], outputs=[out_df, out_csv])


if __name__ == "__main__":
    demo.launch()
|
|
|