Spaces:

Diggz10
/

bpm

Sleeping

App Files Files Community

bpm / app.py

Diggz10

Update app.py

7f608b7 verified 6 months ago

raw

history blame contribute delete

10.6 kB

	import os, io, math, tempfile, warnings
	from typing import Dict, List, Tuple

	import gradio as gr
	import numpy as np
	import pandas as pd
	import librosa
	from pydub import AudioSegment

	# Suppress user-level and future-deprecation warnings so they do not clutter
	# the application output.
	for _category in (UserWarning, FutureWarning):
	    warnings.filterwarnings("ignore", category=_category)

	# ---------- Key profiles ----------
	# Each profile is a 12-element pitch-class weight vector whose index 0 is the
	# tonic. The KS_* values match the Krumhansl-Kessler profiles; the TP_* values
	# match the Temperley profiles, scaled by 10.
	KS_MAJOR = np.array(
	    [6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88],
	    dtype=float,
	)
	KS_MINOR = np.array(
	    [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17],
	    dtype=float,
	)

	TP_MAJOR = np.array(
	    [0.748, 0.060, 0.488, 0.082, 0.670, 0.460, 0.096, 0.715, 0.104, 0.366, 0.057, 0.400],
	    dtype=float,
	) * 10
	TP_MINOR = np.array(
	    [0.712, 0.084, 0.474, 0.618, 0.049, 0.460, 0.105, 0.670, 0.461, 0.044, 0.373, 0.330],
	    dtype=float,
	) * 10

	# Pitch-class names (flat spelling), indexed 0 = C ... 11 = B.
	PITCHES_FLAT = ['C', 'Db', 'D', 'Eb', 'E', 'F', 'Gb', 'G', 'Ab', 'A', 'Bb', 'B']

	# Tonic name -> Camelot wheel code; enharmonic spellings map to the same code.
	CAMELOT_MAJOR = {
	    'B': '1B',
	    'F#': '2B', 'Gb': '2B',
	    'Db': '3B', 'C#': '3B',
	    'Ab': '4B',
	    'Eb': '5B',
	    'Bb': '6B',
	    'F': '7B',
	    'C': '8B',
	    'G': '9B',
	    'D': '10B',
	    'A': '11B',
	    'E': '12B',
	}
	CAMELOT_MINOR = {
	    'Ab': '1A', 'G#': '1A',
	    'Eb': '2A', 'D#': '2A',
	    'Bb': '3A', 'A#': '3A',
	    'F': '4A',
	    'C': '5A',
	    'G': '6A',
	    'D': '7A',
	    'A': '8A',
	    'E': '9A',
	    'B': '10A',
	    'F#': '11A', 'Gb': '11A',
	    'Db': '12A', 'C#': '12A',
	}

	def roll(a, k):
	    """Return ``a`` circularly shifted by ``k`` positions (wrapper over ``np.roll``)."""
	    shifted = np.roll(a, k)
	    return shifted
	def norm(v):
	    """Divide ``v`` by its norm; a 1e-12 term in the divisor guards against zero input."""
	    magnitude = np.linalg.norm(v)
	    return v / (magnitude + 1e-12)
	def tonic_from_index(i: int) -> str:
	    """Return the flat-spelled pitch name for pitch-class index ``i`` (taken modulo 12)."""
	    pitch_class = i % 12
	    return PITCHES_FLAT[pitch_class]
	def camelot(tonic: str, mode: str) -> str:
	    """Look up the Camelot code for ``tonic`` in the given ``mode``.

	    Any ``mode`` other than ``"major"`` uses the minor table. Returns an empty
	    string when the tonic name is not present in the chosen table.
	    """
	    if mode == "major":
	        table = CAMELOT_MAJOR
	    else:
	        table = CAMELOT_MINOR
	    return table.get(tonic, "")

	# ---------- Robust audio loader (fixes “unsupported type/codec”) ----------
	def load_audio_any(path: str, sr: int = 22050, duration: float = 300.0):
	    """Load an audio file as a mono signal, with a pydub-based fallback decoder.

	    The file is first read with ``librosa.load``. If that raises for any
	    reason, the file is decoded with pydub, truncated to ``duration`` seconds
	    (when ``duration`` is truthy), exported as WAV into an in-memory buffer and
	    loaded from there.

	    Args:
	        path: Path of the audio file to read.
	        sr: Sample rate requested from ``librosa.load``.
	        duration: Maximum number of seconds to load.

	    Returns:
	        The ``(samples, sample_rate)`` pair produced by ``librosa.load``.
	    """
	    try:
	        samples, rate = librosa.load(path, sr=sr, mono=True, duration=duration)
	        return samples, rate
	    except Exception:
	        # Fallback path: let pydub decode the file (needs ffmpeg, which the
	        # original note says is installed via apt.txt).
	        segment = AudioSegment.from_file(path)
	        if duration:
	            # pydub slices are expressed in milliseconds.
	            cutoff_ms = int(duration * 1000)
	            segment = segment[:cutoff_ms]
	        wav_buffer = io.BytesIO()
	        segment.export(wav_buffer, format="wav")
	        wav_buffer.seek(0)
	        samples, rate = librosa.load(wav_buffer, sr=sr, mono=True)
	        return samples, rate

	# ---------- BPM (consensus + half/double correction) ----------
	def pick_best_bpm(y: np.ndarray, sr: int, hop: int = 512) -> Tuple[float, float]:
	    """Estimate tempo from three estimators, then resolve half/double-time.

	    One onset-strength envelope feeds three tempo candidates: the strongest
	    autocorrelation lag, the most frequent per-frame tempo estimate, and the
	    beat tracker's tempo. Every plausible candidate, together with its half and
	    double, is kept if it lies in 60-200 BPM and is scored by how well a
	    regular pulse at that tempo lines up with onset peaks. The best-scoring
	    tempo is returned.

	    Args:
	        y: Audio samples.
	        sr: Sample rate of ``y``.
	        hop: Hop length (in samples) used for the onset envelope.

	    Returns:
	        ``(bpm, confidence)`` with ``confidence`` clipped to [0, 1]. When no
	        estimator yields a tempo strictly between 30 and 240 BPM, returns
	        ``(max(beat_tracker_bpm, 0.0), 0.0)``.
	    """
	    onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop, aggregate=np.median)
	    n_frames = len(onset_env)

	    # Candidate 1: strongest autocorrelation lag within 60-200 BPM.
	    ac = librosa.autocorrelate(onset_env, max_size=onset_env.size // 2)
	    lags = np.arange(1, len(ac))
	    # A lag of `lags` frames lasts lags*hop/sr seconds, hence 60*sr/(lags*hop) BPM.
	    bpms_ac = 60.0 * sr / (lags * hop)
	    in_range = (bpms_ac >= 60) & (bpms_ac <= 200)
	    ac_vals = ac[1:][in_range]
	    bpms_ac = bpms_ac[in_range]
	    bpm_ac = float(bpms_ac[np.argmax(ac_vals)]) if len(bpms_ac) else 0.0

	    # Candidate 2: mode of the per-frame tempo estimates (1-BPM histogram bins).
	    # Newer librosa exposes the estimator as librosa.feature.tempo; fall back
	    # to the older librosa.beat.tempo location when that is absent.
	    tempo_fn = getattr(librosa.feature, "tempo", None) or librosa.beat.tempo
	    tempi = tempo_fn(onset_envelope=onset_env, sr=sr, hop_length=hop, aggregate=None)
	    bpm_tg = 0.0
	    if tempi is not None and len(tempi):
	        t = tempi[(tempi >= 60) & (tempi <= 200)]
	        if len(t):
	            hist, _ = np.histogram(t, bins=np.arange(60, 202, 1))
	            bpm_tg = float(60 + np.argmax(hist))

	    # Candidate 3: the beat tracker's tempo. It may come back as a scalar or
	    # as a one-element array depending on the librosa version.
	    tempo_bt, _ = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr, hop_length=hop)
	    bpm_bt = float(np.atleast_1d(tempo_bt).ravel()[0])

	    candidates = [b for b in (bpm_ac, bpm_tg, bpm_bt) if 30 < b < 240]
	    if not candidates:
	        return max(bpm_bt, 0.0), 0.0

	    # Add half- and double-time variants so octave errors can be corrected.
	    expanded = [b for x in candidates for b in (x / 2, x, x * 2) if 60 <= b <= 200]

	    # The pulse grid is anchored on the strongest onset; same for every tempo.
	    start = int(np.argmax(onset_env))

	    def align_score(bpm_val: float) -> float:
	        """Mean onset strength found within +/-2 frames of each grid beat."""
	        period = (60.0 / bpm_val) * sr / hop
	        frames = np.round(np.arange(start, n_frames, period)).astype(int)
	        frames = frames[frames < n_frames]
	        total = sum(
	            float(np.max(onset_env[max(0, f - 2):min(n_frames, f + 3)]))
	            for f in frames
	        )
	        return total / (len(frames) + 1e-12)

	    scored = [(b, align_score(b)) for b in expanded]
	    best_bpm, best_s = max(scored, key=lambda pair: pair[1])

	    # Agreement is the mean ratio (<= 1) between the winner and each candidate.
	    agree = np.mean([min(best_bpm, c) / max(best_bpm, c) for c in candidates])
	    conf = float(np.clip(0.7 * (best_s / (np.max(onset_env) + 1e-12)) + 0.3 * agree, 0.0, 1.0))
	    return best_bpm, conf

	# ---------- Key (beat-sync CQT+CENS, dual-profile vote) ----------
	def beat_sync_pcp(y: np.ndarray, sr: int, hop: int = 512) -> np.ndarray:
	    """Compute an averaged pitch-class profile from the harmonic part of ``y``.

	    The harmonic component (from HPSS) is turned into a weighted blend of
	    CQT chroma (weight 0.65) and CENS chroma (weight 0.35). When the beat
	    tracker finds more than two beats, the chroma frames are averaged per beat
	    interval. Each frame is then scaled to unit length and the frames are
	    averaged over time.

	    Args:
	        y: Audio samples.
	        sr: Sample rate of ``y``.
	        hop: Hop length (in samples) for the chroma features and beat tracking.

	    Returns:
	        A 1-D array with one value per chroma bin (12 with librosa's defaults).
	    """
	    y_h, _ = librosa.effects.hpss(y)
	    cqt = librosa.feature.chroma_cqt(y=y_h, sr=sr, hop_length=hop, bins_per_octave=36, cqt_mode="full")
	    cens = librosa.feature.chroma_cens(y=y_h, sr=sr, hop_length=hop)
	    chroma = norm(0.65 * cqt + 0.35 * cens)

	    _, beats = librosa.beat.beat_track(y=y_h, sr=sr, hop_length=hop)
	    if beats is not None and len(beats) > 2:
	        chroma = librosa.util.sync(chroma, beats, aggregate=np.mean)

	    # Unit-normalise every frame so loud passages do not dominate the average.
	    chroma = chroma / (np.linalg.norm(chroma, axis=0, keepdims=True) + 1e-12)
	    return np.mean(chroma, axis=1)

	def score_key(pcp: np.ndarray, prof_major: np.ndarray, prof_minor: np.ndarray):
	    """Match a pitch-class profile against all 24 rotated key templates.

	    Args:
	        pcp: 12-element pitch-class profile, index 0 = C.
	        prof_major: 12-element major-key template whose index 0 is the tonic.
	        prof_minor: 12-element minor-key template whose index 0 is the tonic.

	    Returns:
	        ``(mode, tonic, confidence)`` where ``mode`` is ``"major"`` or
	        ``"minor"``, ``tonic`` is the pitch-class index (0-11) of the best
	        match, and ``confidence`` is the gap between the best and second-best
	        score relative to the best score, clipped to [0, 1]. Ties resolve to
	        the lowest tonic, major before minor.
	    """
	    pcp = np.asarray(pcp, dtype=float)
	    pcp = pcp / (np.linalg.norm(pcp) + 1e-12)

	    # Rotation does not change a vector's length, so normalise each template once.
	    templates = []
	    for label, profile in (("major", prof_major), ("minor", prof_minor)):
	        profile = np.asarray(profile, dtype=float)
	        templates.append((label, profile / (np.linalg.norm(profile) + 1e-12)))

	    best_score, best_mode, best_tonic = -1.0, "major", 0
	    all_scores = []
	    for tonic in range(12):
	        for label, template in templates:
	            # np.roll(template, tonic)[p] == template[(p - tonic) % 12], which
	            # puts the tonic weight (index 0) on pitch class `tonic`. Rolling
	            # by -tonic would instead score the mirrored key (12 - tonic).
	            score = float(np.dot(pcp, np.roll(template, tonic)))
	            all_scores.append(score)
	            if score > best_score:
	                best_score, best_mode, best_tonic = score, label, tonic

	    ranked = np.sort(np.array(all_scores))
	    margin = (ranked[-1] - ranked[-2]) / (ranked[-1] + 1e-12)
	    confidence = float(np.clip(margin, 0.0, 1.0))
	    return best_mode, best_tonic, confidence

	def estimate_key(y: np.ndarray, sr: int):
	    """Estimate the musical key by scoring one profile against two template sets.

	    The pitch-class profile of ``y`` is scored with both the KS and the TP
	    templates. If both agree on mode and tonic, that key is returned with a
	    boosted confidence (mean of the two confidences plus 0.3, clipped to
	    [0, 1]). Otherwise the result with the higher confidence wins, and the KS
	    result is preferred on a tie.

	    Returns:
	        ``(name, mode, confidence, tonic)`` where ``name`` is
	        ``"<pitch name> <mode>"`` and ``tonic`` is the pitch-class index.
	    """
	    profile = beat_sync_pcp(y, sr)
	    ks_mode, ks_tonic, ks_conf = score_key(profile, KS_MAJOR, KS_MINOR)
	    tp_mode, tp_tonic, tp_conf = score_key(profile, TP_MAJOR, TP_MINOR)

	    both_agree = (ks_mode == tp_mode) and (ks_tonic == tp_tonic)
	    if both_agree:
	        mode, tonic = ks_mode, ks_tonic
	        conf = float(np.clip(0.5 * (ks_conf + tp_conf) + 0.3, 0.0, 1.0))
	    elif ks_conf >= tp_conf:
	        mode, tonic, conf = ks_mode, ks_tonic, ks_conf
	    else:
	        mode, tonic, conf = tp_mode, tp_tonic, tp_conf

	    return f"{tonic_from_index(tonic)} {mode}", mode, conf, tonic

	# ---------- Extras ----------
	def robust_scale(x, lo, hi):
	    """Map ``x`` linearly from the range ``[lo, hi]`` onto [0, 1], clipping outside values.

	    A 1e-12 term in the divisor keeps the result finite when ``lo == hi``.
	    """
	    span = hi - lo + 1e-12
	    fraction = (x - lo) / span
	    return float(np.clip(fraction, 0.0, 1.0))
	def estimate_extras(y: np.ndarray, sr: int, bpm: float, mode: str) -> Dict[str, float]:
	    """Derive heuristic Energy, Danceability and Happiness scores (0-100).

	    Args:
	        y: Audio samples.
	        sr: Sample rate of ``y``.
	        bpm: Tempo used by the tempo-preference terms.
	        mode: ``"major"`` adds a fixed 0.2 bonus to the happiness score.

	    Returns:
	        A dict with keys ``"Energy"``, ``"Danceability"`` and ``"Happiness"``,
	        each a percentage rounded to one decimal place.
	    """
	    # Energy: mean RMS level rescaled from the range [0.01, 0.2] to [0, 1].
	    rms = librosa.feature.rms(y=y, frame_length=2048, hop_length=512).squeeze()
	    energy = robust_scale(float(np.mean(rms)), 0.01, 0.2)

	    # Pulse clarity is best-effort: fall back to a neutral 0.5 if PLP fails.
	    try:
	        plp = librosa.beat.plp(y=y, sr=sr)
	        pulse = float(np.mean(plp))
	    except Exception:
	        pulse = 0.5

	    # Danceability: Gaussian tempo preference centred on 118 BPM, plus pulse.
	    tempo_pref = math.exp(-((bpm - 118.0) / 50.0) ** 2)
	    dance = 0.6 * tempo_pref + 0.4 * pulse

	    # Brightness: mean spectral centroid as a fraction of the Nyquist frequency.
	    centroid = librosa.feature.spectral_centroid(y=y, sr=sr).squeeze()
	    bright = float(np.clip(float(np.mean(centroid)) / (sr / 2.0 + 1e-12), 0, 1))

	    # Happiness: brightness, tempo preference centred on 120 BPM, mode bonus.
	    happy = (
	        0.5 * bright
	        + 0.3 * math.exp(-((bpm - 120.0) / 60.0) ** 2)
	        + (0.2 if mode == "major" else 0.0)
	    )

	    return {
	        "Energy": round(energy * 100, 1),
	        "Danceability": round(float(np.clip(dance, 0, 1)) * 100, 1),
	        "Happiness": round(float(np.clip(happy, 0, 1)) * 100, 1),
	    }

	# ---------- Core ----------
	def analyze_one(path: str, max_duration_s: float = 300.0) -> Dict[str, str]:
	    """Analyze one audio file and return a result row for the results table.

	    Args:
	        path: Path of the audio file.
	        max_duration_s: Maximum number of seconds of audio to load.

	    Returns:
	        A dict with the keys "File Name", "Key", "Alt Key" (Camelot code),
	        "BPM", "Energy", "Danceability" and "Happiness". If loading fails, the
	        "Key" field carries the error text and the other fields are empty. If
	        nothing remains after silence trimming, the fields are "N/A".
	    """
	    file_name = os.path.basename(path)

	    try:
	        samples, rate = load_audio_any(path, sr=22050, duration=max_duration_s)
	    except Exception as e:
	        return {"File Name": file_name, "Key": f"Error: {e}", "Alt Key": "", "BPM": "", "Energy": "", "Danceability": "", "Happiness": ""}

	    # Strip leading/trailing silence before any analysis.
	    samples, _ = librosa.effects.trim(samples, top_db=40)
	    if samples.size == 0:
	        return {"File Name": file_name, "Key": "N/A", "Alt Key": "", "BPM": "N/A", "Energy": "N/A", "Danceability": "N/A", "Happiness": "N/A"}

	    bpm_val, _ = pick_best_bpm(samples, rate, hop=512)
	    if bpm_val > 0:
	        bpm_disp, bpm_for_extras = int(round(bpm_val)), bpm_val
	    else:
	        # No usable tempo: display N/A and feed a neutral 120 BPM to the extras.
	        bpm_disp, bpm_for_extras = "N/A", 120.0

	    key_name, mode, _, tonic = estimate_key(samples, rate)
	    camelot_code = camelot(tonic_from_index(tonic), mode)

	    extras = estimate_extras(samples, rate, bpm_for_extras, mode)

	    row = {"File Name": file_name, "Key": key_name, "Alt Key": camelot_code, "BPM": bpm_disp}
	    for metric in ("Energy", "Danceability", "Happiness"):
	        row[metric] = extras[metric]
	    return row

	def analyze_batch(files: List[str], save_results: bool, search: str):
	    """Analyze several files, optionally filter the rows, optionally save a CSV.

	    Args:
	        files: Paths of the audio files; ``None`` or empty yields an empty table.
	        save_results: When true and at least one row remains, write the table
	            to a temporary ``.csv`` file.
	        search: Text to filter by. A row is kept when any column contains it
	            (case-insensitive, literal match). Blank text disables filtering.

	    Returns:
	        ``(dataframe, csv_path)`` where ``csv_path`` is ``None`` when no CSV
	        was written.
	    """
	    columns = ["File Name", "Key", "Alt Key", "BPM", "Energy", "Danceability", "Happiness"]
	    if not files:
	        return pd.DataFrame(columns=columns), None

	    rows = []
	    for f in files:
	        try:
	            rows.append(analyze_one(f))
	        except Exception as e:
	            # One bad file must not abort the batch; report it in its own row.
	            rows.append({"File Name": os.path.basename(f), "Key": f"Error: {e}", "Alt Key": "", "BPM": "", "Energy": "", "Danceability": "", "Happiness": ""})
	    df = pd.DataFrame(rows, columns=columns)

	    needle = search.strip() if search else ""
	    if needle:
	        # regex=False: the text comes from the user and must be matched
	        # literally, otherwise input such as "(" raises a regex error.
	        mask = df.apply(
	            lambda col: col.astype(str).str.contains(needle, case=False, na=False, regex=False)
	        )
	        df = df[mask.any(axis=1)]

	    csv_file = None
	    if save_results and len(df):
	        # Reserve a path and close the handle before pandas writes to it, so
	        # no file descriptor is leaked.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
	            csv_file = tmp.name
	        df.to_csv(csv_file, index=False, encoding="utf-8")
	    return df, csv_file

	# ---------- UI ----------
	CSS = """
	#app-title { font-weight: 700; font-size: 28px; }
	.small-note { font-size: 12px; opacity: 0.8; }
	th, td { text-align: left !important; }
	"""

	# Column headers shown in the results table.
	_RESULT_HEADERS = ["File Name", "Key", "Alt Key", "BPM", "Energy", "Danceability", "Happiness"]

	# Build the Gradio interface: a file picker, a control row, then the outputs.
	with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
	    gr.Markdown("<div id='app-title'>Audio Key & BPM Finder — Robust Loader</div>")
	    gr.Markdown(
	        "Upload MP3/WAV/M4A, etc. This Space installs FFmpeg and falls back to pydub if needed. "
	        "Outputs Key, Camelot (Alt Key), BPM, plus Energy/Danceability/Happiness."
	    )

	    # Input row: one or more audio files, passed to the handler as paths.
	    with gr.Row():
	        files = gr.File(label="Audio Files", file_count="multiple", type="filepath")

	    # Control row: search filter, CSV toggle and the run button.
	    with gr.Row():
	        search = gr.Textbox(
	            label="Search (filter any column)",
	            placeholder="Type to filter…",
	            scale=3,
	        )
	        save = gr.Checkbox(label="Save results as CSV", value=False, scale=1)
	        run = gr.Button("Analyze", variant="primary", scale=1)

	    out_df = gr.Dataframe(
	        headers=_RESULT_HEADERS,
	        interactive=False,
	        wrap=True,
	        label="Results",
	    )
	    out_csv = gr.File(label="Download CSV", visible=True)

	    # The input order must match analyze_batch(files, save_results, search).
	    run.click(analyze_batch, inputs=[files, save, search], outputs=[out_df, out_csv])

	if __name__ == "__main__":
	    demo.launch()