Spaces:

AdityaK007
/

MSD

Sleeping

App Files Files Community

MSD / app_good.py

AdityaK007

Update app_good.py

16d47ae verified 3 months ago

raw

history blame contribute delete

16.9 kB

	import gradio as gr
	import librosa
	import numpy as np
	import pandas as pd
	from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
	from sklearn.preprocessing import StandardScaler
	from sklearn.metrics.pairwise import cosine_similarity
	from scipy import signal
	from scipy.signal import get_window as scipy_get_window
	import plotly.express as px
	import plotly.graph_objects as go
	import os
	import tempfile

	# ----------------------------
	# 1. Signal Alignment & Preprocessing
	# ----------------------------
	def align_signals(ref, target):
	    """Trim `ref` and `target` so that both start at the same point in time.

	    The relative delay is read off the peak of the cross-correlation between
	    normalised copies of the two signals. The returned arrays are slices of
	    the *original* inputs (not the normalised copies), cut to equal length.

	    Returns:
	        tuple: (aligned_ref, aligned_target).
	    """
	    ref_unit = librosa.util.normalize(ref)
	    target_unit = librosa.util.normalize(target)

	    # Convolving with the time-reversed reference is a cross-correlation.
	    xcorr = signal.fftconvolve(target_unit, ref_unit[::-1], mode='full')
	    lag_axis = signal.correlation_lags(len(target_unit), len(ref_unit), mode='full')
	    best_lag = lag_axis[np.argmax(xcorr)]

	    # Positive lag: the target starts late, so drop its leading samples.
	    # Otherwise drop the leading samples of the reference instead.
	    if best_lag > 0:
	        ref_out, target_out = ref, target[best_lag:]
	    else:
	        ref_out, target_out = ref[abs(best_lag):], target

	    common = min(len(ref_out), len(target_out))
	    return ref_out[:common], target_out[:common]

	# ----------------------------
	# 2. Segment Audio
	# ----------------------------
	def segment_audio(y, sr, frame_length_ms, hop_length_ms, window_type="hann"):
	    """Cut a signal into overlapping, windowed frames.

	    Args:
	        y: 1-D NumPy array of samples.
	        sr: Sample rate in Hz.
	        frame_length_ms: Frame duration in milliseconds.
	        hop_length_ms: Step between frame starts in milliseconds.
	        window_type: Window name understood by ``scipy.signal.get_window``;
	            ``"rectangular"`` is accepted as an alias for ``"boxcar"``.

	    Returns:
	        tuple: ``(frames, frame_length)`` where ``frames`` has one frame per
	        column (shape ``(frame_length, n_frames)``). If the signal is shorter
	        than one frame, a single all-zero frame is returned. Trailing samples
	        that do not fill a whole frame are dropped.
	    """
	    frame_length = int(frame_length_ms * sr / 1000)
	    # A hop shorter than one sample truncates to 0, which would make range()
	    # raise; always advance by at least one sample.
	    hop_length = max(1, int(hop_length_ms * sr / 1000))

	    window_name = "boxcar" if window_type == "rectangular" else window_type
	    window = scipy_get_window(window_name, frame_length)

	    starts = range(0, len(y) - frame_length + 1, hop_length)
	    frames = [y[start:start + frame_length] * window for start in starts]

	    if not frames:
	        return np.zeros((frame_length, 1)), frame_length
	    return np.array(frames).T, frame_length

	# ----------------------------
	# 3. Feature Extraction
	# ----------------------------
	def extract_features_with_spectrum(frames, sr):
	    """Compute per-frame scalar features plus a dB spectrum.

	    Args:
	        frames: 2-D array with one frame per column.
	        sr: Sample rate in Hz.

	    Returns:
	        list[dict]: One dict per frame with the keys ``rms``, ``zcr``,
	        ``spectral_centroid``, ``spectral_flatness``, ``mfcc_1`` .. ``mfcc_13``,
	        ``low_freq_energy``, ``mid_freq_energy``, ``high_freq_energy`` and
	        ``spectrum``. Frames whose peak magnitude is below 1e-10 get all-zero
	        scalars and a zero spectrum column.
	    """
	    features = []
	    n_mfcc = 13
	    n_fft = min(2048, frames.shape[0])
	    mfcc_keys = [f"mfcc_{j + 1}" for j in range(n_mfcc)]

	    for i in range(frames.shape[1]):
	        frame = frames[:, i]

	        # Silent (or too short) frame: skip librosa and emit neutral values.
	        if len(frame) < n_fft or np.max(np.abs(frame)) < 1e-10:
	            feat = {k: 0.0 for k in ["rms", "spectral_centroid", "zcr", "spectral_flatness",
	                                     "low_freq_energy", "mid_freq_energy", "high_freq_energy"]}
	            feat.update(dict.fromkeys(mfcc_keys, 0.0))
	            feat["spectrum"] = np.zeros((n_fft // 2 + 1, 1))
	            features.append(feat)
	            continue

	        # NOTE(review): only the MFCC and STFT calls below pass n_fft; the
	        # other librosa features run with their library defaults. Confirm
	        # that is intended for short frames.
	        feat = {}
	        feat["rms"] = float(np.mean(librosa.feature.rms(y=frame)[0]))
	        feat["zcr"] = float(np.mean(librosa.feature.zero_crossing_rate(frame)[0]))

	        # Each optional feature falls back to a neutral value on failure.
	        # `except Exception` (not a bare except) keeps KeyboardInterrupt and
	        # SystemExit propagating.
	        try:
	            feat["spectral_centroid"] = float(np.mean(librosa.feature.spectral_centroid(y=frame, sr=sr)[0]))
	        except Exception:
	            feat["spectral_centroid"] = 0.0

	        try:
	            feat["spectral_flatness"] = float(np.mean(librosa.feature.spectral_flatness(y=frame)[0]))
	        except Exception:
	            feat["spectral_flatness"] = 0.0

	        try:
	            mfccs = librosa.feature.mfcc(y=frame, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft)
	            for j, key in enumerate(mfcc_keys):
	                feat[key] = float(np.mean(mfccs[j]))
	        except Exception:
	            for key in mfcc_keys:
	                feat[key] = 0.0

	        try:
	            S = np.abs(librosa.stft(frame, n_fft=n_fft))
	            S_db = librosa.amplitude_to_db(S, ref=np.max)
	            freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
	            # Band edges in Hz: low <= 2000 < mid <= 4000 < high.
	            low_mask = freqs <= 2000
	            mid_mask = (freqs > 2000) & (freqs <= 4000)
	            high_mask = freqs > 4000
	            feat["low_freq_energy"] = float(np.mean(S_db[low_mask])) if np.any(low_mask) else -80.0
	            feat["mid_freq_energy"] = float(np.mean(S_db[mid_mask])) if np.any(mid_mask) else -80.0
	            feat["high_freq_energy"] = float(np.mean(S_db[high_mask])) if np.any(high_mask) else -80.0
	            feat["spectrum"] = S_db
	        except Exception:
	            feat["low_freq_energy"] = feat["mid_freq_energy"] = feat["high_freq_energy"] = -80.0
	            feat["spectrum"] = np.zeros((n_fft // 2 + 1, 1))

	        features.append(feat)
	    return features

	# ----------------------------
	# 4. Frame Comparison
	# ----------------------------
	def _cosine_or_zero(a, b):
	    """Cosine similarity of two 1-D vectors, or 0.0 when either is all zeros."""
	    if np.all(a == 0) or np.all(b == 0):
	        return 0.0
	    return float(cosine_similarity(a.reshape(1, -1), b.reshape(1, -1))[0][0])


	def compare_frames_enhanced(near_feats, far_feats, metrics):
	    """Compare near and far feature dicts frame by frame.

	    Args:
	        near_feats: List of per-frame feature dicts for the near recording.
	        far_feats: List of per-frame feature dicts for the far recording.
	        metrics: Iterable of selected metric names. Recognised values are
	            ``"Euclidean Distance"``, ``"Cosine Similarity"`` and
	            ``"High-Freq Loss Ratio"``. ``None`` is treated as empty.

	    Returns:
	        pandas.DataFrame: One row per compared frame with ``frame_index``,
	        one column per selected metric, and the always-present
	        ``spectral_overlap`` and ``combined_match_score`` columns. The same
	        columns are present (with zero rows) when there is nothing to compare,
	        so callers can index them unconditionally.
	    """
	    metrics = metrics or ()
	    min_len = min(len(near_feats), len(far_feats))

	    if min_len == 0:
	        empty = {"frame_index": []}
	        for metric, column in (("Euclidean Distance", "euclidean_dist"),
	                               ("Cosine Similarity", "cosine_similarity"),
	                               ("High-Freq Loss Ratio", "high_freq_loss_db")):
	            if metric in metrics:
	                empty[column] = []
	        empty["spectral_overlap"] = []
	        empty["combined_match_score"] = []
	        return pd.DataFrame(empty)

	    near_feats = near_feats[:min_len]
	    far_feats = far_feats[:min_len]
	    results = {"frame_index": list(range(min_len))}

	    # Numeric feature matrix per side; the spectrum column is not scalar.
	    drop_cols = ["spectrum"]
	    near_vec = pd.DataFrame(near_feats).drop(columns=drop_cols, errors="ignore").select_dtypes(include=[np.number]).values
	    far_vec = pd.DataFrame(far_feats).drop(columns=drop_cols, errors="ignore").select_dtypes(include=[np.number]).values

	    if "Euclidean Distance" in metrics:
	        results["euclidean_dist"] = np.linalg.norm(near_vec - far_vec, axis=1).tolist()

	    if "Cosine Similarity" in metrics:
	        results["cosine_similarity"] = [
	            _cosine_or_zero(near_row, far_row) for near_row, far_row in zip(near_vec, far_vec)
	        ]

	    if "High-Freq Loss Ratio" in metrics:
	        # Difference of the two dB values (near minus far).
	        results["high_freq_loss_db"] = [
	            float(near["high_freq_energy"] - far["high_freq_energy"])
	            for near, far in zip(near_feats, far_feats)
	        ]

	    results["spectral_overlap"] = [
	        _cosine_or_zero(near["spectrum"].flatten(), far["spectrum"].flatten())
	        for near, far in zip(near_feats, far_feats)
	    ]

	    # Half weight for spectral overlap, plus half weight for cosine
	    # similarity when that metric was selected.
	    combined = []
	    for i in range(min_len):
	        score = results["spectral_overlap"][i] * 0.5
	        if "cosine_similarity" in results:
	            score += results["cosine_similarity"][i] * 0.5
	        combined.append(score)
	    results["combined_match_score"] = combined

	    return pd.DataFrame(results)

	# ----------------------------
	# 5. Dual Clustering Logic
	# ----------------------------
	def perform_dual_clustering(near_df, far_df, cluster_features, algo, n_clusters, eps):
	    """Attach a string ``cluster`` column to copies of both feature tables.

	    Features are standardised with a scaler fitted on the near table and
	    reused for the far table.

	    * ``"KMeans"``: fitted on the near table, then used to predict labels for
	      the far table, so label ``k`` refers to the same centroid in both.
	    * ``"Agglomerative"`` / ``"DBSCAN"``: each table is clustered
	      independently (these estimators have no ``predict``), so labels are NOT
	      comparable between the two tables.
	    * Any other value: every row is assigned cluster ``"0"``.

	    Args:
	        near_df: Feature table for the near recording.
	        far_df: Feature table for the far recording.
	        cluster_features: Column names to cluster on.
	        algo: Algorithm name (see above).
	        n_clusters: Requested cluster count (capped at the number of rows).
	        eps: DBSCAN neighbourhood radius.

	    Returns:
	        tuple: ``(near_df, far_df)``. When no requested feature is a column of
	        ``near_df`` the inputs are returned unchanged, without a ``cluster``
	        column. Otherwise copies with the added column are returned.
	    """
	    if not cluster_features:
	        return near_df, far_df

	    valid_features = [f for f in cluster_features if f in near_df.columns]
	    if not valid_features:
	        return near_df, far_df

	    X_near = np.nan_to_num(near_df[valid_features].values)
	    X_far = np.nan_to_num(far_df[valid_features].values)

	    if algo in ("KMeans", "Agglomerative", "DBSCAN"):
	        # One scaler, fitted on near, keeps both tables in the same units.
	        scaler = StandardScaler()
	        X_near_scaled = scaler.fit_transform(X_near)
	        X_far_scaled = scaler.transform(X_far)
	        # UI sliders may deliver the count as a float; estimators need an int.
	        k = int(n_clusters)

	        if algo == "KMeans":
	            model = KMeans(n_clusters=min(k, len(X_near)), random_state=42, n_init=10)
	            near_labels = model.fit_predict(X_near_scaled)
	            far_labels = model.predict(X_far_scaled)
	        elif algo == "Agglomerative":
	            model = AgglomerativeClustering(n_clusters=min(k, len(X_near)))
	            near_labels = model.fit_predict(X_near_scaled)
	            far_model = AgglomerativeClustering(n_clusters=min(k, len(X_far)))
	            far_labels = far_model.fit_predict(X_far_scaled)
	        else:
	            model = DBSCAN(eps=eps, min_samples=3)
	            near_labels = model.fit_predict(X_near_scaled)
	            far_labels = model.fit_predict(X_far_scaled)
	    else:
	        # Integer zeros so the string label is "0", like estimator labels,
	        # rather than "0.0".
	        near_labels = np.zeros(len(X_near), dtype=int)
	        far_labels = np.zeros(len(X_far), dtype=int)

	    # Labels are stored as strings so plots treat them as categories.
	    near_df = near_df.copy()
	    near_df["cluster"] = near_labels
	    near_df["cluster"] = near_df["cluster"].astype(str)

	    far_df = far_df.copy()
	    far_df["cluster"] = far_labels
	    far_df["cluster"] = far_df["cluster"].astype(str)

	    return near_df, far_df

	# ----------------------------
	# 6. Plotting Helpers
	# ----------------------------
	def generate_cluster_plot(df, x_attr, y_attr, title_suffix):
	    """Scatter two feature columns of `df`, coloured by cluster when available.

	    Args:
	        df: Feature table, optionally carrying a ``cluster`` column.
	        x_attr: Column plotted on the x axis.
	        y_attr: Column plotted on the y axis.
	        title_suffix: Text inserted in the figure title.

	    Returns:
	        A plotly figure. An empty "No Data" figure is returned when `df` has
	        no rows or lacks either axis column.
	    """
	    if len(df) == 0 or x_attr not in df.columns or y_attr not in df.columns:
	        return px.scatter(title="No Data")

	    # A table that was never clustered has no "cluster" column; plot it
	    # uncoloured instead of passing plotly a column name that does not exist.
	    color = "cluster" if "cluster" in df.columns else None

	    return px.scatter(
	        df, x=x_attr, y=y_attr, color=color,
	        title=f"Clustering Analysis ({title_suffix}): {x_attr} vs {y_attr}",
	        color_discrete_sequence=px.colors.qualitative.Bold,  # Consistent colors
	    )

	def update_cluster_view(view_mode, near_df, far_df, cluster_features):
	    """Build the cluster scatter for the table selected by `view_mode`.

	    The first two entries of `cluster_features` become the x and y axes.
	    A placeholder figure is returned when either table is missing or fewer
	    than two features are given. Any `view_mode` other than "Near Field"
	    shows the far table.
	    """
	    if near_df is None or far_df is None:
	        return px.scatter(title="Run Analysis First")
	    if len(cluster_features) < 2:
	        return px.scatter(title="Select at least 2 features")

	    x_attr = cluster_features[0]
	    y_attr = cluster_features[1]

	    show_near = view_mode == "Near Field"
	    table = near_df if show_near else far_df
	    label = "Near Field" if show_near else "Far Field"
	    return generate_cluster_plot(table, x_attr, y_attr, label)

	# ----------------------------
	# 7. Main Analysis
	# ----------------------------
	def _upload_path(upload):
	    """Return the file path of an upload given as a path or an object with ``.name``."""
	    if isinstance(upload, (str, os.PathLike)):
	        return upload
	    return upload.name


	def analyze_audio_pair(
	    near_file, far_file,
	    frame_length_ms, hop_length_ms, window_type,
	    comparison_metrics, cluster_features, clustering_algo, n_clusters, dbscan_eps
	):
	    """Run the full near/far comparison and build every output of the UI.

	    Both files are loaded at the near file's native sample rate, normalised,
	    aligned, framed and converted to features; the frames are then compared
	    and clustered.

	    Args:
	        near_file: Near recording, as a path or an upload object with ``.name``.
	        far_file: Far recording, same forms as `near_file`.
	        frame_length_ms: Frame duration in milliseconds.
	        hop_length_ms: Step between frames in milliseconds.
	        window_type: Window name forwarded to ``segment_audio``.
	        comparison_metrics: Metric names forwarded to ``compare_frames_enhanced``.
	        cluster_features: Feature columns to cluster on (``None`` means none).
	        clustering_algo: Algorithm name forwarded to ``perform_dual_clustering``.
	        n_clusters: Requested cluster count.
	        dbscan_eps: DBSCAN radius.

	    Returns:
	        tuple: ``(comparison figure, comparison table, cluster figure,
	        near cluster table, spectral-difference heatmap, overlay figure,
	        near cluster table, far cluster table)``. The last two are the copies
	        kept in UI state.

	    Raises:
	        gr.Error: If either file is missing.
	    """
	    if not near_file or not far_file:
	        raise gr.Error("Upload both files.")
	    cluster_features = cluster_features or []

	    # Load & Align (the far file is resampled to the near file's rate)
	    y_near, sr = librosa.load(_upload_path(near_file), sr=None)
	    y_far, _ = librosa.load(_upload_path(far_file), sr=sr)

	    y_near = librosa.util.normalize(y_near)
	    y_far = librosa.util.normalize(y_far)
	    y_near, y_far = align_signals(y_near, y_far)

	    # Process
	    frames_near, _ = segment_audio(y_near, sr, frame_length_ms, hop_length_ms, window_type)
	    frames_far, _ = segment_audio(y_far, sr, frame_length_ms, hop_length_ms, window_type)

	    near_feats = extract_features_with_spectrum(frames_near, sr)
	    far_feats = extract_features_with_spectrum(frames_far, sr)

	    # Comparison Data
	    comparison_df = compare_frames_enhanced(near_feats, far_feats, comparison_metrics)

	    # Clustering Data (scalar features only)
	    near_df_raw = pd.DataFrame(near_feats).drop(columns=["spectrum"], errors="ignore")
	    far_df_raw = pd.DataFrame(far_feats).drop(columns=["spectrum"], errors="ignore")

	    # Perform Dual Clustering
	    near_clustered, far_clustered = perform_dual_clustering(
	        near_df_raw, far_df_raw, cluster_features, clustering_algo, n_clusters, dbscan_eps
	    )

	    # 1. Comparison Plot (Dual Axis)
	    plot_comparison = go.Figure()
	    # Axis 1: Similarity (0-1)
	    for col in ["cosine_similarity", "spectral_overlap", "combined_match_score"]:
	        if col in comparison_df.columns:
	            plot_comparison.add_trace(go.Scatter(x=comparison_df["frame_index"], y=comparison_df[col], name=col, yaxis="y1"))
	    # Axis 2: dB Loss
	    if "high_freq_loss_db" in comparison_df.columns:
	        plot_comparison.add_trace(go.Scatter(x=comparison_df["frame_index"], y=comparison_df["high_freq_loss_db"],
	                                             name="High Freq Loss (dB)", line=dict(color="red", width=1), yaxis="y2"))

	    plot_comparison.update_layout(
	        title="Comparison Metrics (Dual Axis)",
	        yaxis=dict(title="Similarity (0-1)", range=[0, 1.1]),
	        yaxis2=dict(title="Energy Diff (dB)", overlaying="y", side="right"),
	        legend=dict(x=1.1, y=1)
	    )

	    # 2. Initial Cluster Plot (Near Field)
	    init_cluster_plot = update_cluster_view("Near Field", near_clustered, far_clustered, cluster_features)

	    # 3. Spectral Heatmap of the middle frame. The index is taken from the
	    # shorter list so it is valid for both sides.
	    safe_idx = min(len(near_feats), len(far_feats)) // 2
	    diff = near_feats[safe_idx]["spectrum"] - far_feats[safe_idx]["spectrum"]
	    spec_heatmap = go.Figure(data=go.Heatmap(z=diff, colorscale='RdBu', zmid=0))
	    spec_heatmap.update_layout(title=f"Spectral Diff (Frame {safe_idx})", height=350)

	    # 4. Overlay Plot (Simple)
	    near_clustered["match_quality"] = comparison_df["combined_match_score"]
	    if len(cluster_features) > 0:
	        overlay_fig = px.scatter(near_clustered, x=cluster_features[0], y="match_quality", color="cluster",
	                                 title="Cluster vs Quality (Near Field)")
	    else:
	        overlay_fig = px.scatter(title="No features")

	    # Return: Plots + Dataframes for State + Raw Tables
	    return (plot_comparison, comparison_df,
	            init_cluster_plot, near_clustered,  # Table
	            spec_heatmap, overlay_fig,
	            near_clustered, far_clustered)  # States

	def export_results(comparison_df, near_df, far_df):
	    """Write the three result tables as CSV files in a fresh temp directory.

	    Args:
	        comparison_df: Frame comparison table.
	        near_df: Near cluster table.
	        far_df: Far cluster table.

	    Returns:
	        list[str]: Paths of ``comparison.csv``, ``near_clusters.csv`` and
	        ``far_clusters.csv``, in that order. The directory is not removed
	        here; the returned paths are handed to the caller for download.

	    Raises:
	        gr.Error: If any table is ``None`` (nothing has been analysed yet).
	    """
	    if comparison_df is None or near_df is None or far_df is None:
	        raise gr.Error("Run the analysis before exporting.")

	    temp_dir = tempfile.mkdtemp()
	    tables = (
	        ("comparison.csv", comparison_df),
	        ("near_clusters.csv", near_df),
	        ("far_clusters.csv", far_df),
	    )

	    paths = []
	    for filename, table in tables:
	        path = os.path.join(temp_dir, filename)
	        table.to_csv(path, index=False)
	        paths.append(path)
	    return paths

	# ----------------------------
	# 8. Gradio UI
	# ----------------------------
	# Scalar feature names offered for clustering: seven spectral/energy
	# descriptors followed by mfcc_1 .. mfcc_13.
	feature_list = [
	    "rms",
	    "spectral_centroid",
	    "zcr",
	    "spectral_flatness",
	    "low_freq_energy",
	    "mid_freq_energy",
	    "high_freq_energy",
	    *(f"mfcc_{n}" for n in range(1, 14)),
	]

	# Gradio Blocks application: builds the page, wires the three events
	# (analyze, view toggle, export) and exposes it as `demo`.
	# NOTE(review): this listing has lost its indentation, so the nesting of the
	# `with` blocks below cannot be read from it — restore the intended layout
	# nesting before running.
	with gr.Blocks(title="Audio Field Analyzer", theme=gr.themes.Soft()) as demo:
	# Per-session storage for the clustered near/far tables, so the view toggle
	# and the export button can reuse them without re-running the analysis.
	state_near_df = gr.State()
	state_far_df = gr.State()

	gr.Markdown("# 🎙️ Near vs Far Field Analyzer (Dual-Clustering)")

	# Input files (only .wav is offered in the file picker).
	with gr.Row():
	near_file = gr.File(label="Near-Field (Ref)", file_types=[".wav"])
	far_file = gr.File(label="Far-Field (Target)", file_types=[".wav"])

	# Analysis parameters; every control here is an input of analyze_audio_pair.
	with gr.Accordion("⚙️ Settings", open=False):
	frame_length_ms = gr.Slider(10, 200, value=30, label="Frame Length (ms)")
	hop_length_ms = gr.Slider(5, 100, value=15, label="Hop Length (ms)")
	window_type = gr.Dropdown(["hann", "hamming"], value="hann", label="Window")

	comparison_metrics = gr.CheckboxGroup(["Cosine Similarity", "High-Freq Loss Ratio"],
	value=["Cosine Similarity", "High-Freq Loss Ratio"], label="Metrics")

	cluster_features = gr.CheckboxGroup(feature_list, value=["spectral_centroid", "spectral_flatness"],
	label="Clustering Features")

	clustering_algo = gr.Dropdown(["KMeans", "Agglomerative"], value="KMeans", label="Algorithm")
	n_clusters = gr.Slider(2, 10, value=4, step=1, label="Clusters")
	# Hidden control: its value is still passed to analyze_audio_pair, but
	# "DBSCAN" is not among the Algorithm choices above.
	dbscan_eps = gr.Slider(0.1, 5.0, value=0.5, visible=False)

	btn = gr.Button("🚀 Analyze", variant="primary")

	# Result tabs.
	with gr.Tabs():
	with gr.Tab("📈 Comparison"):
	comp_plot = gr.Plot()
	comp_table = gr.Dataframe()

	with gr.Tab("🧩 Phoneme Clustering"):
	with gr.Row():
	# Radio that switches the cluster plot between the near and far tables.
	view_toggle = gr.Radio(["Near Field", "Far Field"], value="Near Field", label="View Mode")
	cluster_plot = gr.Plot()
	cluster_table = gr.Dataframe()

	with gr.Tab("🔍 Spectral"):
	spec_heatmap = gr.Plot()
	with gr.Tab("🧭 Overlay"):
	overlay_plot = gr.Plot()

	with gr.Tab("📤 Export"):
	export_btn = gr.Button("Download CSVs")
	export_files = gr.Files()

	# Main analysis event: the eight outputs match, in order, the eight values
	# returned by analyze_audio_pair.
	btn.click(
	fn=analyze_audio_pair,
	inputs=[near_file, far_file, frame_length_ms, hop_length_ms, window_type,
	comparison_metrics, cluster_features, clustering_algo, n_clusters, dbscan_eps],
	outputs=[comp_plot, comp_table,
	cluster_plot, cluster_table,
	spec_heatmap, overlay_plot,
	state_near_df, state_far_df] # last two values are stored in State
	)

	# Toggle event: redraws the cluster plot from the stored tables without
	# re-running the analysis. It reads the current feature selection.
	view_toggle.change(
	fn=update_cluster_view,
	inputs=[view_toggle, state_near_df, state_far_df, cluster_features],
	outputs=[cluster_plot]
	)

	# Export event: writes the comparison table and both stored cluster tables.
	export_btn.click(fn=export_results, inputs=[comp_table, state_near_df, state_far_df], outputs=export_files)

	# Start the web server only when the file is run as a script.
	if __name__ == "__main__":
	demo.launch()