Spaces:

AdityaK007
/

MSD

Sleeping

App Files Files Community

AdityaK007 committed on Oct 22, 2025

Commit

5b17c9f

verified ·

1 Parent(s): d1022e8

Create app1.py

Browse files

Files changed (1) hide show

app1.py +478 -0

app1.py ADDED Viewed

	@@ -0,0 +1,478 @@

+import gradio as gr
+import librosa
+import numpy as np
+import pandas as pd
+from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
+from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
+from scipy.spatial.distance import jensenshannon
+from scipy.stats import pearsonr
+from scipy.signal import get_window as scipy_get_window
+import plotly.express as px
+import plotly.graph_objects as go
+import os
+import tempfile
+# ----------------------------
+# Fixed: Added missing segment_audio function
+# ----------------------------
def segment_audio(y, sr, frame_length_ms, hop_length_ms, window_type="hann"):
    """Split a 1-D signal into overlapping, windowed frames.

    Args:
        y: 1-D array of audio samples.
        sr: Sampling rate in Hz.
        frame_length_ms: Frame duration in milliseconds.
        hop_length_ms: Step between consecutive frame starts in milliseconds.
        window_type: Any window name accepted by ``scipy.signal.get_window``;
            the alias ``"rectangular"`` is mapped to SciPy's ``"boxcar"``.

    Returns:
        A tuple ``(frames, frame_length)``. ``frames`` has shape
        ``(frame_length, n_frames)`` (one frame per column) and
        ``frame_length`` is the frame size in samples. When the signal is
        shorter than one frame, a single zero-padded frame is returned.
    """
    y = np.asarray(y)
    # Clamp to one sample: a hop of 0 would make range() raise ValueError and
    # a frame of 0 samples would yield only empty frames.
    frame_length = max(1, int(frame_length_ms * sr / 1000))
    hop_length = max(1, int(hop_length_ms * sr / 1000))

    window_name = "boxcar" if window_type == "rectangular" else window_type
    window = scipy_get_window(window_name, frame_length)

    if len(y) < frame_length:
        # Keep the samples we do have and pad the tail with zeros, instead of
        # throwing the audio away and returning a silent frame.
        padded = np.pad(y, (0, frame_length - len(y)))
        return (padded * window).reshape(-1, 1), frame_length

    starts = range(0, len(y) - frame_length + 1, hop_length)
    frames = np.stack([y[s:s + frame_length] * window for s in starts], axis=1)
    return frames, frame_length
+# ----------------------------
+# Enhanced Feature Extraction (with spectral bins)
+# ----------------------------
def _blank_features(n_mfcc, n_fft):
    """Return an all-zero feature dict with the same key order as a real frame."""
    feat = {"rms": 0.0, "spectral_centroid": 0.0, "zcr": 0.0}
    feat.update({f"mfcc_{j + 1}": 0.0 for j in range(n_mfcc)})
    feat.update({"low_freq_energy": 0.0, "mid_freq_energy": 0.0, "high_freq_energy": 0.0})
    feat["spectrum"] = np.zeros((n_fft // 2 + 1, 1))
    return feat


def _mean_or_zero(compute):
    """Return ``mean(compute()[0])`` as a float, or 0.0 if the computation fails."""
    try:
        return float(np.mean(compute()[0]))
    except Exception:
        # Best effort: one frame the library cannot process must not abort
        # the whole analysis, so that feature is reported as 0.0.
        return 0.0


def extract_features_with_spectrum(frames, sr):
    """Compute per-frame scalar features plus a dB spectrum.

    Args:
        frames: 2-D array of shape ``(frame_length, n_frames)``, one frame per
            column.
        sr: Sampling rate in Hz.

    Returns:
        A list with one dict per input frame, in frame order. Each dict holds
        ``rms``, ``spectral_centroid``, ``zcr``, ``mfcc_1`` .. ``mfcc_13``,
        ``low_freq_energy`` (<= 2 kHz), ``mid_freq_energy`` (2-4 kHz],
        ``high_freq_energy`` (> 4 kHz) and ``spectrum`` (dB magnitude STFT).
        Silent or empty frames get an all-zero entry rather than being
        dropped, so list index ``i`` always corresponds to frame ``i``. If
        there are no frames at all, a single all-zero entry is returned.
    """
    n_mfcc = 13
    n_fft = min(2048, frames.shape[0])  # n_fft must not exceed the frame length
    features = []

    for i in range(frames.shape[1]):
        frame = frames[:, i]
        # Start from zeros in canonical key order; failed features stay 0.0.
        feat = _blank_features(n_mfcc, n_fft)

        if frame.size == 0 or np.max(np.abs(frame)) < 1e-10:
            # Keep a placeholder so frame indices stay aligned between the
            # recordings that are later compared index by index.
            features.append(feat)
            continue

        feat["rms"] = _mean_or_zero(lambda: librosa.feature.rms(y=frame))
        feat["spectral_centroid"] = _mean_or_zero(
            lambda: librosa.feature.spectral_centroid(y=frame, sr=sr)
        )
        feat["zcr"] = _mean_or_zero(lambda: librosa.feature.zero_crossing_rate(frame))

        try:
            mfccs = librosa.feature.mfcc(y=frame, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft)
            mfcc_means = [float(np.mean(mfccs[j])) for j in range(n_mfcc)]
        except Exception:
            mfcc_means = [0.0] * n_mfcc
        for j, value in enumerate(mfcc_means):
            feat[f"mfcc_{j + 1}"] = value

        try:
            S = np.abs(librosa.stft(frame, n_fft=n_fft))
            S_db = librosa.amplitude_to_db(S, ref=np.max)
            freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
            bands = {
                "low_freq_energy": freqs <= 2000,
                "mid_freq_energy": (freqs > 2000) & (freqs <= 4000),
                "high_freq_energy": freqs > 4000,
            }
            band_levels = {
                name: float(np.mean(S_db[mask])) if np.any(mask) else 0.0
                for name, mask in bands.items()
            }
        except Exception:
            # Leave the zeroed band energies and spectrum from the blank entry.
            pass
        else:
            feat.update(band_levels)
            feat["spectrum"] = S_db  # full spectrum, used for the heatmap

        features.append(feat)

    if not features:
        # No frames at all: return one zero entry so downstream code has a row.
        features.append(_blank_features(n_mfcc, n_fft))
    return features
def _to_distributions(mat):
    """Shift each row to be strictly positive and normalise it to sum to 1."""
    if mat.shape[1] == 0:
        return mat
    shifted = mat - mat.min(axis=1, keepdims=True) + 1e-8
    return shifted / shifted.sum(axis=1, keepdims=True)


def _feature_delta(near_feats, far_feats, key, count):
    """Per-frame ``near[key] - far[key]``; 0.0 where the key is missing or unusable."""
    deltas = []
    for near, far in zip(near_feats[:count], far_feats[:count]):
        try:
            deltas.append(float(near[key] - far[key]))
        except (KeyError, TypeError, ValueError):
            deltas.append(0.0)
    return deltas


def compare_frames_enhanced(near_feats, far_feats, metrics):
    """Compare near-field and far-field feature dicts frame by frame.

    Args:
        near_feats: List of per-frame feature dicts for the near recording.
        far_feats: List of per-frame feature dicts for the far recording.
        metrics: Collection of metric names to compute. Recognised names are
            "Euclidean Distance", "Cosine Similarity", "Pearson Correlation",
            "KL Divergence", "Jensen-Shannon Divergence",
            "High-Freq Loss Ratio" and "Spectral Centroid Shift".

    Returns:
        A DataFrame with a ``frame_index`` column and one column per requested
        metric, covering the first ``min(len(near_feats), len(far_feats))``
        frames. With no frames, only an empty ``frame_index`` column is
        returned.
    """
    min_len = min(len(near_feats), len(far_feats))
    if min_len == 0:
        return pd.DataFrame({"frame_index": []})
    results = {"frame_index": list(range(min_len))}

    # Build numeric matrices; the "spectrum" entry is an array, not a scalar.
    near_df = pd.DataFrame(near_feats[:min_len]).drop(columns=["spectrum"], errors="ignore")
    far_df = pd.DataFrame(far_feats[:min_len]).drop(columns=["spectrum"], errors="ignore")
    # Align far columns to near columns by name: the vectors are compared
    # positionally, so differing dict key order must not scramble features.
    far_df = far_df.reindex(columns=near_df.columns, fill_value=0.0)
    near_vec = near_df.to_numpy(dtype=float)
    far_vec = far_df.to_numpy(dtype=float)

    if "Euclidean Distance" in metrics:
        results["euclidean_dist"] = np.linalg.norm(near_vec - far_vec, axis=1).tolist()

    if "Cosine Similarity" in metrics:
        near_zero = ~np.any(near_vec != 0, axis=1)
        far_zero = ~np.any(far_vec != 0, axis=1)
        either_zero = near_zero | far_zero
        dots = np.sum(near_vec * far_vec, axis=1)
        norms = np.linalg.norm(near_vec, axis=1) * np.linalg.norm(far_vec, axis=1)
        safe_norms = np.where(either_zero, 1.0, norms)
        # Two zero vectors count as identical (1.0); exactly one zero vector
        # counts as unrelated (0.0).
        cos = np.where(
            near_zero & far_zero,
            1.0,
            np.where(either_zero, 0.0, dots / safe_norms),
        )
        results["cosine_similarity"] = cos.tolist()

    if "Pearson Correlation" in metrics:
        corr_vals = []
        for a, b in zip(near_vec, far_vec):
            try:
                corr, _ = pearsonr(a, b)
                corr = float(corr)
            except ValueError:
                # e.g. fewer than two features per frame
                corr = 0.0
            # Constant input yields NaN; report it as "no correlation".
            corr_vals.append(0.0 if np.isnan(corr) else corr)
        results["pearson_corr"] = corr_vals

    wants_kl = "KL Divergence" in metrics
    wants_js = "Jensen-Shannon Divergence" in metrics
    if wants_kl or wants_js:
        p = _to_distributions(near_vec)
        q = _to_distributions(far_vec)
        if wants_kl:
            results["kl_divergence"] = np.sum(p * np.log(p / q), axis=1).tolist()
        if wants_js:
            # True JS divergence (natural log), i.e. the square of the
            # Jensen-Shannon distance.
            m = 0.5 * (p + q)
            js = 0.5 * np.sum(p * np.log(p / m), axis=1) + 0.5 * np.sum(q * np.log(q / m), axis=1)
            # Clip tiny negative rounding error; the divergence is >= 0.
            results["js_divergence"] = np.maximum(js, 0.0).tolist()

    if "High-Freq Loss Ratio" in metrics:
        # dB difference; positive means high-frequency energy lost in far field.
        results["high_freq_loss_db"] = _feature_delta(
            near_feats, far_feats, "high_freq_energy", min_len
        )

    if "Spectral Centroid Shift" in metrics:
        results["centroid_shift"] = _feature_delta(
            near_feats, far_feats, "spectral_centroid", min_len
        )

    return pd.DataFrame(results)
def cluster_frames_custom(features_df, cluster_features, algo, n_clusters=5, eps=0.5):
    """Cluster frames on the selected feature columns.

    Args:
        features_df: DataFrame with one row per frame and numeric features.
        cluster_features: Names of the columns to cluster on.
        algo: One of "KMeans", "Agglomerative" or "DBSCAN".
        n_clusters: Requested cluster count for KMeans/Agglomerative; capped
            at the number of rows.
        eps: Neighbourhood radius for DBSCAN.

    Returns:
        A copy of ``features_df`` with an added ``cluster`` label column. The
        input DataFrame is never modified.

    Raises:
        gr.Error: If ``cluster_features`` is empty.
        ValueError: If ``algo`` is not a recognised algorithm name.
    """
    if not cluster_features:
        raise gr.Error("Please select at least one feature for clustering.")

    # Work on a copy on every path so the caller's frame is left untouched.
    clustered = features_df.copy()
    if len(clustered) == 0:
        clustered["cluster"] = []
        return clustered

    X = clustered[list(cluster_features)].values
    n_samples = len(X)
    # UI sliders may deliver floats; the estimators require an int that does
    # not exceed the number of samples.
    n_clusters = max(1, min(int(n_clusters), n_samples))

    if algo == "KMeans":
        labels = KMeans(n_clusters=n_clusters, random_state=42, n_init=10).fit_predict(X)
    elif algo == "Agglomerative":
        if n_samples < 2:
            # Agglomerative clustering needs at least two samples; a single
            # frame trivially forms cluster 0.
            labels = np.zeros(n_samples, dtype=int)
        else:
            labels = AgglomerativeClustering(n_clusters=n_clusters).fit_predict(X)
    elif algo == "DBSCAN":
        # DBSCAN finds the cluster count itself; n_clusters is not used.
        labels = DBSCAN(eps=eps, min_samples=min(3, n_samples)).fit_predict(X)
    else:
        raise ValueError("Unknown clustering algorithm")

    clustered["cluster"] = labels
    return clustered
def plot_spectral_difference(near_feats, far_feats, frame_idx=0):
    """Build a heatmap of the near-minus-far dB spectrum for one frame.

    Args:
        near_feats: Per-frame feature dicts (each with a ``"spectrum"`` array)
            for the near recording.
        far_feats: Same, for the far recording.
        frame_idx: Index of the frame to plot.

    Returns:
        A Plotly figure. If either list is empty or ``frame_idx`` is past the
        end of either list, an empty figure with an explanatory title is
        returned instead.
    """
    frame_available = (
        bool(near_feats)
        and bool(far_feats)
        and frame_idx < len(near_feats)
        and frame_idx < len(far_feats)
    )
    if not frame_available:
        placeholder = go.Figure()
        placeholder.update_layout(title="No data available for spectral analysis", height=300)
        return placeholder

    spectra = (near_feats[frame_idx]["spectrum"], far_feats[frame_idx]["spectrum"])
    # Trim both spectra to the number of frequency bins they share.
    shared_bins = min(spectrum.shape[0] for spectrum in spectra)
    near_trimmed, far_trimmed = (spectrum[:shared_bins] for spectrum in spectra)

    # Positive values mean energy present near but missing in the far field.
    delta = near_trimmed - far_trimmed

    heatmap = go.Heatmap(
        z=delta,
        colorscale='RdBu',
        zmid=0,
        colorbar=dict(title="dB Difference"),
    )
    figure = go.Figure(data=heatmap)
    figure.update_layout(
        title=f"Spectral Difference (Frame {frame_idx}): Near - Far",
        xaxis_title="Time Frames",
        yaxis_title="Frequency Bins",
        height=300,
    )
    return figure
+# ----------------------------
+# Main Analysis Function
+# ----------------------------
def _upload_path(upload):
    """Return the filesystem path behind an uploaded file.

    The upload may arrive either as a plain path (``str`` / path-like) or as a
    temp-file style object exposing the path as ``.name``; both are accepted.
    """
    if isinstance(upload, (str, os.PathLike)):
        return os.fspath(upload)
    return upload.name


def analyze_audio_pair(
    near_file,
    far_file,
    frame_length_ms,
    hop_length_ms,
    window_type,
    comparison_metrics,
    cluster_features,
    clustering_algo,
    n_clusters,
    dbscan_eps
):
    """Run the full near-vs-far analysis for one pair of uploaded recordings.

    Loads both files (resampling the far recording to the near recording's
    rate if they differ), frames them, extracts features, compares the frames
    and clusters the near-field frames.

    Args:
        near_file: Uploaded near-field audio (path or object with ``.name``).
        far_file: Uploaded far-field audio (path or object with ``.name``).
        frame_length_ms: Frame duration in milliseconds.
        hop_length_ms: Hop between frames in milliseconds.
        window_type: Window name passed to ``segment_audio``.
        comparison_metrics: Metric names passed to ``compare_frames_enhanced``.
        cluster_features: Feature column names used for clustering; the first
            two are also used as scatter-plot axes.
        clustering_algo: Algorithm name passed to ``cluster_frames_custom``.
        n_clusters: Cluster count for KMeans/Agglomerative.
        dbscan_eps: Neighbourhood radius for DBSCAN.

    Returns:
        ``(comparison_plot, comparison_df, scatter_plot, clustered_df,
        spectral_heatmap)``.

    Raises:
        gr.Error: If a file is missing or cannot be loaded.
    """
    if not near_file or not far_file:
        raise gr.Error("Upload both audio files.")
    try:
        y_near, sr_near = librosa.load(_upload_path(near_file), sr=None)
        y_far, sr_far = librosa.load(_upload_path(far_file), sr=None)
    except Exception as e:
        raise gr.Error(f"Error loading audio files: {str(e)}") from e

    # The near recording's rate is the reference for all later processing.
    sr = sr_near
    if sr_far != sr:
        y_far = librosa.resample(y_far, orig_sr=sr_far, target_sr=sr)

    frames_near, _ = segment_audio(y_near, sr, frame_length_ms, hop_length_ms, window_type)
    frames_far, _ = segment_audio(y_far, sr, frame_length_ms, hop_length_ms, window_type)
    near_feats = extract_features_with_spectrum(frames_near, sr)
    far_feats = extract_features_with_spectrum(frames_far, sr)

    comparison_df = compare_frames_enhanced(near_feats, far_feats, comparison_metrics)

    # Clustering is done on the near-field frames only.
    near_df = pd.DataFrame(near_feats).drop(columns=["spectrum"], errors="ignore")
    clustered_df = cluster_frames_custom(
        near_df, cluster_features, clustering_algo, n_clusters, dbscan_eps
    )

    # Line plot of the first computed metric over frame index.
    metric_cols = [col for col in comparison_df.columns if col != "frame_index"]
    if metric_cols and len(comparison_df) > 0:
        metric_to_plot = metric_cols[0]
        plot_comparison = px.line(
            comparison_df,
            x="frame_index",
            y=metric_to_plot,
            title=f"{metric_to_plot.replace('_', ' ').title()} Over Time"
        )
    else:
        plot_comparison = px.line(title="No comparison data available")

    # Scatter plot of the first two selected clustering features.
    if len(cluster_features) >= 2 and len(clustered_df) > 0:
        x_feat, y_feat = cluster_features[0], cluster_features[1]
        if x_feat in clustered_df.columns and y_feat in clustered_df.columns:
            plot_scatter = px.scatter(
                clustered_df,
                x=x_feat,
                y=y_feat,
                color="cluster",
                title=f"Clustering: {x_feat} vs {y_feat}",
                hover_data=["cluster"]
            )
        else:
            plot_scatter = px.scatter(title="Selected features not available in data")
    else:
        plot_scatter = px.scatter(title="Select ≥2 features for scatter plot")

    # Spectral difference heatmap for the first frame.
    spec_heatmap = plot_spectral_difference(near_feats, far_feats, frame_idx=0)

    return (
        plot_comparison,
        comparison_df,
        plot_scatter,
        clustered_df,
        spec_heatmap
    )
def export_results(comparison_df, clustered_df):
    """Write both result tables as CSV files into a fresh temporary directory.

    Args:
        comparison_df: Frame-comparison table; saved as
            ``frame_comparisons.csv``.
        clustered_df: Clustered-frames table; saved as
            ``clustered_frames.csv``.

    Returns:
        A two-element list with the comparison CSV path followed by the
        cluster CSV path.
    """
    out_dir = tempfile.mkdtemp()
    exports = (
        ("frame_comparisons.csv", comparison_df),
        ("clustered_frames.csv", clustered_df),
    )
    written = []
    for filename, table in exports:
        target = os.path.join(out_dir, filename)
        # Row index is not part of the exported data.
        table.to_csv(target, index=False)
        written.append(target)
    return written
+# ----------------------------
+# Gradio UI
+# ----------------------------
# Feature names offered in the clustering checkbox group: the three basic
# descriptors, thirteen MFCC means and the three band energies.
dummy_features = [
    "rms",
    "spectral_centroid",
    "zcr",
    *(f"mfcc_{idx}" for idx in range(1, 14)),
    "low_freq_energy",
    "mid_freq_energy",
    "high_freq_energy",
]
# Gradio layout: inputs and settings at the top, an Analyze button, then one
# tab group holding the comparison, clustering, spectral and export views.
with gr.Blocks(title="Advanced Near vs Far Field Analyzer") as demo:
    gr.Markdown("# 🎙️ Advanced Near vs Far Field Speech Analyzer")
    gr.Markdown("Upload simultaneous recordings. Analyze **lost frequencies**, **frame degradation**, and **cluster by custom attributes**.")
    with gr.Row():
        near_file = gr.File(label="Near-Field Audio (.wav)", file_types=[".wav"])
        far_file = gr.File(label="Far-Field Audio (.wav)", file_types=[".wav"])
    with gr.Accordion("⚙️ Frame Settings", open=True):
        frame_length_ms = gr.Slider(10, 500, value=50, step=1, label="Frame Length (ms)")
        hop_length_ms = gr.Slider(1, 250, value=25, step=1, label="Hop Length (ms)")
        window_type = gr.Dropdown(["hann", "hamming", "rectangular"], value="hann", label="Window Type")
    with gr.Accordion("📊 Comparison Metrics", open=True):
        comparison_metrics = gr.CheckboxGroup(
            choices=[
                "Euclidean Distance",
                "Cosine Similarity",
                "Pearson Correlation",
                "KL Divergence",
                "Jensen-Shannon Divergence",
                "High-Freq Loss Ratio",
                "Spectral Centroid Shift"
            ],
            value=["High-Freq Loss Ratio", "Cosine Similarity"],
            label="Select Comparison Metrics"
        )
    with gr.Accordion("🧩 Clustering Configuration", open=True):
        cluster_features = gr.CheckboxGroup(
            choices=dummy_features,
            value=["rms", "spectral_centroid", "high_freq_energy"],
            label="Features to Use for Clustering"
        )
        clustering_algo = gr.Radio(
            ["KMeans", "Agglomerative", "DBSCAN"],
            value="KMeans",
            label="Clustering Algorithm"
        )
        n_clusters = gr.Slider(2, 20, value=5, step=1, label="Number of Clusters (for KMeans/Agglomerative)")
        dbscan_eps = gr.Slider(0.1, 2.0, value=0.5, step=0.1, label="DBSCAN eps (neighborhood radius)")
    btn = gr.Button("🚀 Analyze")
    with gr.Tabs():
        with gr.Tab("📈 Frame Comparison"):
            comp_plot = gr.Plot()
            comp_table = gr.Dataframe()
        with gr.Tab("🧩 Clustering"):
            cluster_plot = gr.Plot()
            cluster_table = gr.Dataframe()
        with gr.Tab("🔍 Spectral Analysis"):
            spec_heatmap = gr.Plot(label="Spectral Difference (Near - Far)")
        # Export lives in the same tab group as the result views rather than
        # in a separate group of its own.
        with gr.Tab("📤 Export"):
            export_btn = gr.Button("💾 Download CSVs")
            export_files = gr.Files()
    # Analyze: run the full pipeline and fill every result component.
    btn.click(
        fn=analyze_audio_pair,
        inputs=[
            near_file, far_file,
            frame_length_ms, hop_length_ms, window_type,
            comparison_metrics,
            cluster_features,
            clustering_algo,
            n_clusters,
            dbscan_eps
        ],
        outputs=[comp_plot, comp_table, cluster_plot, cluster_table, spec_heatmap]
    )
    # Export: write the two tables currently shown to CSV files.
    export_btn.click(
        fn=export_results,
        inputs=[comp_table, cluster_table],
        outputs=export_files
    )
if __name__ == "__main__":
    demo.launch()