AdityaK007 committed on
Commit
d0d071f
·
verified ·
1 Parent(s): a2f0ea1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +246 -133
app.py CHANGED
@@ -5,7 +5,7 @@ import pandas as pd
5
  from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
6
  from sklearn.metrics.pairwise import cosine_similarity
7
  from scipy.spatial.distance import jensenshannon
8
- from scipy.stats import pearsonr
9
  from scipy.signal import get_window as scipy_get_window
10
  import plotly.express as px
11
  import plotly.graph_objects as go
@@ -13,16 +13,58 @@ import os
13
  import tempfile
14
 
15
  # ----------------------------
16
- # Segment Audio into Frames
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  # ----------------------------
18
  def segment_audio(y, sr, frame_length_ms, hop_length_ms, window_type="hann"):
19
  frame_length = int(frame_length_ms * sr / 1000)
20
  hop_length = int(hop_length_ms * sr / 1000)
21
  window = scipy_get_window(window_type if window_type != "rectangular" else "boxcar", frame_length)
22
  frames = []
 
 
 
 
23
  for i in range(0, len(y) - frame_length + 1, hop_length):
24
  frame = y[i:i + frame_length] * window
25
  frames.append(frame)
 
26
  if frames:
27
  frames = np.array(frames).T
28
  else:
@@ -30,58 +72,72 @@ def segment_audio(y, sr, frame_length_ms, hop_length_ms, window_type="hann"):
30
  return frames, frame_length
31
 
32
  # ----------------------------
33
- # Feature Extraction
34
  # ----------------------------
35
  def extract_features_with_spectrum(frames, sr):
36
  features = []
37
  n_mfcc = 13
38
  n_fft = min(2048, frames.shape[0])
 
39
  for i in range(frames.shape[1]):
40
  frame = frames[:, i]
 
 
41
  if len(frame) < n_fft or np.max(np.abs(frame)) < 1e-10:
 
 
 
 
 
42
  continue
 
43
  feat = {}
44
- try:
45
- feat["rms"] = float(np.mean(librosa.feature.rms(y=frame)[0]))
46
- except: feat["rms"] = 0.0
 
 
47
  try:
48
  feat["spectral_centroid"] = float(np.mean(librosa.feature.spectral_centroid(y=frame, sr=sr)[0]))
49
  except: feat["spectral_centroid"] = 0.0
 
 
50
  try:
51
- feat["zcr"] = float(np.mean(librosa.feature.zero_crossing_rate(frame)[0]))
52
- except: feat["zcr"] = 0.0
 
 
53
  try:
54
  mfccs = librosa.feature.mfcc(y=frame, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft)
55
  for j in range(n_mfcc):
56
  feat[f"mfcc_{j+1}"] = float(np.mean(mfccs[j]))
57
  except:
58
- for j in range(n_mfcc):
59
- feat[f"mfcc_{j+1}"] = 0.0
 
60
  try:
61
  S = np.abs(librosa.stft(frame, n_fft=n_fft))
62
  S_db = librosa.amplitude_to_db(S, ref=np.max)
63
  freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
 
64
  low_mask = freqs <= 2000
65
  mid_mask = (freqs > 2000) & (freqs <= 4000)
66
  high_mask = freqs > 4000
67
- feat["low_freq_energy"] = float(np.mean(S_db[low_mask])) if np.any(low_mask) else 0.0
68
- feat["mid_freq_energy"] = float(np.mean(S_db[mid_mask])) if np.any(mid_mask) else 0.0
69
- feat["high_freq_energy"] = float(np.mean(S_db[high_mask])) if np.any(high_mask) else 0.0
 
70
  feat["spectrum"] = S_db
71
  except:
72
- feat["low_freq_energy"] = feat["mid_freq_energy"] = feat["high_freq_energy"] = 0.0
73
  feat["spectrum"] = np.zeros((n_fft // 2 + 1, 1))
 
74
  features.append(feat)
75
- if not features:
76
- feat = { "rms": 0.0, "spectral_centroid": 0.0, "zcr": 0.0,
77
- "low_freq_energy": 0.0, "mid_freq_energy": 0.0, "high_freq_energy": 0.0,
78
- "spectrum": np.zeros((n_fft // 2 + 1, 1)) }
79
- for j in range(n_mfcc): feat[f"mfcc_{j+1}"] = 0.0
80
- features.append(feat)
81
  return features
82
 
83
  # ----------------------------
84
- # Frame Comparison (core metrics)
85
  # ----------------------------
86
  def compare_frames_enhanced(near_feats, far_feats, metrics):
87
  min_len = min(len(near_feats), len(far_feats))
@@ -91,12 +147,16 @@ def compare_frames_enhanced(near_feats, far_feats, metrics):
91
  results = {"frame_index": list(range(min_len))}
92
  near_df = pd.DataFrame([f for f in near_feats[:min_len]])
93
  far_df = pd.DataFrame([f for f in far_feats[:min_len]])
94
- near_vec = near_df.drop(columns=["spectrum"], errors="ignore").values
95
- far_vec = far_df.drop(columns=["spectrum"], errors="ignore").values
 
 
 
96
 
97
  # Euclidean Distance
98
  if "Euclidean Distance" in metrics:
99
  results["euclidean_dist"] = np.linalg.norm(near_vec - far_vec, axis=1).tolist()
 
100
  # Cosine Similarity
101
  if "Cosine Similarity" in metrics:
102
  cos_vals = []
@@ -107,35 +167,28 @@ def compare_frames_enhanced(near_feats, far_feats, metrics):
107
  else:
108
  cos_vals.append(float(cosine_similarity(a, b)[0][0]))
109
  results["cosine_similarity"] = cos_vals
110
- # High-Freq Loss Ratio (Quality)
 
111
  if "High-Freq Loss Ratio" in metrics:
112
  loss_ratios = []
113
  for i in range(min_len):
114
  near_high = near_feats[i]["high_freq_energy"]
115
  far_high = far_feats[i]["high_freq_energy"]
116
- ratio = max(0.0, 1.0 - abs(near_high - far_high) / (abs(near_high) + 1e-6))
117
- loss_ratios.append(float(ratio))
118
- results["high_freq_quality"] = loss_ratios
 
 
119
 
120
- # πŸ”Ή Energy Ratio
121
- energy_ratio = []
122
  for i in range(min_len):
123
- near_rms = near_feats[i]["rms"]; far_rms = far_feats[i]["rms"]
124
- ratio = (far_rms + 1e-6) / (near_rms + 1e-6)
125
- energy_ratio.append(float(np.clip(ratio, 0, 1)))
126
- results["energy_ratio"] = energy_ratio
127
 
128
- # πŸ”Ή Clarity Ratio
129
- clarity_ratio = []
130
- for i in range(min_len):
131
- near_low, near_high = near_feats[i]["low_freq_energy"], near_feats[i]["high_freq_energy"]
132
- far_low, far_high = far_feats[i]["low_freq_energy"], far_feats[i]["high_freq_energy"]
133
- near_ratio, far_ratio = (near_low - near_high), (far_low - far_high)
134
- diff = 1 - abs(far_ratio - near_ratio) / (abs(near_ratio) + 1e-6)
135
- clarity_ratio.append(np.clip(diff, 0, 1))
136
- results["clarity_ratio"] = clarity_ratio
137
-
138
- # πŸ”Ή Spectral Overlap
139
  overlap_scores = []
140
  for i in range(min_len):
141
  near_spec = near_feats[i]["spectrum"].flatten()
@@ -147,31 +200,39 @@ def compare_frames_enhanced(near_feats, far_feats, metrics):
147
  overlap_scores.append(overlap)
148
  results["spectral_overlap"] = overlap_scores
149
 
150
- # πŸ”Ή Combined Weighted Quality
151
- weights = {
152
- "cosine_similarity": 0.3,
153
- "high_freq_quality": 0.25,
154
- "energy_ratio": 0.2,
155
- "clarity_ratio": 0.15,
156
- "spectral_overlap": 0.1
157
- }
158
- combined_quality = []
159
  for i in range(min_len):
160
- val = sum(results[k][i] * w for k, w in weights.items() if k in results)
161
- combined_quality.append(float(val / sum(weights.values())))
162
- results["combined_quality"] = combined_quality
 
 
 
163
  return pd.DataFrame(results)
164
 
165
  # ----------------------------
166
- # Clustering + Overlay
167
  # ----------------------------
168
  def cluster_frames_custom(features_df, cluster_features, algo, n_clusters=5, eps=0.5):
169
  if not cluster_features:
170
- raise gr.Error("Please select at least one feature for clustering.")
171
- if len(features_df) == 0:
172
- features_df["cluster"] = []
 
 
173
  return features_df
174
- X = features_df[cluster_features].values
 
 
 
 
 
 
 
 
 
175
  if algo == "KMeans":
176
  n_clusters = min(n_clusters, len(X))
177
  model = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
@@ -184,32 +245,35 @@ def cluster_frames_custom(features_df, cluster_features, algo, n_clusters=5, eps
184
  model = DBSCAN(eps=eps, min_samples=min(3, len(X)))
185
  labels = model.fit_predict(X)
186
  else:
187
- raise ValueError("Unknown clustering algorithm")
 
188
  features_df = features_df.copy()
189
  features_df["cluster"] = labels
190
  return features_df
191
 
192
  def plot_spectral_difference(near_feats, far_feats, frame_idx=0):
193
- if not near_feats or not far_feats or frame_idx >= len(near_feats) or frame_idx >= len(far_feats):
194
- fig = go.Figure(); fig.update_layout(title="No data available"); return fig
195
- near_spec = near_feats[frame_idx]["spectrum"]; far_spec = far_feats[frame_idx]["spectrum"]
 
 
 
 
 
196
  min_freq_bins = min(near_spec.shape[0], far_spec.shape[0])
197
  diff = near_spec[:min_freq_bins] - far_spec[:min_freq_bins]
 
198
  fig = go.Figure(data=go.Heatmap(z=diff, colorscale='RdBu', zmid=0))
199
- fig.update_layout(title=f"Spectral Difference (Frame {frame_idx})", height=300)
200
- return fig
201
-
202
- def plot_cluster_overlay(df, cluster_metric, overlay_metric):
203
- if cluster_metric not in df.columns or overlay_metric not in df.columns:
204
- fig = go.Figure(); fig.update_layout(title="Metrics not found"); return fig
205
- fig = px.scatter(df, x=cluster_metric, y=overlay_metric, color=overlay_metric,
206
- color_continuous_scale='Viridis',
207
- title=f"Cluster Overlay: {cluster_metric} vs {overlay_metric}")
208
- fig.update_layout(height=400)
209
  return fig
210
 
211
  # ----------------------------
212
- # Main Analysis Function
213
  # ----------------------------
214
  def analyze_audio_pair(
215
  near_file, far_file,
@@ -217,36 +281,72 @@ def analyze_audio_pair(
217
  comparison_metrics, cluster_features, clustering_algo, n_clusters, dbscan_eps
218
  ):
219
  if not near_file or not far_file:
220
- raise gr.Error("Upload both audio files.")
 
 
 
221
  try:
222
  y_near, sr_near = librosa.load(near_file.name, sr=None)
223
- y_far, sr_far = librosa.load(far_file.name, sr=None)
224
- except Exception as e:
225
- raise gr.Error(f"Error loading audio: {str(e)}")
226
- if sr_near != sr_far:
227
- y_far = librosa.resample(y_far, orig_sr=sr_far, target_sr=sr_near)
228
- sr = sr_near
229
- else:
230
- sr = sr_near
231
- frames_near, _ = segment_audio(y_near, sr, frame_length_ms, hop_length_ms, window_type)
232
- frames_far, _ = segment_audio(y_far, sr, frame_length_ms, hop_length_ms, window_type)
233
- near_feats = extract_features_with_spectrum(frames_near, sr)
234
- far_feats = extract_features_with_spectrum(frames_far, sr)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
  comparison_df = compare_frames_enhanced(near_feats, far_feats, comparison_metrics)
 
 
236
  near_df = pd.DataFrame(near_feats).drop(columns=["spectrum"], errors="ignore")
237
  clustered_df = cluster_frames_custom(near_df, cluster_features, clustering_algo, n_clusters, dbscan_eps)
238
- # Plots
239
- metric_cols = [col for col in comparison_df.columns if col != "frame_index"]
240
- plot_comparison = px.line(comparison_df, x="frame_index", y=metric_cols[0],
241
- title=f"{metric_cols[0].replace('_',' ').title()} Over Time") if metric_cols else px.line()
242
- if len(cluster_features) >= 2 and len(clustered_df) > 0:
243
- x_feat, y_feat = cluster_features[0], cluster_features[1]
244
- plot_scatter = px.scatter(clustered_df, x=x_feat, y=y_feat, color="cluster",
245
- title=f"Clustering: {x_feat} vs {y_feat}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  else:
247
- plot_scatter = px.scatter(title="Select β‰₯2 features for clustering")
248
- spec_heatmap = plot_spectral_difference(near_feats, far_feats, frame_idx=0)
249
- overlay_fig = plot_cluster_overlay(clustered_df, cluster_features[0], "combined_quality")
250
  return plot_comparison, comparison_df, plot_scatter, clustered_df, spec_heatmap, overlay_fig
251
 
252
  def export_results(comparison_df, clustered_df):
@@ -258,59 +358,72 @@ def export_results(comparison_df, clustered_df):
258
  return [comp_path, cluster_path]
259
 
260
  # ----------------------------
261
- # Gradio UI
262
  # ----------------------------
263
- dummy_features = ["rms", "spectral_centroid", "zcr"] + [f"mfcc_{i}" for i in range(1,14)] + \
264
- ["low_freq_energy", "mid_freq_energy", "high_freq_energy"]
 
 
265
 
266
- with gr.Blocks(title="Advanced Near vs Far Field Analyzer") as demo:
267
- gr.Markdown("# πŸŽ™οΈ Advanced Near vs Far Field Speech Analyzer")
 
 
 
 
268
  with gr.Row():
269
- near_file = gr.File(label="Near-Field Audio (.wav)", file_types=[".wav"])
270
- far_file = gr.File(label="Far-Field Audio (.wav)")
 
 
271
 
272
- with gr.Accordion("βš™οΈ Frame Settings", open=True):
273
- frame_length_ms = gr.Slider(10, 500, value=50, step=1, label="Frame Length (ms)")
274
- hop_length_ms = gr.Slider(1, 250, value=25, step=1, label="Hop Length (ms)")
 
275
  window_type = gr.Dropdown(["hann", "hamming", "rectangular"], value="hann", label="Window Type")
276
 
277
- with gr.Accordion("πŸ“Š Comparison Metrics", open=True):
278
  comparison_metrics = gr.CheckboxGroup(
279
- choices=[
280
- "Euclidean Distance", "Cosine Similarity", "High-Freq Loss Ratio"
281
- ],
282
  value=["Cosine Similarity", "High-Freq Loss Ratio"],
283
- label="Select Metrics"
284
  )
285
-
286
- with gr.Accordion("🧩 Clustering Configuration", open=True):
287
  cluster_features = gr.CheckboxGroup(
288
- choices=dummy_features, value=["rms", "spectral_centroid", "high_freq_energy"],
289
- label="Features for Clustering")
290
- clustering_algo = gr.Radio(["KMeans", "Agglomerative", "DBSCAN"], value="KMeans", label="Clustering Algorithm")
291
- n_clusters = gr.Slider(2, 20, value=5, step=1, label="Clusters (for KMeans/Agglomerative)")
292
- dbscan_eps = gr.Slider(0.1, 2.0, value=0.5, step=0.1, label="DBSCAN eps")
 
 
 
293
 
294
- btn = gr.Button("πŸš€ Analyze")
295
 
296
  with gr.Tabs():
297
- with gr.Tab("πŸ“ˆ Frame Comparison"):
298
- comp_plot = gr.Plot(); comp_table = gr.Dataframe()
299
- with gr.Tab("🧩 Clustering"):
300
- cluster_plot = gr.Plot(); cluster_table = gr.Dataframe()
301
- with gr.Tab("πŸ” Spectral Analysis"):
302
- spec_heatmap = gr.Plot(label="Spectral Difference (Near - Far)")
303
- with gr.Tab("🧭 Metric Overlay"):
304
- overlay_plot = gr.Plot(label="Metric Overlay")
 
 
 
305
 
306
  with gr.Tab("πŸ“€ Export"):
307
- export_btn = gr.Button("πŸ’Ύ Download CSVs"); export_files = gr.Files()
 
308
 
309
  btn.click(fn=analyze_audio_pair,
310
  inputs=[near_file, far_file, frame_length_ms, hop_length_ms, window_type,
311
  comparison_metrics, cluster_features, clustering_algo, n_clusters, dbscan_eps],
312
  outputs=[comp_plot, comp_table, cluster_plot, cluster_table, spec_heatmap, overlay_plot])
 
313
  export_btn.click(fn=export_results, inputs=[comp_table, cluster_table], outputs=export_files)
314
 
315
  if __name__ == "__main__":
316
- demo.launch()
 
5
  from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
6
  from sklearn.metrics.pairwise import cosine_similarity
7
  from scipy.spatial.distance import jensenshannon
8
+ from scipy import signal
9
  from scipy.signal import get_window as scipy_get_window
10
  import plotly.express as px
11
  import plotly.graph_objects as go
 
13
  import tempfile
14
 
15
  # ----------------------------
16
+ # 1. Signal Alignment & Preprocessing (NEW)
17
+ # ----------------------------
18
def align_signals(ref, target):
    """Align the far-field signal to the near-field reference.

    Uses FFT-based cross-correlation to estimate the time-of-arrival
    delay between the two recordings, then trims whichever signal
    starts earlier so both share a common time origin.

    Parameters
    ----------
    ref : np.ndarray
        1-D reference (near-field) signal.
    target : np.ndarray
        1-D signal to align (far-field).

    Returns
    -------
    tuple[np.ndarray, np.ndarray]
        (aligned_ref, aligned_target), truncated to a common length.
        The returned samples are slices of the ORIGINAL inputs; the
        normalization below is used only for lag estimation.
    """
    # Peak-normalize working copies so amplitude differences cannot skew
    # the correlation.  Dividing by a positive scalar never moves the
    # argmax, so this is equivalent to librosa.util.normalize here while
    # keeping the helper dependency-free.  (+1e-12 guards silent input.)
    ref_norm = ref / (np.max(np.abs(ref)) + 1e-12)
    target_norm = target / (np.max(np.abs(target)) + 1e-12)

    # fftconvolve with a time-reversed kernel == cross-correlation,
    # O(n log n) instead of O(n^2) for long recordings.
    correlation = signal.fftconvolve(target_norm, ref_norm[::-1], mode='full')
    lags = signal.correlation_lags(len(target_norm), len(ref_norm), mode='full')

    # int() so the lag is a plain Python int, not a 0-d numpy scalar.
    # (Debug print removed: it wrote to stdout on every analysis run.)
    lag = int(lags[np.argmax(correlation)])

    if lag > 0:
        # Target is delayed relative to the reference: drop its leading samples.
        aligned_ref = ref
        aligned_target = target[lag:]
    else:
        # Reference is delayed instead: drop its leading samples.
        aligned_ref = ref[abs(lag):]
        aligned_target = target

    # Truncate both to the common overlap length.
    min_len = min(len(aligned_ref), len(aligned_target))
    return aligned_ref[:min_len], aligned_target[:min_len]
51
+
52
+ # ----------------------------
53
+ # 2. Segment Audio into Frames
54
  # ----------------------------
55
  def segment_audio(y, sr, frame_length_ms, hop_length_ms, window_type="hann"):
56
  frame_length = int(frame_length_ms * sr / 1000)
57
  hop_length = int(hop_length_ms * sr / 1000)
58
  window = scipy_get_window(window_type if window_type != "rectangular" else "boxcar", frame_length)
59
  frames = []
60
+
61
+ # Pad to ensure we don't drop the last partial frame
62
+ y_padded = np.pad(y, (0, frame_length), mode='constant')
63
+
64
  for i in range(0, len(y) - frame_length + 1, hop_length):
65
  frame = y[i:i + frame_length] * window
66
  frames.append(frame)
67
+
68
  if frames:
69
  frames = np.array(frames).T
70
  else:
 
72
  return frames, frame_length
73
 
74
  # ----------------------------
75
+ # 3. Feature Extraction
76
  # ----------------------------
77
def extract_features_with_spectrum(frames, sr):
    """Extract per-frame acoustic features plus the raw dB spectrum.

    Parameters
    ----------
    frames : np.ndarray
        2-D array of windowed frames, shape (frame_length, n_frames),
        as produced by segment_audio.
    sr : int
        Sample rate in Hz.

    Returns
    -------
    list[dict]
        One dict per frame with keys: rms, zcr, spectral_centroid,
        spectral_flatness, mfcc_1..mfcc_13, low/mid/high_freq_energy
        (mean band level in dB), and "spectrum" (2-D dB magnitude array).

    Fix: the previous bare ``except:`` clauses also swallowed
    SystemExit/KeyboardInterrupt; narrowed to ``except Exception``.
    """
    features = []
    n_mfcc = 13
    # Cap the FFT size at the frame length so short frames stay valid.
    n_fft = min(2048, frames.shape[0])

    for i in range(frames.shape[1]):
        frame = frames[:, i]

        # Skip empty/silent frames to prevent NaN: emit an all-zero
        # feature dict so frame indices stay aligned across channels.
        if len(frame) < n_fft or np.max(np.abs(frame)) < 1e-10:
            feat = {k: 0.0 for k in ["rms", "spectral_centroid", "zcr", "spectral_flatness",
                                     "low_freq_energy", "mid_freq_energy", "high_freq_energy"]}
            for j in range(n_mfcc):
                feat[f"mfcc_{j+1}"] = 0.0
            feat["spectrum"] = np.zeros((n_fft // 2 + 1, 1))
            features.append(feat)
            continue

        feat = {}
        # Basic time-domain features.
        feat["rms"] = float(np.mean(librosa.feature.rms(y=frame)[0]))
        feat["zcr"] = float(np.mean(librosa.feature.zero_crossing_rate(frame)[0]))

        # Spectral centroid ("brightness").
        try:
            feat["spectral_centroid"] = float(np.mean(librosa.feature.spectral_centroid(y=frame, sr=sr)[0]))
        except Exception:
            feat["spectral_centroid"] = 0.0

        # Reverb metric: noisier / more reverberant frames tend toward
        # higher flatness.
        try:
            feat["spectral_flatness"] = float(np.mean(librosa.feature.spectral_flatness(y=frame)[0]))
        except Exception:
            feat["spectral_flatness"] = 0.0

        # MFCCs (timbre envelope), averaged over the frame.
        try:
            mfccs = librosa.feature.mfcc(y=frame, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft)
            for j in range(n_mfcc):
                feat[f"mfcc_{j+1}"] = float(np.mean(mfccs[j]))
        except Exception:
            for j in range(n_mfcc):
                feat[f"mfcc_{j+1}"] = 0.0

        # Band energies in dB; -80 dB (near silence) is the sentinel for
        # an empty band, matching amplitude_to_db's typical floor.
        try:
            S = np.abs(librosa.stft(frame, n_fft=n_fft))
            S_db = librosa.amplitude_to_db(S, ref=np.max)
            freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)

            low_mask = freqs <= 2000
            mid_mask = (freqs > 2000) & (freqs <= 4000)
            high_mask = freqs > 4000

            feat["low_freq_energy"] = float(np.mean(S_db[low_mask])) if np.any(low_mask) else -80.0
            feat["mid_freq_energy"] = float(np.mean(S_db[mid_mask])) if np.any(mid_mask) else -80.0
            feat["high_freq_energy"] = float(np.mean(S_db[high_mask])) if np.any(high_mask) else -80.0
            feat["spectrum"] = S_db
        except Exception:
            feat["low_freq_energy"] = feat["mid_freq_energy"] = feat["high_freq_energy"] = -80.0
            feat["spectrum"] = np.zeros((n_fft // 2 + 1, 1))

        features.append(feat)

    return features
138
 
139
  # ----------------------------
140
+ # 4. Frame Comparison Logic
141
  # ----------------------------
142
  def compare_frames_enhanced(near_feats, far_feats, metrics):
143
  min_len = min(len(near_feats), len(far_feats))
 
147
  results = {"frame_index": list(range(min_len))}
148
  near_df = pd.DataFrame([f for f in near_feats[:min_len]])
149
  far_df = pd.DataFrame([f for f in far_feats[:min_len]])
150
+
151
+ # Feature Vectors (exclude non-numeric or high-dim cols)
152
+ drop_cols = ["spectrum"]
153
+ near_vec = near_df.drop(columns=drop_cols, errors="ignore").values
154
+ far_vec = far_df.drop(columns=drop_cols, errors="ignore").values
155
 
156
  # Euclidean Distance
157
  if "Euclidean Distance" in metrics:
158
  results["euclidean_dist"] = np.linalg.norm(near_vec - far_vec, axis=1).tolist()
159
+
160
  # Cosine Similarity
161
  if "Cosine Similarity" in metrics:
162
  cos_vals = []
 
167
  else:
168
  cos_vals.append(float(cosine_similarity(a, b)[0][0]))
169
  results["cosine_similarity"] = cos_vals
170
+
171
+ # High-Freq Loss Ratio
172
  if "High-Freq Loss Ratio" in metrics:
173
  loss_ratios = []
174
  for i in range(min_len):
175
  near_high = near_feats[i]["high_freq_energy"]
176
  far_high = far_feats[i]["high_freq_energy"]
177
+ # Energy is in dB (negative), so we look at the difference
178
+ # Simple diff: Near (-20dB) - Far (-30dB) = 10dB loss
179
+ diff = near_high - far_high
180
+ loss_ratios.append(float(diff))
181
+ results["high_freq_loss_db"] = loss_ratios
182
 
183
+ # Spectral Flatness Difference (Reverberation Check)
184
+ flatness_diff = []
185
  for i in range(min_len):
186
+ n_flat = near_feats[i]["spectral_flatness"]
187
+ f_flat = far_feats[i]["spectral_flatness"]
188
+ flatness_diff.append(f_flat - n_flat) # Postive usually means more noise/reverb
189
+ results["flatness_increase"] = flatness_diff
190
 
191
+ # Spectral Overlap
 
 
 
 
 
 
 
 
 
 
192
  overlap_scores = []
193
  for i in range(min_len):
194
  near_spec = near_feats[i]["spectrum"].flatten()
 
200
  overlap_scores.append(overlap)
201
  results["spectral_overlap"] = overlap_scores
202
 
203
+ # Combined Quality Score (0 to 1 approximate)
204
+ # Higher overlap + Higher Cosine + Lower Loss = Better Quality
205
+ combined = []
 
 
 
 
 
 
206
  for i in range(min_len):
207
+ score = (results["spectral_overlap"][i] * 0.5)
208
+ if "cosine_similarity" in results:
209
+ score += (results["cosine_similarity"][i] * 0.5)
210
+ combined.append(score)
211
+ results["combined_match_score"] = combined
212
+
213
  return pd.DataFrame(results)
214
 
215
  # ----------------------------
216
+ # 5. Clustering & Visualization
217
  # ----------------------------
218
  def cluster_frames_custom(features_df, cluster_features, algo, n_clusters=5, eps=0.5):
219
  if not cluster_features:
220
+ return features_df
221
+
222
+ # Ensure selected features exist in DF
223
+ valid_features = [f for f in cluster_features if f in features_df.columns]
224
+ if not valid_features:
225
  return features_df
226
+
227
+ X = features_df[valid_features].values
228
+
229
+ # Handle NaN/Inf just in case
230
+ X = np.nan_to_num(X)
231
+
232
+ if len(X) < 5:
233
+ features_df["cluster"] = -1
234
+ return features_df
235
+
236
  if algo == "KMeans":
237
  n_clusters = min(n_clusters, len(X))
238
  model = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
 
245
  model = DBSCAN(eps=eps, min_samples=min(3, len(X)))
246
  labels = model.fit_predict(X)
247
  else:
248
+ labels = np.zeros(len(X))
249
+
250
  features_df = features_df.copy()
251
  features_df["cluster"] = labels
252
  return features_df
253
 
254
def plot_spectral_difference(near_feats, far_feats, frame_idx=0):
    """Heatmap of the per-bin dB difference (near minus far) for one frame.

    Returns an empty placeholder figure when either feature list is
    empty; an out-of-range ``frame_idx`` is clamped into range.
    """
    # Nothing to plot without features on both sides.
    if not near_feats or not far_feats:
        placeholder = go.Figure()
        placeholder.update_layout(title="No data")
        return placeholder

    # Clamp the requested index so a too-large value still renders.
    safe_idx = min(frame_idx, len(near_feats) - 1, len(far_feats) - 1)

    spec_near = near_feats[safe_idx]["spectrum"]
    spec_far = far_feats[safe_idx]["spectrum"]

    # Compare only over the frequency bins the two spectra share.
    shared_bins = min(spec_near.shape[0], spec_far.shape[0])
    delta = spec_near[:shared_bins] - spec_far[:shared_bins]

    # Diverging colormap centered at zero: blue/red show which side
    # carries more energy in each bin.
    fig = go.Figure(data=go.Heatmap(z=delta, colorscale='RdBu', zmid=0))
    fig.update_layout(
        title=f"Spectral Difference (Frame {safe_idx}) [Near - Far]",
        yaxis_title="Frequency Bin",
        xaxis_title="Time (within frame)",
        height=350,
    )
    return fig
274
 
275
  # ----------------------------
276
+ # 6. Main Analysis Logic
277
  # ----------------------------
278
def analyze_audio_pair(
    near_file, far_file,
    frame_length_ms, hop_length_ms, window_type,
    comparison_metrics, cluster_features, clustering_algo, n_clusters, dbscan_eps
):
    """Full analysis pipeline for a near/far recording pair.

    Loads both files (the far-field file is resampled to the near-field
    rate at load time), peak-normalizes and time-aligns them, segments
    into frames, extracts features, compares frame-by-frame, clusters
    the near-field frames, and builds the Plotly figures for the UI.

    Returns a 6-tuple matching the btn.click outputs:
    (comparison line plot, comparison DataFrame, cluster scatter,
     clustered DataFrame, spectral-difference heatmap, overlay scatter).

    Raises gr.Error when a file is missing or fails to load.
    """
    if not near_file or not far_file:
        raise gr.Error("Please upload both audio files.")

    # 1. Load Audio
    # Load Near (native sample rate)
    # NOTE(review): bare except also masks KeyboardInterrupt/SystemExit;
    # consider narrowing to `except Exception`.
    try:
        y_near, sr_near = librosa.load(near_file.name, sr=None)
    except:
        raise gr.Error("Failed to load Near Field audio.")

    # Load Far (Force resample to match Near)
    try:
        y_far, sr_far = librosa.load(far_file.name, sr=sr_near)
    except:
        raise gr.Error("Failed to load Far Field audio.")

    # 2. Normalize and Align (CRITICAL STEP)
    # Peak-normalize both channels, then cross-correlate to remove the
    # time-of-arrival offset before any frame-wise comparison.
    y_near = librosa.util.normalize(y_near)
    y_far = librosa.util.normalize(y_far)

    gr.Info("Aligning signals (calculating time delay)...")
    y_near, y_far = align_signals(y_near, y_far)

    # 3. Segment — both channels use sr_near, so frame counts match.
    frames_near, _ = segment_audio(y_near, sr_near, frame_length_ms, hop_length_ms, window_type)
    frames_far, _ = segment_audio(y_far, sr_near, frame_length_ms, hop_length_ms, window_type)

    # 4. Extract
    gr.Info("Extracting features...")
    near_feats = extract_features_with_spectrum(frames_near, sr_near)
    far_feats = extract_features_with_spectrum(frames_far, sr_near)

    # 5. Compare
    comparison_df = compare_frames_enhanced(near_feats, far_feats, comparison_metrics)

    # 6. Cluster (on Near field features usually, to classify phonemes)
    near_df = pd.DataFrame(near_feats).drop(columns=["spectrum"], errors="ignore")
    clustered_df = cluster_frames_custom(near_df, cluster_features, clustering_algo, n_clusters, dbscan_eps)

    # 7. Visuals
    metric_cols = [c for c in comparison_df.columns if c != "frame_index"]
    if metric_cols:
        plot_comparison = px.line(comparison_df, x="frame_index", y=metric_cols,
                                  title="Frame-by-Frame Comparison Metrics")
    else:
        plot_comparison = px.line(title="No metrics selected")

    # NOTE(review): color="cluster" assumes cluster_frames_custom added a
    # "cluster" column; it returns the frame unchanged when no selected
    # feature exists in the DataFrame — verify before relying on it.
    if len(cluster_features) >= 2:
        x_f, y_f = cluster_features[0], cluster_features[1]
        plot_scatter = px.scatter(clustered_df, x=x_f, y=y_f, color="cluster",
                                  title=f"Clustering Analysis (Near Field): {x_f} vs {y_f}")
    else:
        plot_scatter = px.scatter(title="Select at least 2 features to visualize clusters")

    # Show the middle frame's spectral difference (clamped inside helper).
    spec_heatmap = plot_spectral_difference(near_feats, far_feats, frame_idx=int(len(near_feats)/2))

    # Metric Overlay: Combine Clustering with Quality
    # Add combined score to clustered df for visualization.
    # NOTE(review): assumes len(clustered_df) == len(comparison_df); this
    # holds after alignment (both channels yield the same frame count),
    # but a length mismatch here would raise — confirm for edge inputs.
    clustered_df["match_quality"] = comparison_df["combined_match_score"]

    if len(cluster_features) > 0:
        overlay_fig = px.scatter(clustered_df, x=cluster_features[0], y="match_quality",
                                 color="cluster",
                                 title=f"Cluster vs. Match Quality ({cluster_features[0]})")
    else:
        overlay_fig = px.scatter(title="Not enough data for overlay")

    return plot_comparison, comparison_df, plot_scatter, clustered_df, spec_heatmap, overlay_fig
351
 
352
  def export_results(comparison_df, clustered_df):
 
358
  return [comp_path, cluster_path]
359
 
360
  # ----------------------------
361
+ # 7. Gradio UI
362
  # ----------------------------
363
+ # Expanded feature list for UI
364
+ feature_list = ["rms", "spectral_centroid", "zcr", "spectral_flatness",
365
+ "low_freq_energy", "mid_freq_energy", "high_freq_energy"] + \
366
+ [f"mfcc_{i}" for i in range(1, 14)]
367
 
368
+ with gr.Blocks(title="Corrected Near vs Far Field Analyzer", theme=gr.themes.Soft()) as demo:
369
+ gr.Markdown("""
370
+ # πŸŽ™οΈ Corrected Near vs Far Field Analyzer
371
+ **Now includes:** Automatic Time Alignment (Cross-Correlation), Normalization, and Reverb Detection.
372
+ """)
373
+
374
  with gr.Row():
375
+ with gr.Column():
376
+ near_file = gr.File(label="Near-Field Audio (Reference)", file_types=[".wav", ".mp3"])
377
+ with gr.Column():
378
+ far_file = gr.File(label="Far-Field Audio (Target)", file_types=[".wav", ".mp3"])
379
 
380
+ with gr.Accordion("βš™οΈ Analysis Settings", open=False):
381
+ with gr.Row():
382
+ frame_length_ms = gr.Slider(10, 200, value=30, step=5, label="Frame Length (ms)")
383
+ hop_length_ms = gr.Slider(5, 100, value=15, step=5, label="Hop Length (ms)")
384
  window_type = gr.Dropdown(["hann", "hamming", "rectangular"], value="hann", label="Window Type")
385
 
386
+ with gr.Accordion("πŸ“Š Metrics & Clustering", open=False):
387
  comparison_metrics = gr.CheckboxGroup(
388
+ choices=["Euclidean Distance", "Cosine Similarity", "High-Freq Loss Ratio"],
 
 
389
  value=["Cosine Similarity", "High-Freq Loss Ratio"],
390
+ label="Comparison Metrics"
391
  )
 
 
392
  cluster_features = gr.CheckboxGroup(
393
+ choices=feature_list,
394
+ value=["spectral_centroid", "spectral_flatness", "high_freq_energy"],
395
+ label="Features for Clustering (Select >= 2)"
396
+ )
397
+ with gr.Row():
398
+ clustering_algo = gr.Dropdown(["KMeans", "Agglomerative", "DBSCAN"], value="KMeans", label="Algorithm")
399
+ n_clusters = gr.Slider(2, 10, value=4, step=1, label="Num Clusters")
400
+ dbscan_eps = gr.Slider(0.1, 5.0, value=0.5, label="DBSCAN Epsilon")
401
 
402
+ btn = gr.Button("πŸš€ Align & Analyze", variant="primary")
403
 
404
  with gr.Tabs():
405
+ with gr.Tab("πŸ“ˆ Time Series Comparison"):
406
+ comp_plot = gr.Plot()
407
+ comp_table = gr.Dataframe(height=200)
408
+ with gr.Tab("🧩 Phoneme Clustering"):
409
+ cluster_plot = gr.Plot()
410
+ cluster_table = gr.Dataframe(height=200)
411
+ with gr.Tab("πŸ” Spectral Check"):
412
+ gr.Markdown("Difference Heatmap (Near - Far). Blue = Near has more energy. Red = Far has more energy.")
413
+ spec_heatmap = gr.Plot()
414
+ with gr.Tab("🧭 Quality Overlay"):
415
+ overlay_plot = gr.Plot()
416
 
417
  with gr.Tab("πŸ“€ Export"):
418
+ export_btn = gr.Button("πŸ’Ύ Download Results")
419
+ export_files = gr.Files()
420
 
421
  btn.click(fn=analyze_audio_pair,
422
  inputs=[near_file, far_file, frame_length_ms, hop_length_ms, window_type,
423
  comparison_metrics, cluster_features, clustering_algo, n_clusters, dbscan_eps],
424
  outputs=[comp_plot, comp_table, cluster_plot, cluster_table, spec_heatmap, overlay_plot])
425
+
426
  export_btn.click(fn=export_results, inputs=[comp_table, cluster_table], outputs=export_files)
427
 
428
  if __name__ == "__main__":
429
+ demo.launch()