AdityaK007 committed on
Commit
d0d071f
·
verified ·
1 Parent(s): a2f0ea1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +246 -133
app.py CHANGED
@@ -5,7 +5,7 @@ import pandas as pd
5
  from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
6
  from sklearn.metrics.pairwise import cosine_similarity
7
  from scipy.spatial.distance import jensenshannon
8
- from scipy.stats import pearsonr
9
  from scipy.signal import get_window as scipy_get_window
10
  import plotly.express as px
11
  import plotly.graph_objects as go
@@ -13,16 +13,58 @@ import os
13
  import tempfile
14
 
15
  # ----------------------------
16
- # Segment Audio into Frames
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  # ----------------------------
18
  def segment_audio(y, sr, frame_length_ms, hop_length_ms, window_type="hann"):
19
  frame_length = int(frame_length_ms * sr / 1000)
20
  hop_length = int(hop_length_ms * sr / 1000)
21
  window = scipy_get_window(window_type if window_type != "rectangular" else "boxcar", frame_length)
22
  frames = []
 
 
 
 
23
  for i in range(0, len(y) - frame_length + 1, hop_length):
24
  frame = y[i:i + frame_length] * window
25
  frames.append(frame)
 
26
  if frames:
27
  frames = np.array(frames).T
28
  else:
@@ -30,58 +72,72 @@ def segment_audio(y, sr, frame_length_ms, hop_length_ms, window_type="hann"):
30
  return frames, frame_length
31
 
32
  # ----------------------------
33
- # Feature Extraction
34
  # ----------------------------
35
  def extract_features_with_spectrum(frames, sr):
36
  features = []
37
  n_mfcc = 13
38
  n_fft = min(2048, frames.shape[0])
 
39
  for i in range(frames.shape[1]):
40
  frame = frames[:, i]
 
 
41
  if len(frame) < n_fft or np.max(np.abs(frame)) < 1e-10:
 
 
 
 
 
42
  continue
 
43
  feat = {}
44
- try:
45
- feat["rms"] = float(np.mean(librosa.feature.rms(y=frame)[0]))
46
- except: feat["rms"] = 0.0
 
 
47
  try:
48
  feat["spectral_centroid"] = float(np.mean(librosa.feature.spectral_centroid(y=frame, sr=sr)[0]))
49
  except: feat["spectral_centroid"] = 0.0
 
 
50
  try:
51
- feat["zcr"] = float(np.mean(librosa.feature.zero_crossing_rate(frame)[0]))
52
- except: feat["zcr"] = 0.0
 
 
53
  try:
54
  mfccs = librosa.feature.mfcc(y=frame, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft)
55
  for j in range(n_mfcc):
56
  feat[f"mfcc_{j+1}"] = float(np.mean(mfccs[j]))
57
  except:
58
- for j in range(n_mfcc):
59
- feat[f"mfcc_{j+1}"] = 0.0
 
60
  try:
61
  S = np.abs(librosa.stft(frame, n_fft=n_fft))
62
  S_db = librosa.amplitude_to_db(S, ref=np.max)
63
  freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
 
64
  low_mask = freqs <= 2000
65
  mid_mask = (freqs > 2000) & (freqs <= 4000)
66
  high_mask = freqs > 4000
67
- feat["low_freq_energy"] = float(np.mean(S_db[low_mask])) if np.any(low_mask) else 0.0
68
- feat["mid_freq_energy"] = float(np.mean(S_db[mid_mask])) if np.any(mid_mask) else 0.0
69
- feat["high_freq_energy"] = float(np.mean(S_db[high_mask])) if np.any(high_mask) else 0.0
 
70
  feat["spectrum"] = S_db
71
  except:
72
- feat["low_freq_energy"] = feat["mid_freq_energy"] = feat["high_freq_energy"] = 0.0
73
  feat["spectrum"] = np.zeros((n_fft // 2 + 1, 1))
 
74
  features.append(feat)
75
- if not features:
76
- feat = { "rms": 0.0, "spectral_centroid": 0.0, "zcr": 0.0,
77
- "low_freq_energy": 0.0, "mid_freq_energy": 0.0, "high_freq_energy": 0.0,
78
- "spectrum": np.zeros((n_fft // 2 + 1, 1)) }
79
- for j in range(n_mfcc): feat[f"mfcc_{j+1}"] = 0.0
80
- features.append(feat)
81
  return features
82
 
83
  # ----------------------------
84
- # Frame Comparison (core metrics)
85
  # ----------------------------
86
  def compare_frames_enhanced(near_feats, far_feats, metrics):
87
  min_len = min(len(near_feats), len(far_feats))
@@ -91,12 +147,16 @@ def compare_frames_enhanced(near_feats, far_feats, metrics):
91
  results = {"frame_index": list(range(min_len))}
92
  near_df = pd.DataFrame([f for f in near_feats[:min_len]])
93
  far_df = pd.DataFrame([f for f in far_feats[:min_len]])
94
- near_vec = near_df.drop(columns=["spectrum"], errors="ignore").values
95
- far_vec = far_df.drop(columns=["spectrum"], errors="ignore").values
 
 
 
96
 
97
  # Euclidean Distance
98
  if "Euclidean Distance" in metrics:
99
  results["euclidean_dist"] = np.linalg.norm(near_vec - far_vec, axis=1).tolist()
 
100
  # Cosine Similarity
101
  if "Cosine Similarity" in metrics:
102
  cos_vals = []
@@ -107,35 +167,28 @@ def compare_frames_enhanced(near_feats, far_feats, metrics):
107
  else:
108
  cos_vals.append(float(cosine_similarity(a, b)[0][0]))
109
  results["cosine_similarity"] = cos_vals
110
- # High-Freq Loss Ratio (Quality)
 
111
  if "High-Freq Loss Ratio" in metrics:
112
  loss_ratios = []
113
  for i in range(min_len):
114
  near_high = near_feats[i]["high_freq_energy"]
115
  far_high = far_feats[i]["high_freq_energy"]
116
- ratio = max(0.0, 1.0 - abs(near_high - far_high) / (abs(near_high) + 1e-6))
117
- loss_ratios.append(float(ratio))
118
- results["high_freq_quality"] = loss_ratios
 
 
119
 
120
- # πŸ”Ή Energy Ratio
121
- energy_ratio = []
122
  for i in range(min_len):
123
- near_rms = near_feats[i]["rms"]; far_rms = far_feats[i]["rms"]
124
- ratio = (far_rms + 1e-6) / (near_rms + 1e-6)
125
- energy_ratio.append(float(np.clip(ratio, 0, 1)))
126
- results["energy_ratio"] = energy_ratio
127
 
128
- # πŸ”Ή Clarity Ratio
129
- clarity_ratio = []
130
- for i in range(min_len):
131
- near_low, near_high = near_feats[i]["low_freq_energy"], near_feats[i]["high_freq_energy"]
132
- far_low, far_high = far_feats[i]["low_freq_energy"], far_feats[i]["high_freq_energy"]
133
- near_ratio, far_ratio = (near_low - near_high), (far_low - far_high)
134
- diff = 1 - abs(far_ratio - near_ratio) / (abs(near_ratio) + 1e-6)
135
- clarity_ratio.append(np.clip(diff, 0, 1))
136
- results["clarity_ratio"] = clarity_ratio
137
-
138
- # πŸ”Ή Spectral Overlap
139
  overlap_scores = []
140
  for i in range(min_len):
141
  near_spec = near_feats[i]["spectrum"].flatten()
@@ -147,31 +200,39 @@ def compare_frames_enhanced(near_feats, far_feats, metrics):
147
  overlap_scores.append(overlap)
148
  results["spectral_overlap"] = overlap_scores
149
 
150
- # πŸ”Ή Combined Weighted Quality
151
- weights = {
152
- "cosine_similarity": 0.3,
153
- "high_freq_quality": 0.25,
154
- "energy_ratio": 0.2,
155
- "clarity_ratio": 0.15,
156
- "spectral_overlap": 0.1
157
- }
158
- combined_quality = []
159
  for i in range(min_len):
160
- val = sum(results[k][i] * w for k, w in weights.items() if k in results)
161
- combined_quality.append(float(val / sum(weights.values())))
162
- results["combined_quality"] = combined_quality
 
 
 
163
  return pd.DataFrame(results)
164
 
165
  # ----------------------------
166
- # Clustering + Overlay
167
  # ----------------------------
168
  def cluster_frames_custom(features_df, cluster_features, algo, n_clusters=5, eps=0.5):
169
  if not cluster_features:
170
- raise gr.Error("Please select at least one feature for clustering.")
171
- if len(features_df) == 0:
172
- features_df["cluster"] = []
 
 
173
  return features_df
174
- X = features_df[cluster_features].values
 
 
 
 
 
 
 
 
 
175
  if algo == "KMeans":
176
  n_clusters = min(n_clusters, len(X))
177
  model = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
@@ -184,32 +245,35 @@ def cluster_frames_custom(features_df, cluster_features, algo, n_clusters=5, eps
184
  model = DBSCAN(eps=eps, min_samples=min(3, len(X)))
185
  labels = model.fit_predict(X)
186
  else:
187
- raise ValueError("Unknown clustering algorithm")
 
188
  features_df = features_df.copy()
189
  features_df["cluster"] = labels
190
  return features_df
191
 
192
  def plot_spectral_difference(near_feats, far_feats, frame_idx=0):
193
- if not near_feats or not far_feats or frame_idx >= len(near_feats) or frame_idx >= len(far_feats):
194
- fig = go.Figure(); fig.update_layout(title="No data available"); return fig
195
- near_spec = near_feats[frame_idx]["spectrum"]; far_spec = far_feats[frame_idx]["spectrum"]
 
 
 
 
 
196
  min_freq_bins = min(near_spec.shape[0], far_spec.shape[0])
197
  diff = near_spec[:min_freq_bins] - far_spec[:min_freq_bins]
 
198
  fig = go.Figure(data=go.Heatmap(z=diff, colorscale='RdBu', zmid=0))
199
- fig.update_layout(title=f"Spectral Difference (Frame {frame_idx})", height=300)
200
- return fig
201
-
202
- def plot_cluster_overlay(df, cluster_metric, overlay_metric):
203
- if cluster_metric not in df.columns or overlay_metric not in df.columns:
204
- fig = go.Figure(); fig.update_layout(title="Metrics not found"); return fig
205
- fig = px.scatter(df, x=cluster_metric, y=overlay_metric, color=overlay_metric,
206
- color_continuous_scale='Viridis',
207
- title=f"Cluster Overlay: {cluster_metric} vs {overlay_metric}")
208
- fig.update_layout(height=400)
209
  return fig
210
 
211
  # ----------------------------
212
- # Main Analysis Function
213
  # ----------------------------
214
  def analyze_audio_pair(
215
  near_file, far_file,
@@ -217,36 +281,72 @@ def analyze_audio_pair(
217
  comparison_metrics, cluster_features, clustering_algo, n_clusters, dbscan_eps
218
  ):
219
  if not near_file or not far_file:
220
- raise gr.Error("Upload both audio files.")
 
 
 
221
  try:
222
  y_near, sr_near = librosa.load(near_file.name, sr=None)
223
- y_far, sr_far = librosa.load(far_file.name, sr=None)
224
- except Exception as e:
225
- raise gr.Error(f"Error loading audio: {str(e)}")
226
- if sr_near != sr_far:
227
- y_far = librosa.resample(y_far, orig_sr=sr_far, target_sr=sr_near)
228
- sr = sr_near
229
- else:
230
- sr = sr_near
231
- frames_near, _ = segment_audio(y_near, sr, frame_length_ms, hop_length_ms, window_type)
232
- frames_far, _ = segment_audio(y_far, sr, frame_length_ms, hop_length_ms, window_type)
233
- near_feats = extract_features_with_spectrum(frames_near, sr)
234
- far_feats = extract_features_with_spectrum(frames_far, sr)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
  comparison_df = compare_frames_enhanced(near_feats, far_feats, comparison_metrics)
 
 
236
  near_df = pd.DataFrame(near_feats).drop(columns=["spectrum"], errors="ignore")
237
  clustered_df = cluster_frames_custom(near_df, cluster_features, clustering_algo, n_clusters, dbscan_eps)
238
- # Plots
239
- metric_cols = [col for col in comparison_df.columns if col != "frame_index"]
240
- plot_comparison = px.line(comparison_df, x="frame_index", y=metric_cols[0],
241
- title=f"{metric_cols[0].replace('_',' ').title()} Over Time") if metric_cols else px.line()
242
- if len(cluster_features) >= 2 and len(clustered_df) > 0:
243
- x_feat, y_feat = cluster_features[0], cluster_features[1]
244
- plot_scatter = px.scatter(clustered_df, x=x_feat, y=y_feat, color="cluster",
245
- title=f"Clustering: {x_feat} vs {y_feat}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  else:
247
- plot_scatter = px.scatter(title="Select β‰₯2 features for clustering")
248
- spec_heatmap = plot_spectral_difference(near_feats, far_feats, frame_idx=0)
249
- overlay_fig = plot_cluster_overlay(clustered_df, cluster_features[0], "combined_quality")
250
  return plot_comparison, comparison_df, plot_scatter, clustered_df, spec_heatmap, overlay_fig
251
 
252
  def export_results(comparison_df, clustered_df):
@@ -258,59 +358,72 @@ def export_results(comparison_df, clustered_df):
258
  return [comp_path, cluster_path]
259
 
260
  # ----------------------------
261
- # Gradio UI
262
  # ----------------------------
263
- dummy_features = ["rms", "spectral_centroid", "zcr"] + [f"mfcc_{i}" for i in range(1,14)] + \
264
- ["low_freq_energy", "mid_freq_energy", "high_freq_energy"]
 
 
265
 
266
- with gr.Blocks(title="Advanced Near vs Far Field Analyzer") as demo:
267
- gr.Markdown("# πŸŽ™οΈ Advanced Near vs Far Field Speech Analyzer")
 
 
 
 
268
  with gr.Row():
269
- near_file = gr.File(label="Near-Field Audio (.wav)", file_types=[".wav"])
270
- far_file = gr.File(label="Far-Field Audio (.wav)")
 
 
271
 
272
- with gr.Accordion("βš™οΈ Frame Settings", open=True):
273
- frame_length_ms = gr.Slider(10, 500, value=50, step=1, label="Frame Length (ms)")
274
- hop_length_ms = gr.Slider(1, 250, value=25, step=1, label="Hop Length (ms)")
 
275
  window_type = gr.Dropdown(["hann", "hamming", "rectangular"], value="hann", label="Window Type")
276
 
277
- with gr.Accordion("πŸ“Š Comparison Metrics", open=True):
278
  comparison_metrics = gr.CheckboxGroup(
279
- choices=[
280
- "Euclidean Distance", "Cosine Similarity", "High-Freq Loss Ratio"
281
- ],
282
  value=["Cosine Similarity", "High-Freq Loss Ratio"],
283
- label="Select Metrics"
284
  )
285
-
286
- with gr.Accordion("🧩 Clustering Configuration", open=True):
287
  cluster_features = gr.CheckboxGroup(
288
- choices=dummy_features, value=["rms", "spectral_centroid", "high_freq_energy"],
289
- label="Features for Clustering")
290
- clustering_algo = gr.Radio(["KMeans", "Agglomerative", "DBSCAN"], value="KMeans", label="Clustering Algorithm")
291
- n_clusters = gr.Slider(2, 20, value=5, step=1, label="Clusters (for KMeans/Agglomerative)")
292
- dbscan_eps = gr.Slider(0.1, 2.0, value=0.5, step=0.1, label="DBSCAN eps")
 
 
 
293
 
294
- btn = gr.Button("πŸš€ Analyze")
295
 
296
  with gr.Tabs():
297
- with gr.Tab("πŸ“ˆ Frame Comparison"):
298
- comp_plot = gr.Plot(); comp_table = gr.Dataframe()
299
- with gr.Tab("🧩 Clustering"):
300
- cluster_plot = gr.Plot(); cluster_table = gr.Dataframe()
301
- with gr.Tab("πŸ” Spectral Analysis"):
302
- spec_heatmap = gr.Plot(label="Spectral Difference (Near - Far)")
303
- with gr.Tab("🧭 Metric Overlay"):
304
- overlay_plot = gr.Plot(label="Metric Overlay")
 
 
 
305
 
306
  with gr.Tab("πŸ“€ Export"):
307
- export_btn = gr.Button("πŸ’Ύ Download CSVs"); export_files = gr.Files()
 
308
 
309
  btn.click(fn=analyze_audio_pair,
310
  inputs=[near_file, far_file, frame_length_ms, hop_length_ms, window_type,
311
  comparison_metrics, cluster_features, clustering_algo, n_clusters, dbscan_eps],
312
  outputs=[comp_plot, comp_table, cluster_plot, cluster_table, spec_heatmap, overlay_plot])
 
313
  export_btn.click(fn=export_results, inputs=[comp_table, cluster_table], outputs=export_files)
314
 
315
  if __name__ == "__main__":
316
- demo.launch()
 
5
  from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
6
  from sklearn.metrics.pairwise import cosine_similarity
7
  from scipy.spatial.distance import jensenshannon
8
+ from scipy import signal
9
  from scipy.signal import get_window as scipy_get_window
10
  import plotly.express as px
11
  import plotly.graph_objects as go
 
13
  import tempfile
14
 
15
  # ----------------------------
16
+ # 1. Signal Alignment & Preprocessing (NEW)
17
+ # ----------------------------
18
def align_signals(ref, target):
    """Align the far-field signal to the near-field reference.

    Uses FFT-based cross-correlation to estimate the time-of-arrival
    delay between the two recordings, then trims whichever signal
    starts earlier so both share a common time origin.

    Parameters
    ----------
    ref : np.ndarray
        1-D reference (near-field) signal.
    target : np.ndarray
        1-D signal to align (far-field).

    Returns
    -------
    tuple[np.ndarray, np.ndarray]
        (aligned_ref, aligned_target), truncated to a common length.
        The returned samples are slices of the ORIGINAL inputs; the
        normalization below is used only for lag estimation.
    """
    # Peak-normalize working copies so amplitude differences cannot skew
    # the correlation.  Dividing by a positive scalar never moves the
    # argmax, so this is equivalent to librosa.util.normalize here while
    # keeping the helper dependency-free.  (+1e-12 guards silent input.)
    ref_norm = ref / (np.max(np.abs(ref)) + 1e-12)
    target_norm = target / (np.max(np.abs(target)) + 1e-12)

    # fftconvolve with a time-reversed kernel == cross-correlation,
    # O(n log n) instead of O(n^2) for long recordings.
    correlation = signal.fftconvolve(target_norm, ref_norm[::-1], mode='full')
    lags = signal.correlation_lags(len(target_norm), len(ref_norm), mode='full')

    # int() so the lag is a plain Python int, not a 0-d numpy scalar.
    # (Debug print removed: it wrote to stdout on every analysis run.)
    lag = int(lags[np.argmax(correlation)])

    if lag > 0:
        # Target is delayed relative to the reference: drop its leading samples.
        aligned_ref = ref
        aligned_target = target[lag:]
    else:
        # Reference is delayed instead: drop its leading samples.
        aligned_ref = ref[abs(lag):]
        aligned_target = target

    # Truncate both to the common overlap length.
    min_len = min(len(aligned_ref), len(aligned_target))
    return aligned_ref[:min_len], aligned_target[:min_len]
51
+
52
+ # ----------------------------
53
+ # 2. Segment Audio into Frames
54
  # ----------------------------
55
  def segment_audio(y, sr, frame_length_ms, hop_length_ms, window_type="hann"):
56
  frame_length = int(frame_length_ms * sr / 1000)
57
  hop_length = int(hop_length_ms * sr / 1000)
58
  window = scipy_get_window(window_type if window_type != "rectangular" else "boxcar", frame_length)
59
  frames = []
60
+
61
+ # Pad to ensure we don't drop the last partial frame
62
+ y_padded = np.pad(y, (0, frame_length), mode='constant')
63
+
64
  for i in range(0, len(y) - frame_length + 1, hop_length):
65
  frame = y[i:i + frame_length] * window
66
  frames.append(frame)
67
+
68
  if frames:
69
  frames = np.array(frames).T
70
  else:
 
72
  return frames, frame_length
73
 
74
  # ----------------------------
75
+ # 3. Feature Extraction
76
  # ----------------------------
77
def extract_features_with_spectrum(frames, sr):
    """Extract per-frame acoustic features plus the raw dB spectrum.

    Parameters
    ----------
    frames : np.ndarray
        2-D array of windowed frames, shape (frame_length, n_frames),
        as produced by segment_audio.
    sr : int
        Sample rate in Hz.

    Returns
    -------
    list[dict]
        One dict per frame with keys: rms, zcr, spectral_centroid,
        spectral_flatness, mfcc_1..mfcc_13, low/mid/high_freq_energy
        (mean band level in dB), and "spectrum" (2-D dB magnitude array).

    Fix: the previous bare ``except:`` clauses also swallowed
    SystemExit/KeyboardInterrupt; narrowed to ``except Exception``.
    """
    features = []
    n_mfcc = 13
    # Cap the FFT size at the frame length so short frames stay valid.
    n_fft = min(2048, frames.shape[0])

    for i in range(frames.shape[1]):
        frame = frames[:, i]

        # Skip empty/silent frames to prevent NaN: emit an all-zero
        # feature dict so frame indices stay aligned across channels.
        if len(frame) < n_fft or np.max(np.abs(frame)) < 1e-10:
            feat = {k: 0.0 for k in ["rms", "spectral_centroid", "zcr", "spectral_flatness",
                                     "low_freq_energy", "mid_freq_energy", "high_freq_energy"]}
            for j in range(n_mfcc):
                feat[f"mfcc_{j+1}"] = 0.0
            feat["spectrum"] = np.zeros((n_fft // 2 + 1, 1))
            features.append(feat)
            continue

        feat = {}
        # Basic time-domain features.
        feat["rms"] = float(np.mean(librosa.feature.rms(y=frame)[0]))
        feat["zcr"] = float(np.mean(librosa.feature.zero_crossing_rate(frame)[0]))

        # Spectral centroid ("brightness").
        try:
            feat["spectral_centroid"] = float(np.mean(librosa.feature.spectral_centroid(y=frame, sr=sr)[0]))
        except Exception:
            feat["spectral_centroid"] = 0.0

        # Reverb metric: noisier / more reverberant frames tend toward
        # higher flatness.
        try:
            feat["spectral_flatness"] = float(np.mean(librosa.feature.spectral_flatness(y=frame)[0]))
        except Exception:
            feat["spectral_flatness"] = 0.0

        # MFCCs (timbre envelope), averaged over the frame.
        try:
            mfccs = librosa.feature.mfcc(y=frame, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft)
            for j in range(n_mfcc):
                feat[f"mfcc_{j+1}"] = float(np.mean(mfccs[j]))
        except Exception:
            for j in range(n_mfcc):
                feat[f"mfcc_{j+1}"] = 0.0

        # Band energies in dB; -80 dB (near silence) is the sentinel for
        # an empty band, matching amplitude_to_db's typical floor.
        try:
            S = np.abs(librosa.stft(frame, n_fft=n_fft))
            S_db = librosa.amplitude_to_db(S, ref=np.max)
            freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)

            low_mask = freqs <= 2000
            mid_mask = (freqs > 2000) & (freqs <= 4000)
            high_mask = freqs > 4000

            feat["low_freq_energy"] = float(np.mean(S_db[low_mask])) if np.any(low_mask) else -80.0
            feat["mid_freq_energy"] = float(np.mean(S_db[mid_mask])) if np.any(mid_mask) else -80.0
            feat["high_freq_energy"] = float(np.mean(S_db[high_mask])) if np.any(high_mask) else -80.0
            feat["spectrum"] = S_db
        except Exception:
            feat["low_freq_energy"] = feat["mid_freq_energy"] = feat["high_freq_energy"] = -80.0
            feat["spectrum"] = np.zeros((n_fft // 2 + 1, 1))

        features.append(feat)

    return features
138
 
139
  # ----------------------------
140
+ # 4. Frame Comparison Logic
141
  # ----------------------------
142
  def compare_frames_enhanced(near_feats, far_feats, metrics):
143
  min_len = min(len(near_feats), len(far_feats))
 
147
  results = {"frame_index": list(range(min_len))}
148
  near_df = pd.DataFrame([f for f in near_feats[:min_len]])
149
  far_df = pd.DataFrame([f for f in far_feats[:min_len]])
150
+
151
+ # Feature Vectors (exclude non-numeric or high-dim cols)
152
+ drop_cols = ["spectrum"]
153
+ near_vec = near_df.drop(columns=drop_cols, errors="ignore").values
154
+ far_vec = far_df.drop(columns=drop_cols, errors="ignore").values
155
 
156
  # Euclidean Distance
157
  if "Euclidean Distance" in metrics:
158
  results["euclidean_dist"] = np.linalg.norm(near_vec - far_vec, axis=1).tolist()
159
+
160
  # Cosine Similarity
161
  if "Cosine Similarity" in metrics:
162
  cos_vals = []
 
167
  else:
168
  cos_vals.append(float(cosine_similarity(a, b)[0][0]))
169
  results["cosine_similarity"] = cos_vals
170
+
171
+ # High-Freq Loss Ratio
172
  if "High-Freq Loss Ratio" in metrics:
173
  loss_ratios = []
174
  for i in range(min_len):
175
  near_high = near_feats[i]["high_freq_energy"]
176
  far_high = far_feats[i]["high_freq_energy"]
177
+ # Energy is in dB (negative), so we look at the difference
178
+ # Simple diff: Near (-20dB) - Far (-30dB) = 10dB loss
179
+ diff = near_high - far_high
180
+ loss_ratios.append(float(diff))
181
+ results["high_freq_loss_db"] = loss_ratios
182
 
183
+ # Spectral Flatness Difference (Reverberation Check)
184
+ flatness_diff = []
185
  for i in range(min_len):
186
+ n_flat = near_feats[i]["spectral_flatness"]
187
+ f_flat = far_feats[i]["spectral_flatness"]
188
+ flatness_diff.append(f_flat - n_flat) # Postive usually means more noise/reverb
189
+ results["flatness_increase"] = flatness_diff
190
 
191
+ # Spectral Overlap
 
 
 
 
 
 
 
 
 
 
192
  overlap_scores = []
193
  for i in range(min_len):
194
  near_spec = near_feats[i]["spectrum"].flatten()
 
200
  overlap_scores.append(overlap)
201
  results["spectral_overlap"] = overlap_scores
202
 
203
+ # Combined Quality Score (0 to 1 approximate)
204
+ # Higher overlap + Higher Cosine + Lower Loss = Better Quality
205
+ combined = []
 
 
 
 
 
 
206
  for i in range(min_len):
207
+ score = (results["spectral_overlap"][i] * 0.5)
208
+ if "cosine_similarity" in results:
209
+ score += (results["cosine_similarity"][i] * 0.5)
210
+ combined.append(score)
211
+ results["combined_match_score"] = combined
212
+
213
  return pd.DataFrame(results)
214
 
215
  # ----------------------------
216
+ # 5. Clustering & Visualization
217
  # ----------------------------
218
  def cluster_frames_custom(features_df, cluster_features, algo, n_clusters=5, eps=0.5):
219
  if not cluster_features:
220
+ return features_df
221
+
222
+ # Ensure selected features exist in DF
223
+ valid_features = [f for f in cluster_features if f in features_df.columns]
224
+ if not valid_features:
225
  return features_df
226
+
227
+ X = features_df[valid_features].values
228
+
229
+ # Handle NaN/Inf just in case
230
+ X = np.nan_to_num(X)
231
+
232
+ if len(X) < 5:
233
+ features_df["cluster"] = -1
234
+ return features_df
235
+
236
  if algo == "KMeans":
237
  n_clusters = min(n_clusters, len(X))
238
  model = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
 
245
  model = DBSCAN(eps=eps, min_samples=min(3, len(X)))
246
  labels = model.fit_predict(X)
247
  else:
248
+ labels = np.zeros(len(X))
249
+
250
  features_df = features_df.copy()
251
  features_df["cluster"] = labels
252
  return features_df
253
 
254
def plot_spectral_difference(near_feats, far_feats, frame_idx=0):
    """Heatmap of the per-bin dB difference (near minus far) for one frame.

    Returns an empty placeholder figure when either feature list is
    empty; an out-of-range ``frame_idx`` is clamped into range.
    """
    # Nothing to plot without features on both sides.
    if not near_feats or not far_feats:
        placeholder = go.Figure()
        placeholder.update_layout(title="No data")
        return placeholder

    # Clamp the requested index so a too-large value still renders.
    safe_idx = min(frame_idx, len(near_feats) - 1, len(far_feats) - 1)

    spec_near = near_feats[safe_idx]["spectrum"]
    spec_far = far_feats[safe_idx]["spectrum"]

    # Compare only over the frequency bins the two spectra share.
    shared_bins = min(spec_near.shape[0], spec_far.shape[0])
    delta = spec_near[:shared_bins] - spec_far[:shared_bins]

    # Diverging colormap centered at zero: blue/red show which side
    # carries more energy in each bin.
    fig = go.Figure(data=go.Heatmap(z=delta, colorscale='RdBu', zmid=0))
    fig.update_layout(
        title=f"Spectral Difference (Frame {safe_idx}) [Near - Far]",
        yaxis_title="Frequency Bin",
        xaxis_title="Time (within frame)",
        height=350,
    )
    return fig
274
 
275
  # ----------------------------
276
+ # 6. Main Analysis Logic
277
  # ----------------------------
278
def analyze_audio_pair(
    near_file, far_file,
    frame_length_ms, hop_length_ms, window_type,
    comparison_metrics, cluster_features, clustering_algo, n_clusters, dbscan_eps
):
    """Full analysis pipeline for a near/far recording pair.

    Loads both files (the far-field file is resampled to the near-field
    rate at load time), peak-normalizes and time-aligns them, segments
    into frames, extracts features, compares frame-by-frame, clusters
    the near-field frames, and builds the Plotly figures for the UI.

    Returns a 6-tuple matching the btn.click outputs:
    (comparison line plot, comparison DataFrame, cluster scatter,
     clustered DataFrame, spectral-difference heatmap, overlay scatter).

    Raises gr.Error when a file is missing or fails to load.
    """
    if not near_file or not far_file:
        raise gr.Error("Please upload both audio files.")

    # 1. Load Audio
    # Load Near (native sample rate)
    # NOTE(review): bare except also masks KeyboardInterrupt/SystemExit;
    # consider narrowing to `except Exception`.
    try:
        y_near, sr_near = librosa.load(near_file.name, sr=None)
    except:
        raise gr.Error("Failed to load Near Field audio.")

    # Load Far (Force resample to match Near)
    try:
        y_far, sr_far = librosa.load(far_file.name, sr=sr_near)
    except:
        raise gr.Error("Failed to load Far Field audio.")

    # 2. Normalize and Align (CRITICAL STEP)
    # Peak-normalize both channels, then cross-correlate to remove the
    # time-of-arrival offset before any frame-wise comparison.
    y_near = librosa.util.normalize(y_near)
    y_far = librosa.util.normalize(y_far)

    gr.Info("Aligning signals (calculating time delay)...")
    y_near, y_far = align_signals(y_near, y_far)

    # 3. Segment — both channels use sr_near, so frame counts match.
    frames_near, _ = segment_audio(y_near, sr_near, frame_length_ms, hop_length_ms, window_type)
    frames_far, _ = segment_audio(y_far, sr_near, frame_length_ms, hop_length_ms, window_type)

    # 4. Extract
    gr.Info("Extracting features...")
    near_feats = extract_features_with_spectrum(frames_near, sr_near)
    far_feats = extract_features_with_spectrum(frames_far, sr_near)

    # 5. Compare
    comparison_df = compare_frames_enhanced(near_feats, far_feats, comparison_metrics)

    # 6. Cluster (on Near field features usually, to classify phonemes)
    near_df = pd.DataFrame(near_feats).drop(columns=["spectrum"], errors="ignore")
    clustered_df = cluster_frames_custom(near_df, cluster_features, clustering_algo, n_clusters, dbscan_eps)

    # 7. Visuals
    metric_cols = [c for c in comparison_df.columns if c != "frame_index"]
    if metric_cols:
        plot_comparison = px.line(comparison_df, x="frame_index", y=metric_cols,
                                  title="Frame-by-Frame Comparison Metrics")
    else:
        plot_comparison = px.line(title="No metrics selected")

    # NOTE(review): color="cluster" assumes cluster_frames_custom added a
    # "cluster" column; it returns the frame unchanged when no selected
    # feature exists in the DataFrame — verify before relying on it.
    if len(cluster_features) >= 2:
        x_f, y_f = cluster_features[0], cluster_features[1]
        plot_scatter = px.scatter(clustered_df, x=x_f, y=y_f, color="cluster",
                                  title=f"Clustering Analysis (Near Field): {x_f} vs {y_f}")
    else:
        plot_scatter = px.scatter(title="Select at least 2 features to visualize clusters")

    # Show the middle frame's spectral difference (clamped inside helper).
    spec_heatmap = plot_spectral_difference(near_feats, far_feats, frame_idx=int(len(near_feats)/2))

    # Metric Overlay: Combine Clustering with Quality
    # Add combined score to clustered df for visualization.
    # NOTE(review): assumes len(clustered_df) == len(comparison_df); this
    # holds after alignment (both channels yield the same frame count),
    # but a length mismatch here would raise — confirm for edge inputs.
    clustered_df["match_quality"] = comparison_df["combined_match_score"]

    if len(cluster_features) > 0:
        overlay_fig = px.scatter(clustered_df, x=cluster_features[0], y="match_quality",
                                 color="cluster",
                                 title=f"Cluster vs. Match Quality ({cluster_features[0]})")
    else:
        overlay_fig = px.scatter(title="Not enough data for overlay")

    return plot_comparison, comparison_df, plot_scatter, clustered_df, spec_heatmap, overlay_fig
351
 
352
  def export_results(comparison_df, clustered_df):
 
358
  return [comp_path, cluster_path]
359
 
360
  # ----------------------------
361
+ # 7. Gradio UI
362
  # ----------------------------
363
+ # Expanded feature list for UI
364
+ feature_list = ["rms", "spectral_centroid", "zcr", "spectral_flatness",
365
+ "low_freq_energy", "mid_freq_energy", "high_freq_energy"] + \
366
+ [f"mfcc_{i}" for i in range(1, 14)]
367
 
368
+ with gr.Blocks(title="Corrected Near vs Far Field Analyzer", theme=gr.themes.Soft()) as demo:
369
+ gr.Markdown("""
370
+ # πŸŽ™οΈ Corrected Near vs Far Field Analyzer
371
+ **Now includes:** Automatic Time Alignment (Cross-Correlation), Normalization, and Reverb Detection.
372
+ """)
373
+
374
  with gr.Row():
375
+ with gr.Column():
376
+ near_file = gr.File(label="Near-Field Audio (Reference)", file_types=[".wav", ".mp3"])
377
+ with gr.Column():
378
+ far_file = gr.File(label="Far-Field Audio (Target)", file_types=[".wav", ".mp3"])
379
 
380
+ with gr.Accordion("βš™οΈ Analysis Settings", open=False):
381
+ with gr.Row():
382
+ frame_length_ms = gr.Slider(10, 200, value=30, step=5, label="Frame Length (ms)")
383
+ hop_length_ms = gr.Slider(5, 100, value=15, step=5, label="Hop Length (ms)")
384
  window_type = gr.Dropdown(["hann", "hamming", "rectangular"], value="hann", label="Window Type")
385
 
386
+ with gr.Accordion("πŸ“Š Metrics & Clustering", open=False):
387
  comparison_metrics = gr.CheckboxGroup(
388
+ choices=["Euclidean Distance", "Cosine Similarity", "High-Freq Loss Ratio"],
 
 
389
  value=["Cosine Similarity", "High-Freq Loss Ratio"],
390
+ label="Comparison Metrics"
391
  )
 
 
392
  cluster_features = gr.CheckboxGroup(
393
+ choices=feature_list,
394
+ value=["spectral_centroid", "spectral_flatness", "high_freq_energy"],
395
+ label="Features for Clustering (Select >= 2)"
396
+ )
397
+ with gr.Row():
398
+ clustering_algo = gr.Dropdown(["KMeans", "Agglomerative", "DBSCAN"], value="KMeans", label="Algorithm")
399
+ n_clusters = gr.Slider(2, 10, value=4, step=1, label="Num Clusters")
400
+ dbscan_eps = gr.Slider(0.1, 5.0, value=0.5, label="DBSCAN Epsilon")
401
 
402
+ btn = gr.Button("πŸš€ Align & Analyze", variant="primary")
403
 
404
  with gr.Tabs():
405
+ with gr.Tab("πŸ“ˆ Time Series Comparison"):
406
+ comp_plot = gr.Plot()
407
+ comp_table = gr.Dataframe(height=200)
408
+ with gr.Tab("🧩 Phoneme Clustering"):
409
+ cluster_plot = gr.Plot()
410
+ cluster_table = gr.Dataframe(height=200)
411
+ with gr.Tab("πŸ” Spectral Check"):
412
+ gr.Markdown("Difference Heatmap (Near - Far). Blue = Near has more energy. Red = Far has more energy.")
413
+ spec_heatmap = gr.Plot()
414
+ with gr.Tab("🧭 Quality Overlay"):
415
+ overlay_plot = gr.Plot()
416
 
417
  with gr.Tab("πŸ“€ Export"):
418
+ export_btn = gr.Button("πŸ’Ύ Download Results")
419
+ export_files = gr.Files()
420
 
421
  btn.click(fn=analyze_audio_pair,
422
  inputs=[near_file, far_file, frame_length_ms, hop_length_ms, window_type,
423
  comparison_metrics, cluster_features, clustering_algo, n_clusters, dbscan_eps],
424
  outputs=[comp_plot, comp_table, cluster_plot, cluster_table, spec_heatmap, overlay_plot])
425
+
426
  export_btn.click(fn=export_results, inputs=[comp_table, cluster_table], outputs=export_files)
427
 
428
  if __name__ == "__main__":
429
+ demo.launch()