AdityaK007 committed on
Commit
5b17c9f
·
verified ·
1 Parent(s): d1022e8

Create app1.py

Browse files
Files changed (1) hide show
  1. app1.py +478 -0
app1.py ADDED
@@ -0,0 +1,478 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import librosa
3
+ import numpy as np
4
+ import pandas as pd
5
+ from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
6
+ from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
7
+ from scipy.spatial.distance import jensenshannon
8
+ from scipy.stats import pearsonr
9
+ from scipy.signal import get_window as scipy_get_window
10
+ import plotly.express as px
11
+ import plotly.graph_objects as go
12
+ import os
13
+ import tempfile
14
+
15
+ # ----------------------------
16
+ # Fixed: Added missing segment_audio function
17
+ # ----------------------------
18
+
19
def segment_audio(y, sr, frame_length_ms, hop_length_ms, window_type="hann"):
    """Slice a 1-D signal into overlapping, windowed frames.

    Args:
        y: 1-D sequence of audio samples.
        sr: Sampling rate in Hz.
        frame_length_ms: Frame duration in milliseconds.
        hop_length_ms: Distance between consecutive frame starts, in
            milliseconds.
        window_type: Window name understood by ``scipy.signal.get_window``.
            The alias ``"rectangular"`` is mapped to SciPy's ``"boxcar"``.

    Returns:
        A ``(frames, frame_length)`` tuple. ``frames`` has shape
        ``(frame_length, n_frames)`` (one frame per column) and
        ``frame_length`` is the frame size in samples. A signal shorter than
        one frame is zero-padded into a single frame.
    """
    # Clamp to one sample: a duration below one sample period would otherwise
    # truncate to 0 and make the frame empty or the hop an invalid range step.
    frame_length = max(1, int(frame_length_ms * sr / 1000))
    hop_length = max(1, int(hop_length_ms * sr / 1000))

    window_name = "boxcar" if window_type == "rectangular" else window_type
    window = scipy_get_window(window_name, frame_length)

    samples = np.asarray(y)
    if len(samples) < frame_length:
        # Too short for a full frame: keep the available samples and pad the
        # tail with zeros instead of discarding the audio.
        padded = np.zeros(frame_length)
        padded[:len(samples)] = samples
        return (padded * window).reshape(-1, 1), frame_length

    # Build every frame at once: column k holds samples starts[k] .. starts[k] + frame_length - 1.
    starts = np.arange(0, len(samples) - frame_length + 1, hop_length)
    sample_idx = np.arange(frame_length)[:, np.newaxis] + starts[np.newaxis, :]
    frames = samples[sample_idx] * window[:, np.newaxis]
    return frames, frame_length
43
+
44
+ # ----------------------------
45
+ # Enhanced Feature Extraction (with spectral bins)
46
+ # ----------------------------
47
+
48
def _blank_frame_features(n_mfcc, n_bins):
    """Return an all-zero feature dict in the canonical key order."""
    feat = {"rms": 0.0, "spectral_centroid": 0.0, "zcr": 0.0}
    feat.update({f"mfcc_{j + 1}": 0.0 for j in range(n_mfcc)})
    feat.update(low_freq_energy=0.0, mid_freq_energy=0.0, high_freq_energy=0.0)
    feat["spectrum"] = np.zeros((n_bins, 1))
    return feat


def extract_features_with_spectrum(frames, sr):
    """Compute per-frame descriptors plus a dB magnitude spectrum.

    Args:
        frames: 2-D array of shape ``(frame_length, n_frames)``, one frame
            per column.
        sr: Sampling rate in Hz.

    Returns:
        A list of dicts, one per non-silent frame, with the keys ``rms``,
        ``spectral_centroid``, ``zcr``, ``mfcc_1`` .. ``mfcc_13``,
        ``low_freq_energy``, ``mid_freq_energy``, ``high_freq_energy`` and
        ``spectrum`` (always in that order). A feature whose computation
        fails is left at zero. If every frame is silent, a single all-zero
        entry is returned so callers never receive an empty list.

    Note:
        Silent frames are dropped, so a position in the returned list does
        not necessarily equal the frame's column index in ``frames``.
    """
    n_mfcc = 13
    n_fft = min(2048, frames.shape[0])  # n_fft must not exceed the frame length
    n_bins = n_fft // 2 + 1

    # The bin frequencies depend only on sr and n_fft, so the band masks are
    # built once. np.fft.rfftfreq yields the same bin centres as
    # librosa.fft_frequencies.
    freqs = np.fft.rfftfreq(n_fft, d=1.0 / sr)
    band_masks = {
        "low_freq_energy": freqs <= 2000,
        "mid_freq_energy": (freqs > 2000) & (freqs <= 4000),
        "high_freq_energy": freqs > 4000,
    }

    features = []
    for frame in frames.T:
        # Skip empty or (numerically) silent frames.
        if frame.size == 0 or np.max(np.abs(frame)) < 1e-10:
            continue

        # Start from zeros so a failed computation keeps its default and the
        # key order is identical for every dict produced here.
        feat = _blank_frame_features(n_mfcc, n_bins)

        # Each feature is best-effort: one failing extractor must not discard
        # the others. ``Exception`` (not a bare ``except``) lets
        # KeyboardInterrupt and SystemExit propagate.
        try:
            feat["rms"] = float(np.mean(librosa.feature.rms(y=frame)[0]))
        except Exception:
            pass

        try:
            centroid = librosa.feature.spectral_centroid(y=frame, sr=sr)[0]
            feat["spectral_centroid"] = float(np.mean(centroid))
        except Exception:
            pass

        try:
            feat["zcr"] = float(np.mean(librosa.feature.zero_crossing_rate(frame)[0]))
        except Exception:
            pass

        try:
            mfccs = librosa.feature.mfcc(y=frame, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft)
            mfcc_means = {f"mfcc_{j + 1}": float(np.mean(mfccs[j])) for j in range(n_mfcc)}
        except Exception:
            pass
        else:
            feat.update(mfcc_means)

        # Band energies (mean dB below 2 kHz, 2-4 kHz, above 4 kHz) and the
        # full dB spectrum, which is kept for the difference heatmap.
        try:
            magnitude = np.abs(librosa.stft(frame, n_fft=n_fft))
            spectrum_db = librosa.amplitude_to_db(magnitude, ref=np.max)
            band_energy = {
                name: float(np.mean(spectrum_db[mask])) if mask.any() else 0.0
                for name, mask in band_masks.items()
            }
        except Exception:
            pass
        else:
            feat.update(band_energy)
            feat["spectrum"] = spectrum_db

        features.append(feat)

    if not features:
        # Nothing usable: return one placeholder so downstream code has a row.
        features.append(_blank_frame_features(n_mfcc, n_bins))

    return features
127
+
128
def _rows_as_distributions(matrix):
    """Shift every row to be strictly positive and scale it to sum to one."""
    shifted = matrix - matrix.min(axis=1, keepdims=True) + 1e-8
    return shifted / shifted.sum(axis=1, keepdims=True)


def _safe_pearson(a, b):
    """Return the Pearson correlation of two vectors, or 0.0 when undefined."""
    # Correlation is undefined for fewer than two points or a constant vector.
    if a.size < 2 or np.ptp(a) == 0 or np.ptp(b) == 0:
        return 0.0
    try:
        corr, _ = pearsonr(a, b)
    except ValueError:
        return 0.0
    return 0.0 if np.isnan(corr) else float(corr)


def _feature_gap(near_feats, far_feats, key):
    """Return near[key] - far[key] per frame pair, 0.0 where unavailable."""
    gaps = []
    for near, far in zip(near_feats, far_feats):
        try:
            gaps.append(float(near[key] - far[key]))
        except (KeyError, TypeError, ValueError):
            gaps.append(0.0)
    return gaps


def compare_frames_enhanced(near_feats, far_feats, metrics):
    """Compare near-field and far-field frames pairwise by list position.

    Args:
        near_feats: List of per-frame feature dicts for the near-field signal.
        far_feats: List of per-frame feature dicts for the far-field signal.
        metrics: Iterable of metric labels to compute. Recognised labels are
            "Euclidean Distance", "Cosine Similarity", "Pearson Correlation",
            "KL Divergence", "Jensen-Shannon Divergence",
            "High-Freq Loss Ratio" and "Spectral Centroid Shift".

    Returns:
        A DataFrame with a ``frame_index`` column and one column per selected
        metric. Only the first ``min(len(near_feats), len(far_feats))`` frames
        are compared; with no frames, only an empty ``frame_index`` column is
        returned.
    """
    min_len = min(len(near_feats), len(far_feats))
    if min_len == 0:
        return pd.DataFrame({"frame_index": []})

    metrics = metrics or []
    near_pairs = near_feats[:min_len]
    far_pairs = far_feats[:min_len]

    # The spectrum is an array per frame, not a scalar feature.
    near_df = pd.DataFrame(near_pairs).drop(columns=["spectrum"], errors="ignore")
    far_df = pd.DataFrame(far_pairs).drop(columns=["spectrum"], errors="ignore")

    # Align by feature NAME: the two inputs may list their keys in different
    # orders, and comparing by position would pair unrelated features.
    shared = [col for col in near_df.columns if col in far_df.columns]
    near_vec = near_df[shared].to_numpy(dtype=float)
    far_vec = far_df[shared].to_numpy(dtype=float)
    has_features = near_vec.shape[1] > 0

    results = {"frame_index": list(range(min_len))}

    # 1. Euclidean distance between the feature vectors
    if "Euclidean Distance" in metrics:
        results["euclidean_dist"] = np.linalg.norm(near_vec - far_vec, axis=1).tolist()

    # 2. Cosine similarity; two zero vectors count as identical (1.0), a zero
    #    vector against a non-zero one as unrelated (0.0)
    if "Cosine Similarity" in metrics:
        near_zero = ~near_vec.any(axis=1)
        far_zero = ~far_vec.any(axis=1)
        valid = ~(near_zero | far_zero)
        cos_vals = np.where(near_zero & far_zero, 1.0, 0.0)
        dots = np.einsum("ij,ij->i", near_vec[valid], far_vec[valid])
        norms = np.linalg.norm(near_vec[valid], axis=1) * np.linalg.norm(far_vec[valid], axis=1)
        cos_vals[valid] = dots / norms
        results["cosine_similarity"] = cos_vals.tolist()

    # 3. Pearson correlation across the feature dimensions
    if "Pearson Correlation" in metrics:
        results["pearson_corr"] = [_safe_pearson(a, b) for a, b in zip(near_vec, far_vec)]

    # KL and JS both treat each feature vector as a probability distribution.
    wants_kl = "KL Divergence" in metrics
    wants_js = "Jensen-Shannon Divergence" in metrics
    if (wants_kl or wants_js) and has_features:
        p_rows = _rows_as_distributions(near_vec)
        q_rows = _rows_as_distributions(far_vec)

    # 4. KL divergence KL(near || far)
    if wants_kl:
        if has_features:
            results["kl_divergence"] = np.sum(p_rows * np.log(p_rows / q_rows), axis=1).tolist()
        else:
            results["kl_divergence"] = [0.0] * min_len

    # 5. Jensen-Shannon divergence. SciPy's jensenshannon returns the JS
    #    *distance* (square root of the divergence), so it is squared here.
    if wants_js:
        if has_features:
            results["js_divergence"] = [
                float(jensenshannon(p, q)) ** 2 for p, q in zip(p_rows, q_rows)
            ]
        else:
            results["js_divergence"] = [0.0] * min_len

    # 6. High-frequency loss in dB (positive = far-field has less energy)
    if "High-Freq Loss Ratio" in metrics:
        results["high_freq_loss_db"] = _feature_gap(near_pairs, far_pairs, "high_freq_energy")

    # 7. Spectral centroid shift (near minus far)
    if "Spectral Centroid Shift" in metrics:
        results["centroid_shift"] = _feature_gap(near_pairs, far_pairs, "spectral_centroid")

    return pd.DataFrame(results)
228
+
229
def cluster_frames_custom(features_df, cluster_features, algo, n_clusters=5, eps=0.5):
    """Assign a cluster label to every frame using the selected features.

    Args:
        features_df: DataFrame with one row per frame and one column per feature.
        cluster_features: Names of the columns to cluster on.
        algo: One of ``"KMeans"``, ``"Agglomerative"`` or ``"DBSCAN"``.
        n_clusters: Requested cluster count for KMeans/Agglomerative; capped
            at the number of rows.
        eps: Neighbourhood radius for DBSCAN.

    Returns:
        A copy of ``features_df`` with an added ``cluster`` column. The input
        frame is never modified.

    Raises:
        gr.Error: If ``cluster_features`` is empty.
        ValueError: If ``algo`` is not a recognised algorithm name.
    """
    if not cluster_features:
        raise gr.Error("Please select at least one feature for clustering.")

    # Always work on a copy so the caller's frame is untouched, including in
    # the empty-input case.
    clustered = features_df.copy()
    if len(clustered) == 0:
        clustered["cluster"] = []
        return clustered

    X = clustered[cluster_features].values
    n_samples = len(X)
    # There cannot be more clusters than samples; UI sliders may deliver the
    # count as a float, which scikit-learn rejects.
    k = min(int(n_clusters), n_samples)

    if algo == "KMeans":
        model = KMeans(n_clusters=k, random_state=42, n_init=10)
    elif algo == "Agglomerative":
        model = AgglomerativeClustering(n_clusters=k)
    elif algo == "DBSCAN":
        # DBSCAN finds the cluster count itself; n_clusters does not apply.
        model = DBSCAN(eps=eps, min_samples=min(3, n_samples))
    else:
        raise ValueError("Unknown clustering algorithm")

    clustered["cluster"] = model.fit_predict(X)
    return clustered
257
+
258
def plot_spectral_difference(near_feats, far_feats, frame_idx=0):
    """Build a heatmap of the near-minus-far dB spectrum for one frame.

    Args:
        near_feats: List of near-field feature dicts, each with a ``spectrum`` entry.
        far_feats: List of far-field feature dicts, each with a ``spectrum`` entry.
        frame_idx: Position of the frame to plot in both lists.

    Returns:
        A Plotly figure. When either list is empty or ``frame_idx`` is past
        the end of either list, the figure is empty and only carries a title.
    """
    frame_available = (
        near_feats
        and far_feats
        and frame_idx < min(len(near_feats), len(far_feats))
    )
    if not frame_available:
        placeholder = go.Figure()
        placeholder.update_layout(title="No data available for spectral analysis", height=300)
        return placeholder

    near_spectrum = near_feats[frame_idx]["spectrum"]
    far_spectrum = far_feats[frame_idx]["spectrum"]

    # Trim both spectra to the smaller number of frequency bins.
    shared_bins = min(near_spectrum.shape[0], far_spectrum.shape[0])
    # Positive values mark energy that is present near but missing far.
    delta_db = near_spectrum[:shared_bins] - far_spectrum[:shared_bins]

    heatmap = go.Heatmap(
        z=delta_db,
        colorscale='RdBu',
        zmid=0,
        colorbar=dict(title="dB Difference"),
    )
    figure = go.Figure(data=heatmap)
    figure.update_layout(
        title=f"Spectral Difference (Frame {frame_idx}): Near - Far",
        xaxis_title="Time Frames",
        yaxis_title="Frequency Bins",
        height=300,
    )
    return figure
288
+
289
+ # ----------------------------
290
+ # Main Analysis Function
291
+ # ----------------------------
292
+
293
def _uploaded_path(upload):
    """Return the filesystem path of an upload given as a file object or a plain path."""
    return getattr(upload, "name", upload)


def _comparison_figure(comparison_df):
    """Plot the first computed comparison metric against the frame index."""
    if comparison_df.shape[1] <= 1 or len(comparison_df) == 0:
        return px.line(title="No comparison data available")

    metric_cols = [col for col in comparison_df.columns if col != "frame_index"]
    if not metric_cols:
        return px.line(title="No comparison metrics available")

    metric_to_plot = metric_cols[0]
    return px.line(
        comparison_df,
        x="frame_index",
        y=metric_to_plot,
        title=f"{metric_to_plot.replace('_', ' ').title()} Over Time",
    )


def _cluster_scatter(clustered_df, cluster_features):
    """Scatter the first two selected features, coloured by cluster label."""
    if len(cluster_features) < 2 or len(clustered_df) == 0:
        return px.scatter(title="Select ≥2 features for scatter plot")

    x_feat, y_feat = cluster_features[0], cluster_features[1]
    if x_feat not in clustered_df.columns or y_feat not in clustered_df.columns:
        return px.scatter(title="Selected features not available in data")

    return px.scatter(
        clustered_df,
        x=x_feat,
        y=y_feat,
        color="cluster",
        title=f"Clustering: {x_feat} vs {y_feat}",
        hover_data=["cluster"],
    )


def analyze_audio_pair(
    near_file,
    far_file,
    frame_length_ms,
    hop_length_ms,
    window_type,
    comparison_metrics,
    cluster_features,
    clustering_algo,
    n_clusters,
    dbscan_eps
):
    """Run the full near-field vs far-field analysis for one pair of recordings.

    Args:
        near_file: Uploaded near-field recording (object with a ``name``
            path attribute, or a plain path).
        far_file: Uploaded far-field recording, same conventions.
        frame_length_ms: Frame duration in milliseconds.
        hop_length_ms: Hop between frames in milliseconds.
        window_type: Window name passed to ``segment_audio``.
        comparison_metrics: Metric labels passed to ``compare_frames_enhanced``.
        cluster_features: Feature names used for clustering and the scatter plot.
        clustering_algo: Algorithm name passed to ``cluster_frames_custom``.
        n_clusters: Cluster count for KMeans/Agglomerative.
        dbscan_eps: Neighbourhood radius for DBSCAN.

    Returns:
        A 5-tuple ``(comparison_plot, comparison_df, scatter_plot,
        clustered_df, spectral_heatmap)``.

    Raises:
        gr.Error: If a file is missing or cannot be loaded.
    """
    if not near_file or not far_file:
        raise gr.Error("Upload both audio files.")

    try:
        # sr=None keeps each file's native sampling rate.
        y_near, sr_near = librosa.load(_uploaded_path(near_file), sr=None)
        y_far, sr_far = librosa.load(_uploaded_path(far_file), sr=None)
    except Exception as e:
        raise gr.Error(f"Error loading audio files: {str(e)}") from e

    # Both signals are analysed at the near-field rate.
    sr = sr_near
    if sr_far != sr_near:
        y_far = librosa.resample(y_far, orig_sr=sr_far, target_sr=sr_near)

    frames_near, _ = segment_audio(y_near, sr, frame_length_ms, hop_length_ms, window_type)
    frames_far, _ = segment_audio(y_far, sr, frame_length_ms, hop_length_ms, window_type)

    near_feats = extract_features_with_spectrum(frames_near, sr)
    far_feats = extract_features_with_spectrum(frames_far, sr)

    # Frame-by-frame comparison
    comparison_df = compare_frames_enhanced(near_feats, far_feats, comparison_metrics)

    # Clustering is done on the near-field frames only; the spectrum column
    # holds arrays and is not a clustering feature.
    near_df = pd.DataFrame(near_feats).drop(columns=["spectrum"], errors="ignore")
    clustered_df = cluster_frames_custom(
        near_df, cluster_features, clustering_algo, n_clusters, dbscan_eps
    )

    plot_comparison = _comparison_figure(comparison_df)
    plot_scatter = _cluster_scatter(clustered_df, cluster_features)

    # Spectral difference heatmap for the first frame
    spec_heatmap = plot_spectral_difference(near_feats, far_feats, frame_idx=0)

    return (
        plot_comparison,
        comparison_df,
        plot_scatter,
        clustered_df,
        spec_heatmap
    )
380
+
381
def export_results(comparison_df, clustered_df):
    """Write both result tables to CSV files in a fresh temporary directory.

    Args:
        comparison_df: Frame-comparison table, or ``None`` if no analysis has
            produced one yet.
        clustered_df: Clustered-frames table, or ``None``.

    Returns:
        ``[comparison_csv_path, cluster_csv_path]``. A ``None`` table is
        written as an empty CSV so both paths always exist.
    """
    temp_dir = tempfile.mkdtemp()
    exports = (
        ("frame_comparisons.csv", comparison_df),
        ("clustered_frames.csv", clustered_df),
    )

    paths = []
    for filename, table in exports:
        path = os.path.join(temp_dir, filename)
        # Treat a missing table as empty instead of failing on None.
        frame = pd.DataFrame() if table is None else table
        frame.to_csv(path, index=False)
        paths.append(path)
    return paths
388
+
389
+ # ----------------------------
390
+ # Gradio UI
391
+ # ----------------------------
392
+
393
# Feature names offered in the clustering selector. This is a fixed list; it
# mirrors the scalar keys written by extract_features_with_spectrum.
dummy_features = (
    ["rms", "spectral_centroid", "zcr"]
    + [f"mfcc_{i}" for i in range(1, 14)]
    + ["low_freq_energy", "mid_freq_energy", "high_freq_energy"]
)

with gr.Blocks(title="Advanced Near vs Far Field Analyzer") as demo:
    gr.Markdown("# 🎙️ Advanced Near vs Far Field Speech Analyzer")
    gr.Markdown("Upload simultaneous recordings. Analyze **lost frequencies**, **frame degradation**, and **cluster by custom attributes**.")

    # Inputs: the two recordings to compare
    with gr.Row():
        near_file = gr.File(label="Near-Field Audio (.wav)", file_types=[".wav"])
        far_file = gr.File(label="Far-Field Audio (.wav)", file_types=[".wav"])

    with gr.Accordion("⚙️ Frame Settings", open=True):
        frame_length_ms = gr.Slider(10, 500, value=50, step=1, label="Frame Length (ms)")
        hop_length_ms = gr.Slider(1, 250, value=25, step=1, label="Hop Length (ms)")
        window_type = gr.Dropdown(["hann", "hamming", "rectangular"], value="hann", label="Window Type")

    with gr.Accordion("📊 Comparison Metrics", open=True):
        # These labels are matched verbatim inside compare_frames_enhanced.
        comparison_metrics = gr.CheckboxGroup(
            choices=[
                "Euclidean Distance",
                "Cosine Similarity",
                "Pearson Correlation",
                "KL Divergence",
                "Jensen-Shannon Divergence",
                "High-Freq Loss Ratio",
                "Spectral Centroid Shift",
            ],
            value=["High-Freq Loss Ratio", "Cosine Similarity"],
            label="Select Comparison Metrics",
        )

    with gr.Accordion("🧩 Clustering Configuration", open=True):
        cluster_features = gr.CheckboxGroup(
            choices=dummy_features,
            value=["rms", "spectral_centroid", "high_freq_energy"],
            label="Features to Use for Clustering",
        )
        clustering_algo = gr.Radio(
            ["KMeans", "Agglomerative", "DBSCAN"],
            value="KMeans",
            label="Clustering Algorithm",
        )
        n_clusters = gr.Slider(2, 20, value=5, step=1, label="Number of Clusters (for KMeans/Agglomerative)")
        dbscan_eps = gr.Slider(0.1, 2.0, value=0.5, step=0.1, label="DBSCAN eps (neighborhood radius)")

    btn = gr.Button("🚀 Analyze")

    # Outputs, one tab per result type
    with gr.Tabs():
        with gr.Tab("📈 Frame Comparison"):
            comp_plot = gr.Plot()
            comp_table = gr.Dataframe()

        with gr.Tab("🧩 Clustering"):
            cluster_plot = gr.Plot()
            cluster_table = gr.Dataframe()

        with gr.Tab("🔍 Spectral Analysis"):
            spec_heatmap = gr.Plot(label="Spectral Difference (Near - Far)")

        with gr.Tab("📤 Export"):
            export_btn = gr.Button("💾 Download CSVs")
            export_files = gr.Files()

    # The input order must match the parameter order of analyze_audio_pair,
    # and the output order must match its returned tuple.
    btn.click(
        fn=analyze_audio_pair,
        inputs=[
            near_file, far_file,
            frame_length_ms, hop_length_ms, window_type,
            comparison_metrics,
            cluster_features,
            clustering_algo,
            n_clusters,
            dbscan_eps,
        ],
        outputs=[comp_plot, comp_table, cluster_plot, cluster_table, spec_heatmap],
    )

    # Export reads back whatever the two result tables currently show.
    export_btn.click(
        fn=export_results,
        inputs=[comp_table, cluster_table],
        outputs=export_files,
    )

if __name__ == "__main__":
    demo.launch()