AdityaK007 committed on
Commit
8230f46
·
verified ·
1 Parent(s): 3a782b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +167 -527
app.py CHANGED
@@ -1,538 +1,178 @@
 
1
  import gradio as gr
2
- import librosa
3
- import numpy as np
4
- import pandas as pd
5
- from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
6
- from sklearn.metrics.pairwise import cosine_similarity
7
- from scipy.spatial.distance import jensenshannon
8
- from scipy.stats import pearsonr
9
- from scipy.signal import get_window as scipy_get_window
10
- import plotly.express as px
11
- import plotly.graph_objects as go
12
- import os
13
- import tempfile
14
-
15
- # ----------------------------
16
- # Audio Segmentation
17
- # ----------------------------
18
-
19
def segment_audio(y, sr, frame_length_ms, hop_length_ms, window_type="hann"):
    """Split a signal into overlapping, windowed frames.

    Returns ``(frames, frame_length)`` where ``frames`` has shape
    (frame_length, n_frames).  If the signal is shorter than one frame the
    frame shrinks to the signal length; if no frame fits, a single
    all-zero frame is returned.
    """
    n_win = int(frame_length_ms * sr / 1000)
    n_hop = int(hop_length_ms * sr / 1000)

    # Shrink the frame (and hop) when the signal is shorter than one frame.
    if n_win > len(y):
        n_win = len(y)
        n_hop = max(1, n_win // 2)

    # "rectangular" maps onto scipy's 'boxcar'; other names pass through.
    win_name = 'boxcar' if window_type == "rectangular" else window_type
    window = scipy_get_window(win_name, n_win)

    starts = range(0, len(y) - n_win + 1, n_hop)
    windowed = [y[s:s + n_win] * window for s in starts]

    if windowed:
        frames = np.array(windowed).T  # (samples, frames)
    else:
        frames = np.zeros((n_win, 1))  # zero-padded fallback frame

    return frames, n_win
47
-
48
- # ----------------------------
49
- # Enhanced Feature Extraction
50
- # ----------------------------
51
-
52
def extract_features_with_spectrum(frames, sr):
    """Compute per-frame audio features plus each frame's dB spectrogram.

    Parameters
    ----------
    frames : np.ndarray, shape (frame_length, n_frames)
        Windowed frames as produced by ``segment_audio``.
    sr : int
        Sample rate in Hz.

    Returns
    -------
    list[dict]
        One dict per non-silent frame with scalar features (rms, zcr,
        spectral_*, mfcc_1..13, *_freq_energy) and a "spectrum" ndarray.
        A single all-default entry is returned when no frame qualifies,
        so downstream DataFrame code never sees an empty list.

    Fix vs. previous revision: bare ``except:`` narrowed to
    ``except Exception:`` so KeyboardInterrupt/SystemExit propagate.
    """
    features = []
    n_mfcc = 13
    n_fft = min(2048, frames.shape[0])

    for i in range(frames.shape[1]):
        frame = frames[:, i]

        # Skip frames that are too short for the FFT or essentially silent.
        if len(frame) < n_fft or np.max(np.abs(frame)) < 1e-10:
            continue

        feat = {}

        # Basic features; each extractor is isolated so one failure does
        # not drop the whole frame.
        try:
            rms = np.mean(librosa.feature.rms(y=frame)[0])
            feat["rms"] = float(rms)
        except Exception:
            feat["rms"] = 0.0

        try:
            sc = np.mean(librosa.feature.spectral_centroid(y=frame, sr=sr)[0])
            feat["spectral_centroid"] = float(sc)
        except Exception:
            feat["spectral_centroid"] = 0.0

        try:
            zcr = np.mean(librosa.feature.zero_crossing_rate(frame)[0])
            feat["zcr"] = float(zcr)
        except Exception:
            feat["zcr"] = 0.0

        try:
            mfccs = librosa.feature.mfcc(y=frame, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft)
            for j in range(n_mfcc):
                feat[f"mfcc_{j+1}"] = float(np.mean(mfccs[j]))
        except Exception:
            for j in range(n_mfcc):
                feat[f"mfcc_{j+1}"] = 0.0

        # Spectral features for quality assessment.
        try:
            S = np.abs(librosa.stft(frame, n_fft=n_fft))
            S_db = librosa.amplitude_to_db(S, ref=np.max)
            freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)

            # Bands: low (<=500 Hz), mid/speech (500-4000 Hz), high (>4 kHz).
            low_mask = freqs <= 500
            mid_mask = (freqs > 500) & (freqs <= 4000)
            high_mask = freqs > 4000

            feat["low_freq_energy"] = float(np.mean(S_db[low_mask])) if np.any(low_mask) else -80.0
            feat["mid_freq_energy"] = float(np.mean(S_db[mid_mask])) if np.any(mid_mask) else -80.0
            feat["high_freq_energy"] = float(np.mean(S_db[high_mask])) if np.any(high_mask) else -80.0

            # Spectral rolloff (85%).
            rolloff = np.mean(librosa.feature.spectral_rolloff(y=frame, sr=sr, roll_percent=0.85)[0])
            feat["spectral_rolloff"] = float(rolloff)

            # Spectral bandwidth.
            bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=frame, sr=sr)[0])
            feat["spectral_bandwidth"] = float(bandwidth)

            # Spectral flatness (noisiness).
            flatness = np.mean(librosa.feature.spectral_flatness(y=frame)[0])
            feat["spectral_flatness"] = float(flatness)

            feat["spectrum"] = S_db
        except Exception:
            feat["low_freq_energy"] = -80.0
            feat["mid_freq_energy"] = -80.0
            feat["high_freq_energy"] = -80.0
            feat["spectral_rolloff"] = 0.0
            feat["spectral_bandwidth"] = 0.0
            feat["spectral_flatness"] = 0.0
            feat["spectrum"] = np.zeros((n_fft // 2 + 1, 1))

        features.append(feat)

    # Guarantee at least one (all-default) entry for very short/silent audio.
    if not features:
        feat = {
            "rms": 0.0, "spectral_centroid": 0.0, "zcr": 0.0,
            "low_freq_energy": -80.0, "mid_freq_energy": -80.0, "high_freq_energy": -80.0,
            "spectral_rolloff": 0.0, "spectral_bandwidth": 0.0, "spectral_flatness": 0.0,
            "spectrum": np.zeros((n_fft // 2 + 1, 1))
        }
        for j in range(n_mfcc):
            feat[f"mfcc_{j+1}"] = 0.0
        features.append(feat)

    return features
144
-
145
- # ----------------------------
146
- # Frame-wise Quality Metrics (0-1 scale)
147
- # ----------------------------
148
-
149
def calculate_frame_quality_metrics(near_feats, far_feats):
    """Calculate multiple quality metrics between 0 and 1 for each frame.

    ``near_feats``/``far_feats`` are the per-frame dicts produced by
    ``extract_features_with_spectrum``.  Returns a DataFrame with one row
    per compared frame (shorter list wins).

    Fixes vs. previous revision: bare ``except:`` narrowed to
    ``except Exception:``; unused ``near_vec``/``far_vec`` matrices
    removed; the ``weights`` dict is built once instead of per frame; the
    single-pair cosine similarity is computed directly with NumPy
    (numerically identical to sklearn's ``cosine_similarity`` for one
    sample pair).
    """
    min_len = min(len(near_feats), len(far_feats))
    if min_len == 0:
        return pd.DataFrame({"frame_index": []})

    def _cos(a, b):
        # Cosine similarity of two 1-D vectors.
        denom = np.linalg.norm(a) * np.linalg.norm(b)
        return float(np.dot(a, b) / denom) if denom > 0 else 0.0

    results = {"frame_index": list(range(min_len))}

    # 1. Spectral Similarity Score (0-1): match of the 3-band energy shape.
    spectral_scores = []
    for i in range(min_len):
        try:
            near_spectral = np.array([near_feats[i]["low_freq_energy"],
                                      near_feats[i]["mid_freq_energy"],
                                      near_feats[i]["high_freq_energy"]])
            far_spectral = np.array([far_feats[i]["low_freq_energy"],
                                     far_feats[i]["mid_freq_energy"],
                                     far_feats[i]["high_freq_energy"]])

            # Shift to positive values and normalize to a distribution.
            near_spectral = near_spectral - near_spectral.min() + 1e-8
            far_spectral = far_spectral - far_spectral.min() + 1e-8
            near_spectral = near_spectral / near_spectral.sum()
            far_spectral = far_spectral / far_spectral.sum()

            spec_sim = _cos(near_spectral, far_spectral)
            spectral_scores.append(max(0, min(1, spec_sim)))
        except Exception:
            spectral_scores.append(0.5)  # neutral score on failure
    results["spectral_similarity"] = spectral_scores

    # 2. High-Frequency Preservation (0-1): 0 dB loss -> 1.0, 40 dB -> 0.0.
    hf_scores = []
    for i in range(min_len):
        try:
            hf_diff = near_feats[i]["high_freq_energy"] - far_feats[i]["high_freq_energy"]
            hf_scores.append(max(0, min(1, 1.0 - (max(0, hf_diff) / 40.0))))
        except Exception:
            hf_scores.append(0.5)
    results["high_freq_preservation"] = hf_scores

    # 3. MFCC Structural Similarity (0-1).
    mfcc_scores = []
    for i in range(min_len):
        try:
            near_mfcc = np.array([near_feats[i][f"mfcc_{j+1}"] for j in range(13)])
            far_mfcc = np.array([far_feats[i][f"mfcc_{j+1}"] for j in range(13)])

            # Standardize, then cosine-compare.
            near_mfcc_norm = (near_mfcc - near_mfcc.mean()) / (near_mfcc.std() + 1e-8)
            far_mfcc_norm = (far_mfcc - far_mfcc.mean()) / (far_mfcc.std() + 1e-8)

            mfcc_sim = _cos(near_mfcc_norm, far_mfcc_norm)
            mfcc_scores.append(max(0, min(1, (mfcc_sim + 1) / 2)))  # map [-1,1] -> [0,1]
        except Exception:
            mfcc_scores.append(0.5)
    results["mfcc_similarity"] = mfcc_scores

    # 4. Temporal Consistency: ratio of frame RMS energies (1.0 = equal).
    temporal_scores = []
    for i in range(min_len):
        try:
            near_rms = near_feats[i]["rms"]
            far_rms = far_feats[i]["rms"]
            temporal_scores.append(float(min(near_rms, far_rms) / (max(near_rms, far_rms) + 1e-8)))
        except Exception:
            temporal_scores.append(0.5)
    results["temporal_consistency"] = temporal_scores

    # 5. Spectral Centroid Stability (0-1): ratio of centroids.
    centroid_scores = []
    for i in range(min_len):
        try:
            near_sc = near_feats[i]["spectral_centroid"]
            far_sc = far_feats[i]["spectral_centroid"]
            centroid_scores.append(float(min(near_sc, far_sc) / (max(near_sc, far_sc) + 1e-8)))
        except Exception:
            centroid_scores.append(0.5)
    results["spectral_centroid_stability"] = centroid_scores

    # 6. Overall Audio Quality Score: fixed-weight combination of 1-5.
    weights = {
        'spectral_similarity': 0.25,          # spectral distribution match
        'high_freq_preservation': 0.30,       # HF content preservation (most important)
        'mfcc_similarity': 0.20,              # structural similarity
        'temporal_consistency': 0.15,         # amplitude consistency
        'spectral_centroid_stability': 0.10   # spectral shape stability
    }
    quality_scores = []
    for i in range(min_len):
        total_score = sum(results[metric][i] * weight for metric, weight in weights.items())
        quality_scores.append(max(0, min(1, total_score)))
    results["overall_quality"] = quality_scores

    # 7. Qualitative degradation label per frame.
    degradation_levels = []
    for score in quality_scores:
        if score >= 0.8:
            degradation_levels.append("Excellent")
        elif score >= 0.6:
            degradation_levels.append("Good")
        elif score >= 0.4:
            degradation_levels.append("Moderate")
        elif score >= 0.2:
            degradation_levels.append("Poor")
        else:
            degradation_levels.append("Very Poor")
    results["degradation_level"] = degradation_levels

    return pd.DataFrame(results)
288
-
289
- # ----------------------------
290
- # Clustering and Visualization
291
- # ----------------------------
292
-
293
def cluster_frames_custom(features_df, cluster_features, algo, n_clusters=5, eps=0.5):
    """Assign a cluster label to every frame with the chosen algorithm.

    Returns a copy of ``features_df`` with a ``cluster`` column appended
    (the empty-frame case adds the column to the passed frame directly).
    Raises ``gr.Error`` when no features are selected and ``ValueError``
    for an unknown algorithm name.
    """
    if not cluster_features:
        raise gr.Error("Please select at least one feature for clustering.")

    if len(features_df) == 0:
        features_df["cluster"] = []
        return features_df

    X = features_df[cluster_features].values

    # Cluster counts are capped at the number of samples available.
    if algo == "KMeans":
        model = KMeans(n_clusters=min(n_clusters, len(X)), random_state=42, n_init=10)
    elif algo == "Agglomerative":
        model = AgglomerativeClustering(n_clusters=min(n_clusters, len(X)))
    elif algo == "DBSCAN":
        model = DBSCAN(eps=eps, min_samples=min(3, len(X)))
    else:
        raise ValueError("Unknown clustering algorithm")

    labels = model.fit_predict(X)

    labelled = features_df.copy()
    labelled["cluster"] = labels
    return labelled
320
-
321
def plot_spectral_difference(near_feats, far_feats, frame_idx=0):
    """Heatmap of the per-bin dB difference (near minus far) for one frame."""
    have_data = (bool(near_feats) and bool(far_feats)
                 and frame_idx < len(near_feats) and frame_idx < len(far_feats))
    if not have_data:
        placeholder = go.Figure()
        placeholder.update_layout(title="No data available for spectral analysis", height=300)
        return placeholder

    spec_near = near_feats[frame_idx]["spectrum"]
    spec_far = far_feats[frame_idx]["spectrum"]

    # Crop both spectrograms to their common shape before subtracting.
    n_bins = min(spec_near.shape[0], spec_far.shape[0])
    n_cols = min(spec_near.shape[1], spec_far.shape[1])
    delta = spec_near[:n_bins, :n_cols] - spec_far[:n_bins, :n_cols]

    fig = go.Figure(data=go.Heatmap(
        z=delta,
        colorscale='RdBu',
        zmid=0,  # diverging scale centered at "no difference"
        colorbar=dict(title="dB Difference")
    ))
    fig.update_layout(
        title=f"Spectral Difference (Frame {frame_idx}): Near - Far",
        xaxis_title="Time Frames",
        yaxis_title="Frequency Bins",
        height=300
    )
    return fig
350
-
351
- # ----------------------------
352
- # Main Analysis Function
353
- # ----------------------------
354
-
355
def analyze_audio_pair(
    near_file,
    far_file,
    frame_length_ms,
    hop_length_ms,
    window_type,
    cluster_features,
    clustering_algo,
    n_clusters,
    dbscan_eps
):
    """Run the full near-vs-far comparison and build every output widget.

    Returns the tuple expected by the Gradio wiring: quality line plot,
    quality histogram, quality DataFrame, cluster scatter, clustered
    DataFrame and the spectral-difference heatmap.
    """
    if not near_file or not far_file:
        raise gr.Error("Upload both audio files.")

    # Load both recordings at their native sample rates.
    try:
        sig_near, sr_near = librosa.load(near_file.name, sr=None)
        sig_far, sr_far = librosa.load(far_file.name, sr=None)
    except Exception as e:
        raise gr.Error(f"Error loading audio files: {str(e)}")

    # Bring the far signal onto the near signal's sample rate.
    sr = sr_near
    if sr_near != sr_far:
        sig_far = librosa.resample(sig_far, orig_sr=sr_far, target_sr=sr_near)

    frames_near, frame_length = segment_audio(sig_near, sr, frame_length_ms, hop_length_ms, window_type)
    frames_far, _ = segment_audio(sig_far, sr, frame_length_ms, hop_length_ms, window_type)

    near_feats = extract_features_with_spectrum(frames_near, sr)
    far_feats = extract_features_with_spectrum(frames_far, sr)

    # Frame-wise quality metrics.
    comparison_df = calculate_frame_quality_metrics(near_feats, far_feats)

    # Cluster the near-field frames on the selected scalar features.
    near_df = pd.DataFrame(near_feats).drop(columns=["spectrum"], errors="ignore")
    clustered_df = cluster_frames_custom(near_df, cluster_features, clustering_algo, n_clusters, dbscan_eps)

    # Quality-over-time line plot.
    if len(comparison_df) > 0:
        plot_comparison = px.line(
            comparison_df,
            x="frame_index",
            y="overall_quality",
            title="Overall Audio Quality Score Over Time (0-1 scale)",
            labels={"overall_quality": "Quality Score", "frame_index": "Frame Index"}
        )
        plot_comparison.update_yaxes(range=[0, 1])
    else:
        plot_comparison = px.line(title="No comparison data available")

    # Histogram of quality scores.
    if len(comparison_df) > 0:
        quality_dist_plot = px.histogram(
            comparison_df,
            x="overall_quality",
            title="Distribution of Audio Quality Scores",
            nbins=20,
            labels={"overall_quality": "Quality Score"}
        )
        quality_dist_plot.update_xaxes(range=[0, 1])
    else:
        quality_dist_plot = px.histogram(title="No quality data available")

    # 2-D scatter on the first two selected clustering features.
    if len(cluster_features) >= 2 and len(clustered_df) > 0:
        x_feat, y_feat = cluster_features[0], cluster_features[1]
        if x_feat in clustered_df.columns and y_feat in clustered_df.columns:
            plot_scatter = px.scatter(
                clustered_df,
                x=x_feat,
                y=y_feat,
                color="cluster",
                title=f"Clustering: {x_feat} vs {y_feat}",
                hover_data=["cluster"]
            )
        else:
            plot_scatter = px.scatter(title="Selected features not available in data")
    else:
        plot_scatter = px.scatter(title="Select ≥2 features for scatter plot")

    # Spectral difference heatmap for the first frame.
    spec_heatmap = plot_spectral_difference(near_feats, far_feats, frame_idx=0)

    return (
        plot_comparison,
        quality_dist_plot,
        comparison_df,
        plot_scatter,
        clustered_df,
        spec_heatmap
    )
452
-
453
def export_results(comparison_df, clustered_df):
    """Write both result tables to CSVs in a fresh temp dir; return the paths."""
    out_dir = tempfile.mkdtemp()
    # Insertion order fixes the returned path order: quality first, clusters second.
    tables = {
        "frame_quality_scores.csv": comparison_df,
        "clustered_frames.csv": clustered_df,
    }
    written = []
    for filename, df in tables.items():
        path = os.path.join(out_dir, filename)
        df.to_csv(path, index=False)
        written.append(path)
    return written
460
-
461
- # ----------------------------
462
- # Gradio UI
463
- # ----------------------------
464
-
465
# Feature names offered for clustering (must match the keys emitted by
# extract_features_with_spectrum, minus "spectrum").
dummy_features = (
    ["rms", "spectral_centroid", "zcr", "spectral_rolloff",
     "spectral_bandwidth", "spectral_flatness"]
    + [f"mfcc_{i}" for i in range(1, 14)]
    + ["low_freq_energy", "mid_freq_energy", "high_freq_energy"]
)

with gr.Blocks(title="Audio Quality Analyzer") as demo:
    # --- header ---
    gr.Markdown("# 🎙️ Near vs Far Field Audio Quality Analyzer")
    gr.Markdown("**Quantify audio degradation per frame (0-1 scale)** - Compare near-field vs far-field recording quality")

    # --- file inputs ---
    with gr.Row():
        near_file = gr.File(label="Near-Field Audio (.wav)", file_types=[".wav"])
        far_file = gr.File(label="Far-Field Audio (.wav)", file_types=[".wav"])

    # --- framing parameters ---
    with gr.Accordion("⚙️ Frame Settings", open=True):
        frame_length_ms = gr.Slider(10, 500, value=50, step=1, label="Frame Length (ms)")
        hop_length_ms = gr.Slider(1, 250, value=25, step=1, label="Hop Length (ms)")
        window_type = gr.Dropdown(["hann", "hamming", "rectangular"], value="hann", label="Window Type")

    # --- clustering parameters ---
    with gr.Accordion("🧩 Clustering Configuration", open=False):
        cluster_features = gr.CheckboxGroup(
            choices=dummy_features,
            value=["rms", "spectral_centroid", "high_freq_energy"],
            label="Features to Use for Clustering"
        )
        clustering_algo = gr.Radio(
            ["KMeans", "Agglomerative", "DBSCAN"],
            value="KMeans",
            label="Clustering Algorithm"
        )
        n_clusters = gr.Slider(2, 20, value=5, step=1, label="Number of Clusters (for KMeans/Agglomerative)")
        dbscan_eps = gr.Slider(0.1, 2.0, value=0.5, step=0.1, label="DBSCAN eps (neighborhood radius)")

    btn = gr.Button("🚀 Analyze Audio Quality")

    # --- result tabs ---
    with gr.Tabs():
        with gr.Tab("📊 Quality Analysis"):
            with gr.Row():
                comp_plot = gr.Plot(label="Quality Over Time")
                quality_dist_plot = gr.Plot(label="Quality Distribution")
            comp_table = gr.Dataframe(label="Frame-wise Quality Scores")

        with gr.Tab("🧩 Clustering"):
            cluster_plot = gr.Plot()
            cluster_table = gr.Dataframe()

        with gr.Tab("🔍 Spectral Analysis"):
            spec_heatmap = gr.Plot(label="Spectral Difference (Near - Far)")

        with gr.Tab("📤 Export"):
            gr.Markdown("### Download Analysis Results")
            export_btn = gr.Button("💾 Download CSV Files")
            export_files = gr.Files()

    # --- event wiring ---
    btn.click(
        fn=analyze_audio_pair,
        inputs=[
            near_file, far_file,
            frame_length_ms, hop_length_ms, window_type,
            cluster_features,
            clustering_algo,
            n_clusters,
            dbscan_eps
        ],
        outputs=[comp_plot, quality_dist_plot, comp_table, cluster_plot, cluster_table, spec_heatmap]
    )

    export_btn.click(
        fn=export_results,
        inputs=[comp_table, cluster_table],
        outputs=export_files
    )

if __name__ == "__main__":
    demo.launch()
 
1
# app.py
import gradio as gr
import librosa
import numpy as np
import pyroomacoustics as pra
import scipy
import scipy.signal  # explicit: `import scipy` alone does not expose scipy.signal
import soundfile as sf
from pesq import pesq
from pystoi import stoi
from sklearn.ensemble import RandomForestRegressor
9
+
10
+ # ------------- utility fns -------------
11
def load_audio(path, sr=16000):
    """Read an audio file, downmix to mono, resample to *sr*, remove any
    DC offset and peak-normalise.  Returns ``(samples, sr)``."""
    samples, native_sr = sf.read(path)

    # Multichannel -> mono by averaging channels.
    if samples.ndim > 1:
        samples = np.mean(samples, axis=1)

    if native_sr != sr:
        samples = librosa.resample(samples, orig_sr=native_sr, target_sr=sr)

    samples = samples - np.mean(samples)  # remove DC offset

    peak = np.max(np.abs(samples))
    if peak > 0:  # skip normalisation of pure silence
        samples = samples / peak

    return samples, sr
21
+
22
def frame_audio(y, sr, win_ms=25, hop_ms=10):
    """Slice *y* into overlapping frames.

    Parameters
    ----------
    y : np.ndarray
        1-D signal.
    sr : int
        Sample rate in Hz.
    win_ms, hop_ms : float
        Frame and hop lengths in milliseconds.

    Returns
    -------
    (frames, win, hop)
        ``frames`` has shape (n_frames, win) — one frame per row, exactly
        the layout the previous ``librosa.util.frame(...).T`` produced.

    Raises
    ------
    ValueError
        If *y* is shorter than one frame (librosa previously failed here
        with a less descriptive ParameterError).
    """
    win = int(win_ms * sr / 1000)
    hop = int(hop_ms * sr / 1000)
    if len(y) < win:
        raise ValueError(f"input too short: {len(y)} samples < frame length {win}")
    # Zero-copy sliding window stepped by `hop`; identical values to
    # librosa.util.frame(y, frame_length=win, hop_length=hop).T without
    # needing librosa for this step.
    frames = np.lib.stride_tricks.sliding_window_view(y, win)[::hop]
    return frames, win, hop
27
+
28
def hf_energy_db(frame, sr, low=4000):
    """Mean high-band spectral magnitude of *frame* in dB (heuristic).

    Magnitude bins at or above *low* Hz are averaged and converted with
    20*log10 (floored at 1e-12); returns -120.0 when the band is empty.

    Fix vs. previous revision: the librosa STFT used a fixed n_fft=1024
    with win_length=len(frame), which raises for frames longer than 1024
    samples.  The FFT size now grows with the frame (Hann-windowed
    single-shot rfft), so any frame length is accepted.
    """
    # At least 1024 bins of resolution; grow to the next power of two for
    # longer frames so the window never exceeds the FFT size.
    n_fft = max(1024, int(2 ** np.ceil(np.log2(max(2, len(frame))))))
    spec = np.abs(np.fft.rfft(frame * np.hanning(len(frame)), n=n_fft))
    freqs = np.fft.rfftfreq(n_fft, d=1.0 / sr)
    band = freqs >= low
    if not band.any():
        return -120.0  # floor value: no bins above the cutoff
    return float(20 * np.log10(np.maximum(1e-12, np.mean(spec[band]))))
36
+
37
def frame_features(near_frame, far_frame, sr):
    """Per-frame feature dict for a (near, far) frame pair.

    *near_frame* may be None (non-intrusive mode); its features then fall
    back to neutral/floor values (0.0 power/centroid, -120 dB HF energy,
    zero coherence).

    Keys: rms_near/rms_far (NOTE: these are mean *power*, not true RMS —
    kept under the historical names for compatibility), centroid_near/far,
    hi_near_db/hi_far_db, coherence_mean.

    Fix vs. previous revision: ``scipy.signal`` is now imported explicitly
    at module level (``import scipy`` alone does not expose the submodule,
    so the coherence call could silently AttributeError into the 0.0
    fallback), and the bare ``except:`` was narrowed.
    """
    feats = {}
    # Mean power per frame.
    feats['rms_near'] = float(np.mean(near_frame**2)) if near_frame is not None else 0.0
    feats['rms_far'] = float(np.mean(far_frame**2))
    feats['centroid_near'] = float(np.mean(librosa.feature.spectral_centroid(y=near_frame, sr=sr))) if near_frame is not None else 0.0
    feats['centroid_far'] = float(np.mean(librosa.feature.spectral_centroid(y=far_frame, sr=sr)))
    feats['hi_near_db'] = hf_energy_db(near_frame, sr, low=4000) if near_frame is not None else -120.0
    feats['hi_far_db'] = hf_energy_db(far_frame, sr, low=4000)
    # Magnitude-squared coherence between near and far; falls back to 0.0
    # when near is missing or the frame is unusable.
    try:
        _, Cxy = scipy.signal.coherence(near_frame, far_frame, fs=sr,
                                        nperseg=min(len(near_frame), 256))
        feats['coherence_mean'] = float(np.mean(Cxy))
    except Exception:
        feats['coherence_mean'] = 0.0
    return feats
54
+
55
# quick DRR proxy using energy early vs late (simple heuristic)
def estimate_drr_from_pair(near, far, sr, early_ms=50):
    """Crude direct-to-reverberant proxy: dB ratio of the energy in the
    first *early_ms* of the near signal vs. the far signal.  Returns 0.0
    when either signal is too short or the far segment is silent."""
    n_early = int(early_ms * sr / 1000)

    # Not enough samples to form the early window -> neutral 0 dB.
    if len(near) < n_early or len(far) < n_early:
        return 0.0

    e_near = np.sum(near[:n_early] ** 2)
    e_far = np.sum(far[:n_early] ** 2)

    # Guard against a (near-)silent far segment before dividing.
    if e_far <= 1e-12:
        return 0.0

    return float(10 * np.log10((e_near + 1e-12) / (e_far + 1e-12)))
69
+
70
def normalize_metric(val, vmin, vmax):
    """Linearly map *val* from [vmin, vmax] onto [0, 1], clipping values
    outside the range.

    Fix vs. previous revision: a degenerate range (vmin == vmax) raised
    ZeroDivisionError; it now returns 0.0.
    """
    if vmax == vmin:
        return 0.0
    return float(np.clip((val - vmin) / (vmax - vmin), 0, 1))
72
+
73
# ------------- scoring pipeline -------------
def score_pair(near_path, far_path):
    """Score far-field degradation, optionally against a near reference.

    Intrusive mode (near_path given): PESQ + STOI + DRR proxy + HF loss +
    coherence, combined with fixed weights.  Non-intrusive mode
    (near_path None/empty): coherence + HF + centroid heuristics.

    Returns a dict: score_percent, pesq, stoi, sisdr (new, additive key —
    previously computed but discarded), drr_db, avg_coherence,
    hi_loss_db, problem_frames.

    Fixes vs. previous revision: when no near reference exists, the HF
    loss was averaged against the -120 dB placeholder (the
    ``'hi_near_db' in f`` check was always true), poisoning q_hf with
    weight 0.4 in the fallback score — a neutral 0.5 is used instead;
    ``drr`` is initialised up front (no ``locals()`` introspection); bare
    ``except:`` clauses were narrowed to ``except Exception:``.
    """
    sr = 16000
    far, _ = load_audio(far_path, sr=sr)
    near = None
    if near_path:
        near, _ = load_audio(near_path, sr=sr)

    # Global intrusive metrics (only when a near-field reference exists).
    # Each metric degrades to None when its library rejects the input.
    pesq_score = None
    stoi_score = None
    sisdr = None
    if near is not None:
        L = min(len(near), len(far))  # align lengths
        try:
            pesq_score = pesq(sr, near[:L], far[:L], 'wb')
        except Exception:
            pesq_score = None
        try:
            stoi_score = stoi(near[:L], far[:L], sr, extended=False)
        except Exception:
            stoi_score = None
        # sisdr quick: use pra.metrics
        try:
            sisdr = float(pra.metrics.sdr(near[:L], far[:L])[0])
        except Exception:
            sisdr = None

    # Frame-level features.
    frames_far, win, hop = frame_audio(far, sr)
    frames_near = None
    if near is not None:
        frames_near, _, _ = frame_audio(near[:len(far)], sr)

    feats = []
    for i in range(len(frames_far)):
        nf = frames_near[i] if frames_near is not None and i < len(frames_near) else None
        feats.append(frame_features(nf, frames_far[i], sr))

    # Normalisation ranges (heuristic, tune as needed):
    # PESQ ~[1, 4.5], STOI [0, 1], DRR [-20, 20] dB, HF loss [-40, 0] dB.
    q_pesq = normalize_metric(pesq_score if pesq_score is not None else 2.5, 1.0, 4.5)
    q_stoi = normalize_metric(stoi_score if stoi_score is not None else 0.5, 0.0, 1.0)

    drr = None
    q_drr = 0.5  # neutral when no near reference
    if near is not None:
        drr = estimate_drr_from_pair(near, far, sr)
        q_drr = normalize_metric(drr, -20, 20)

    # High-frequency loss is only meaningful with a near reference.
    if near is not None:
        hi_loss = float(np.mean([f['hi_near_db'] - f['hi_far_db'] for f in feats]))
        q_hf = normalize_metric(-hi_loss, -40, 0)  # smaller loss -> higher score
    else:
        hi_loss = 0.0
        q_hf = 0.5  # neutral: no reference to measure loss against

    # Average inter-signal coherence across frames.
    q_coh = np.mean([f['coherence_mean'] for f in feats])

    if near is not None:
        # Weighted aggregate for the intrusive case.
        weights = {
            'pesq': 0.30, 'stoi': 0.20, 'drr': 0.20, 'hf': 0.10, 'coh': 0.20
        }
        score = (weights['pesq'] * q_pesq + weights['stoi'] * q_stoi
                 + weights['drr'] * q_drr + weights['hf'] * q_hf
                 + weights['coh'] * q_coh) / sum(weights.values())
    else:
        # Non-intrusive fallback: coherence / HF / centroid heuristics.
        avg_centroid_far = np.mean([f['centroid_far'] for f in feats])
        q_centroid = normalize_metric(avg_centroid_far, 500, 3500)
        score = 0.4 * q_coh + 0.4 * q_hf + 0.2 * q_centroid

    percent = float(score * 100)

    # Per-frame heuristic score (coherence + HF loss); frames scoring
    # below 0.5 are flagged for attention.
    frame_scores = []
    for f in feats:
        hf_term = (normalize_metric(-(f['hi_near_db'] - f['hi_far_db']), -40, 0)
                   if near is not None else 0.5)
        frame_scores.append(float(0.6 * f['coherence_mean'] + 0.4 * hf_term))
    problem_frames = [i for i, v in enumerate(frame_scores) if v < 0.5]

    return {
        "score_percent": percent,
        "pesq": pesq_score,
        "stoi": stoi_score,
        "sisdr": sisdr,
        "drr_db": drr,
        "avg_coherence": q_coh,
        "hi_loss_db": hi_loss,
        "problem_frames": problem_frames
    }
161
+
162
# ------------- Gradio UI -------------
def analyze(near, far):
    """Gradio callback: score the uploaded pair and render an HTML summary.

    *near* is optional; *far* is required (the Interface wiring guarantees
    a file object with a ``.name`` path).
    """
    res = score_pair(near.name if near else None, far.name)

    parts = [
        f"<h3>Far-field quality: {res['score_percent']:.1f}%</h3>",
        "<ul>",
        f"<li>PESQ: {res['pesq']}</li>",
        f"<li>STOI: {res['stoi']}</li>",
        f"<li>DRR (proxy, dB): {res['drr_db']}</li>",
        f"<li>Avg coherence: {res['avg_coherence']:.3f}</li>",
        f"<li>Avg high-freq loss (dB): {res['hi_loss_db']:.2f}</li>",
        f"<li>Problem frames (indices): {res['problem_frames']}</li>",
        "</ul>",
    ]
    return "".join(parts)
175
+
176
+ iface = gr.Interface(fn=analyze, inputs=[gr.File(label="Near (optional)"), gr.File(label="Far")], outputs=gr.HTML, title="Far-field degradation score")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  if __name__ == "__main__":
178
+ iface.launch()