AdityaK007 commited on
Commit
d26a738
Β·
verified Β·
1 Parent(s): 75c973d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +307 -169
app.py CHANGED
@@ -1,178 +1,316 @@
1
- # app.py
2
  import gradio as gr
3
- import numpy as np, soundfile as sf
4
- import librosa, scipy
5
- from pesq import pesq
6
- from pystoi import stoi
7
- from sklearn.ensemble import RandomForestRegressor
8
- import pyroomacoustics as pra
9
-
10
# ------------- utility fns -------------
def load_audio(path, sr=16000):
    """Read an audio file, downmix to mono, resample to `sr`, remove DC and peak-normalize.

    Returns (samples, sr).
    """
    data, native_sr = sf.read(path)
    # Downmix multi-channel audio by averaging the channels.
    if data.ndim > 1:
        data = np.mean(data, axis=1)
    # Bring everything to the common analysis rate.
    if native_sr != sr:
        data = librosa.resample(data, orig_sr=native_sr, target_sr=sr)
    data = data - np.mean(data)  # remove DC offset
    peak = np.max(np.abs(data))
    if peak > 0:
        data = data / peak
    return data, sr
21
-
22
def frame_audio(y, sr, win_ms=25, hop_ms=10):
    """Slice `y` into overlapping frames.

    Returns (frames, win, hop) where frames has one frame per row.
    """
    win_len = int(win_ms * sr / 1000)
    hop_len = int(hop_ms * sr / 1000)
    framed = librosa.util.frame(y, frame_length=win_len, hop_length=hop_len).T
    return framed, win_len, hop_len
27
-
28
def hf_energy_db(frame, sr, low=4000):
    """Mean STFT magnitude at/above `low` Hz, in dB; -120 dB when no bins qualify."""
    spec = np.abs(librosa.stft(frame, n_fft=1024, win_length=len(frame), center=False))
    freqs = librosa.fft_frequencies(sr=sr, n_fft=1024)
    hi_bins = freqs >= low
    if not hi_bins.any():
        return -120.0
    # Floor the mean magnitude so log10 never sees zero.
    return float(20 * np.log10(max(1e-12, np.mean(spec[hi_bins]))))
36
-
37
def frame_features(near_frame, far_frame, sr):
    """Per-frame features for a (near, far) pair.

    Computes RMS, spectral centroid, high-frequency energy (via hf_energy_db)
    for each side, plus the mean magnitude-squared coherence between them.
    `near_frame` may be None (non-intrusive mode); near-side features then
    fall back to neutral defaults.
    """
    # BUGFIX: the file only does `import scipy`, which does not guarantee the
    # scipy.signal submodule is loaded; import it explicitly here.
    from scipy import signal
    feats = {}
    feats['rms_near'] = float(np.mean(near_frame ** 2)) if near_frame is not None else 0.0
    feats['rms_far'] = float(np.mean(far_frame ** 2))
    feats['centroid_near'] = (float(np.mean(librosa.feature.spectral_centroid(y=near_frame, sr=sr)))
                              if near_frame is not None else 0.0)
    feats['centroid_far'] = float(np.mean(librosa.feature.spectral_centroid(y=far_frame, sr=sr)))
    feats['hi_near_db'] = hf_energy_db(near_frame, sr, low=4000) if near_frame is not None else -120.0
    feats['hi_far_db'] = hf_energy_db(far_frame, sr, low=4000)
    # Magnitude-squared coherence between the frames; 0.0 on any failure
    # (e.g. near_frame is None or the frame is too short for nperseg).
    try:
        _, Cxy = signal.coherence(near_frame, far_frame, fs=sr,
                                  nperseg=min(len(near_frame), 256))
        feats['coherence_mean'] = float(np.mean(Cxy))
    except Exception:  # narrowed from bare except: let KeyboardInterrupt propagate
        feats['coherence_mean'] = 0.0
    return feats
54
-
55
# quick DRR proxy using energy early vs late (simple heuristic)
def estimate_drr_from_pair(near, far, sr, early_ms=50):
    """Heuristic direct-to-reverberant-ratio proxy in dB.

    Compares the energy of the first `early_ms` milliseconds of the near and
    far signals. Returns 0.0 when either signal is shorter than the early
    window or the far-side energy is essentially zero.
    """
    n_early = int(early_ms * sr / 1000)
    if min(len(near), len(far)) < n_early:
        return 0.0
    energy_near = np.sum(near[:n_early] ** 2)
    energy_far = np.sum(far[:n_early] ** 2)
    # Guard against a silent far signal before taking the log ratio.
    if energy_far <= 1e-12:
        return 0.0
    return float(10 * np.log10((energy_near + 1e-12) / (energy_far + 1e-12)))
69
-
70
def normalize_metric(val, vmin, vmax):
    """Linearly map `val` from [vmin, vmax] onto [0, 1], clipping out-of-range values."""
    scaled = (val - vmin) / (vmax - vmin)
    return float(min(1.0, max(0.0, scaled)))
72
-
73
# ------------- scoring pipeline -------------
def score_pair(near_path, far_path):
    """Score far-field degradation of `far_path` against optional reference `near_path`.

    Returns a dict with an overall percentage plus individual metrics:
    PESQ/STOI/SI-SDR (intrusive, only when a near reference exists), DRR
    proxy, average coherence, high-frequency loss, and the indices of
    frames whose per-frame quality falls below 0.5.
    """
    sr = 16000
    far, _ = load_audio(far_path, sr=sr)
    near = None
    if near_path:
        near, _ = load_audio(near_path, sr=sr)

    # Intrusive (reference-based) metrics, each best-effort: the external
    # scorers reject very short or silent signals.
    pesq_score = None
    stoi_score = None
    sisdr = None
    if near is not None:
        L = min(len(near), len(far))  # align lengths before comparison
        try:
            pesq_score = pesq(sr, near[:L], far[:L], 'wb')
        except Exception:  # narrowed from bare except
            pesq_score = None
        try:
            stoi_score = stoi(near[:L], far[:L], sr, extended=False)
        except Exception:
            stoi_score = None
        try:
            sisdr = float(pra.metrics.sdr(near[:L], far[:L])[0])
        except Exception:
            sisdr = None

    # Frame-level features.
    frames_far, win, hop = frame_audio(far, sr)
    frames_near = None
    if near is not None:
        frames_near, _, _ = frame_audio(near[:len(far)], sr)

    feats = []
    for i in range(len(frames_far)):
        nf = frames_near[i] if frames_near is not None and i < len(frames_near) else None
        feats.append(frame_features(nf, frames_far[i], sr))

    # Normalized sub-scores. Ranges are heuristic and should be tuned:
    # PESQ [1..4.5], STOI [0..1], DRR [-20..20] dB, HF loss [-40..0] dB.
    q_pesq = normalize_metric(pesq_score if pesq_score is not None else 2.5, 1.0, 4.5)
    q_stoi = normalize_metric(stoi_score if stoi_score is not None else 0.5, 0.0, 1.0)
    drr = None  # BUGFIX: explicit init instead of the fragile `'drr' in locals()` probe
    q_drr = 0.5
    if near is not None:
        drr = estimate_drr_from_pair(near, far, sr)
        q_drr = normalize_metric(drr, -20, 20)
    hi_loss = np.mean([f['hi_near_db'] - f['hi_far_db'] if 'hi_near_db' in f else 0.0 for f in feats])
    q_hf = normalize_metric(-hi_loss, -40, 0)  # smaller loss -> higher score
    q_coh = np.mean([f['coherence_mean'] for f in feats])

    # Weighted aggregate: intrusive when a near reference exists, otherwise a
    # non-intrusive fallback from coherence / HF / centroid heuristics.
    if near is not None:
        weights = {'pesq': 0.30, 'stoi': 0.20, 'drr': 0.20, 'hf': 0.10, 'coh': 0.20}
        score = (weights['pesq'] * q_pesq + weights['stoi'] * q_stoi + weights['drr'] * q_drr
                 + weights['hf'] * q_hf + weights['coh'] * q_coh) / sum(weights.values())
    else:
        avg_centroid_far = np.mean([f['centroid_far'] for f in feats])
        q_centroid = normalize_metric(avg_centroid_far, 500, 3500)
        score = 0.4 * q_coh + 0.4 * q_hf + 0.2 * q_centroid

    # Per-frame quality heuristic: coherence plus HF-loss penalty;
    # frames scoring under 0.5 are flagged for attention.
    frame_scores = []
    for f in feats:
        s = 0.6 * f['coherence_mean'] + 0.4 * normalize_metric(-(f['hi_near_db'] - f['hi_far_db']), -40, 0)
        frame_scores.append(float(s))
    problem_frames = [i for i, v in enumerate(frame_scores) if v < 0.5]

    return {
        "score_percent": float(score * 100),
        "pesq": pesq_score,
        "stoi": stoi_score,
        "drr_db": drr,
        "avg_coherence": q_coh,
        "hi_loss_db": hi_loss,
        "problem_frames": problem_frames,
    }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
 
162
# ------------- Gradio UI -------------
def analyze(near, far):
    """Gradio callback: score the (near, far) file pair and render an HTML report."""
    res = score_pair(near.name if near else None, far.name)
    html = f"<h3>Far-field quality: {res['score_percent']:.1f}%</h3>"
    html += "<ul>"
    html += f"<li>PESQ: {res['pesq']}</li>"
    html += f"<li>STOI: {res['stoi']}</li>"
    html += f"<li>DRR (proxy, dB): {res['drr_db']}</li>"
    html += f"<li>Avg coherence: {res['avg_coherence']:.3f}</li>"
    html += f"<li>Avg high-freq loss (dB): {res['hi_loss_db']:.2f}</li>"
    html += f"<li>Problem frames (indices): {res['problem_frames']}</li>"
    html += "</ul>"
    return html

iface = gr.Interface(
    fn=analyze,
    inputs=[gr.File(label="Near (optional)"), gr.File(label="Far")],
    # BUGFIX: pass a component instance, not the gr.HTML class itself.
    outputs=gr.HTML(),
    title="Far-field degradation score",
)

if __name__ == "__main__":
    iface.launch()
 
 
1
  import gradio as gr
2
+ import librosa
3
+ import numpy as np
4
+ import pandas as pd
5
+ from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
6
+ from sklearn.metrics.pairwise import cosine_similarity
7
+ from scipy.spatial.distance import jensenshannon
8
+ from scipy.stats import pearsonr
9
+ from scipy.signal import get_window as scipy_get_window
10
+ import plotly.express as px
11
+ import plotly.graph_objects as go
12
+ import os
13
+ import tempfile
14
+
15
# ----------------------------
# Segment Audio into Frames
# ----------------------------
def segment_audio(y, sr, frame_length_ms, hop_length_ms, window_type="hann"):
    """Split `y` into windowed, overlapping frames.

    Returns (frames, frame_length) where `frames` has shape
    (frame_length, n_frames). When the signal is shorter than one frame,
    a single all-zero frame is returned so callers always get data.
    """
    frame_length = int(frame_length_ms * sr / 1000)
    hop_length = int(hop_length_ms * sr / 1000)
    # scipy names the rectangular window "boxcar".
    win_name = "boxcar" if window_type == "rectangular" else window_type
    window = scipy_get_window(win_name, frame_length)
    starts = range(0, len(y) - frame_length + 1, hop_length)
    windowed = [y[s:s + frame_length] * window for s in starts]
    if windowed:
        stacked = np.array(windowed).T
    else:
        stacked = np.zeros((frame_length, 1))
    return stacked, frame_length
31
+
32
# ----------------------------
# Feature Extraction
# ----------------------------
def extract_features_with_spectrum(frames, sr):
    """Extract per-frame features from a (frame_length, n_frames) array.

    Each feature dict holds RMS, spectral centroid, ZCR, 13 MFCC means,
    band energies in dB (low <= 2 kHz, mid 2-4 kHz, high > 4 kHz) and the
    dB spectrogram under the "spectrum" key. Near-silent frames are
    skipped; if nothing survives, a single all-zero feature dict is
    returned so downstream code always has at least one row.
    """
    features = []
    n_mfcc = 13
    n_fft = min(2048, frames.shape[0])
    for i in range(frames.shape[1]):
        frame = frames[:, i]
        # Skip frames too short for the FFT or effectively silent.
        if len(frame) < n_fft or np.max(np.abs(frame)) < 1e-10:
            continue
        feat = {}
        # Each extractor is best-effort: a failure zeroes only that feature.
        # (All handlers narrowed from bare `except:` so KeyboardInterrupt /
        # SystemExit propagate instead of being swallowed.)
        try:
            feat["rms"] = float(np.mean(librosa.feature.rms(y=frame)[0]))
        except Exception:
            feat["rms"] = 0.0
        try:
            feat["spectral_centroid"] = float(np.mean(librosa.feature.spectral_centroid(y=frame, sr=sr)[0]))
        except Exception:
            feat["spectral_centroid"] = 0.0
        try:
            feat["zcr"] = float(np.mean(librosa.feature.zero_crossing_rate(frame)[0]))
        except Exception:
            feat["zcr"] = 0.0
        try:
            mfccs = librosa.feature.mfcc(y=frame, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft)
            for j in range(n_mfcc):
                feat[f"mfcc_{j+1}"] = float(np.mean(mfccs[j]))
        except Exception:
            for j in range(n_mfcc):
                feat[f"mfcc_{j+1}"] = 0.0
        try:
            S = np.abs(librosa.stft(frame, n_fft=n_fft))
            S_db = librosa.amplitude_to_db(S, ref=np.max)
            freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
            low_mask = freqs <= 2000
            mid_mask = (freqs > 2000) & (freqs <= 4000)
            high_mask = freqs > 4000
            feat["low_freq_energy"] = float(np.mean(S_db[low_mask])) if np.any(low_mask) else 0.0
            feat["mid_freq_energy"] = float(np.mean(S_db[mid_mask])) if np.any(mid_mask) else 0.0
            feat["high_freq_energy"] = float(np.mean(S_db[high_mask])) if np.any(high_mask) else 0.0
            feat["spectrum"] = S_db
        except Exception:
            feat["low_freq_energy"] = feat["mid_freq_energy"] = feat["high_freq_energy"] = 0.0
            feat["spectrum"] = np.zeros((n_fft // 2 + 1, 1))
        features.append(feat)
    if not features:
        # Fallback row so callers never receive an empty feature list.
        feat = {"rms": 0.0, "spectral_centroid": 0.0, "zcr": 0.0,
                "low_freq_energy": 0.0, "mid_freq_energy": 0.0, "high_freq_energy": 0.0,
                "spectrum": np.zeros((n_fft // 2 + 1, 1))}
        for j in range(n_mfcc):
            feat[f"mfcc_{j+1}"] = 0.0
        features.append(feat)
    return features
82
+
83
# ----------------------------
# Frame Comparison (core metrics)
# ----------------------------
def compare_frames_enhanced(near_feats, far_feats, metrics):
    """Compare near/far feature dicts frame-by-frame.

    `metrics` selects the optional columns ("Euclidean Distance",
    "Cosine Similarity", "High-Freq Loss Ratio"); energy ratio, clarity
    ratio, spectral overlap and a weighted combined quality are always
    computed. Returns a DataFrame with one row per aligned frame (empty
    input yields a frame_index-only DataFrame).
    """
    min_len = min(len(near_feats), len(far_feats))
    if min_len == 0:
        return pd.DataFrame({"frame_index": []})

    results = {"frame_index": list(range(min_len))}
    near_df = pd.DataFrame(near_feats[:min_len])
    far_df = pd.DataFrame(far_feats[:min_len])
    # The raw spectrogram is compared separately (spectral overlap below).
    near_vec = near_df.drop(columns=["spectrum"], errors="ignore").values
    far_vec = far_df.drop(columns=["spectrum"], errors="ignore").values

    if "Euclidean Distance" in metrics:
        results["euclidean_dist"] = np.linalg.norm(near_vec - far_vec, axis=1).tolist()
    if "Cosine Similarity" in metrics:
        cos_vals = []
        for i in range(min_len):
            a, b = near_vec[i].reshape(1, -1), far_vec[i].reshape(1, -1)
            # Cosine similarity is undefined for a zero vector; report 0.
            if np.all(a == 0) or np.all(b == 0):
                cos_vals.append(0.0)
            else:
                cos_vals.append(float(cosine_similarity(a, b)[0][0]))
        results["cosine_similarity"] = cos_vals
    if "High-Freq Loss Ratio" in metrics:
        loss_ratios = []
        for i in range(min_len):
            near_high = near_feats[i]["high_freq_energy"]
            far_high = far_feats[i]["high_freq_energy"]
            ratio = max(0.0, 1.0 - abs(near_high - far_high) / (abs(near_high) + 1e-6))
            loss_ratios.append(float(ratio))
        results["high_freq_quality"] = loss_ratios

    # Energy ratio: far RMS relative to near RMS, clipped to [0, 1].
    energy_ratio = []
    for i in range(min_len):
        near_rms = near_feats[i]["rms"]
        far_rms = far_feats[i]["rms"]
        energy_ratio.append(float(np.clip((far_rms + 1e-6) / (near_rms + 1e-6), 0, 1)))
    results["energy_ratio"] = energy_ratio

    # Clarity ratio: agreement of the low-minus-high spectral tilt.
    clarity_ratio = []
    for i in range(min_len):
        near_tilt = near_feats[i]["low_freq_energy"] - near_feats[i]["high_freq_energy"]
        far_tilt = far_feats[i]["low_freq_energy"] - far_feats[i]["high_freq_energy"]
        diff = 1 - abs(far_tilt - near_tilt) / (abs(near_tilt) + 1e-6)
        clarity_ratio.append(np.clip(diff, 0, 1))
    results["clarity_ratio"] = clarity_ratio

    # Spectral overlap: cosine similarity of the flattened dB spectrograms.
    overlap_scores = []
    for i in range(min_len):
        near_spec = near_feats[i]["spectrum"].flatten()
        far_spec = far_feats[i]["spectrum"].flatten()
        if np.all(near_spec == 0) or np.all(far_spec == 0):
            overlap_scores.append(0.0)
        else:
            overlap_scores.append(float(cosine_similarity(near_spec.reshape(1, -1),
                                                          far_spec.reshape(1, -1))[0][0]))
    results["spectral_overlap"] = overlap_scores

    # Combined weighted quality. BUGFIX: normalize by the weights of the
    # metrics actually present -- dividing by the full weight sum silently
    # deflated the score whenever an optional metric was deselected.
    weights = {
        "cosine_similarity": 0.3,
        "high_freq_quality": 0.25,
        "energy_ratio": 0.2,
        "clarity_ratio": 0.15,
        "spectral_overlap": 0.1
    }
    active = {k: w for k, w in weights.items() if k in results}
    total_w = sum(active.values())
    combined_quality = []
    for i in range(min_len):
        val = sum(results[k][i] * w for k, w in active.items())
        combined_quality.append(float(val / total_w))
    results["combined_quality"] = combined_quality
    return pd.DataFrame(results)
164
+
165
# ----------------------------
# Clustering + Overlay
# ----------------------------
def cluster_frames_custom(features_df, cluster_features, algo, n_clusters=5, eps=0.5):
    """Cluster frames on the selected feature columns.

    Supports KMeans, Agglomerative and DBSCAN; cluster counts are capped at
    the number of rows. Returns a copy of `features_df` with a "cluster"
    column added; the input frame is never mutated.

    Raises gr.Error when no features are selected, ValueError for an
    unknown algorithm name.
    """
    if not cluster_features:
        raise gr.Error("Please select at least one feature for clustering.")
    # BUGFIX: copy up front -- the empty-input path used to mutate the
    # caller's DataFrame while the normal path returned a copy.
    features_df = features_df.copy()
    if len(features_df) == 0:
        features_df["cluster"] = []
        return features_df
    X = features_df[cluster_features].values
    if algo == "KMeans":
        model = KMeans(n_clusters=min(n_clusters, len(X)), random_state=42, n_init=10)
        labels = model.fit_predict(X)
    elif algo == "Agglomerative":
        model = AgglomerativeClustering(n_clusters=min(n_clusters, len(X)))
        labels = model.fit_predict(X)
    elif algo == "DBSCAN":
        model = DBSCAN(eps=eps, min_samples=min(3, len(X)))
        labels = model.fit_predict(X)
    else:
        raise ValueError("Unknown clustering algorithm")
    features_df["cluster"] = labels
    return features_df
191
+
192
def plot_spectral_difference(near_feats, far_feats, frame_idx=0):
    """Heatmap of the (near - far) dB spectra for one frame; empty figure when out of range."""
    out_of_range = (not near_feats or not far_feats
                    or frame_idx >= len(near_feats) or frame_idx >= len(far_feats))
    if out_of_range:
        fig = go.Figure()
        fig.update_layout(title="No data available")
        return fig
    near_spec = near_feats[frame_idx]["spectrum"]
    far_spec = far_feats[frame_idx]["spectrum"]
    # Spectrogram sizes can differ; compare only the shared frequency bins.
    n_bins = min(near_spec.shape[0], far_spec.shape[0])
    delta = near_spec[:n_bins] - far_spec[:n_bins]
    fig = go.Figure(data=go.Heatmap(z=delta, colorscale='RdBu', zmid=0))
    fig.update_layout(title=f"Spectral Difference (Frame {frame_idx})", height=300)
    return fig
201
+
202
def plot_cluster_overlay(df, cluster_metric, overlay_metric):
    """Scatter of `cluster_metric` vs `overlay_metric`, colored by the overlay value."""
    if cluster_metric not in df.columns or overlay_metric not in df.columns:
        fig = go.Figure()
        fig.update_layout(title="Metrics not found")
        return fig
    fig = px.scatter(
        df,
        x=cluster_metric,
        y=overlay_metric,
        color=overlay_metric,
        color_continuous_scale='Viridis',
        title=f"Cluster Overlay: {cluster_metric} vs {overlay_metric}",
    )
    fig.update_layout(height=400)
    return fig
210
+
211
# ----------------------------
# Main Analysis Function
# ----------------------------
def analyze_audio_pair(
    near_file, far_file,
    frame_length_ms, hop_length_ms, window_type,
    comparison_metrics, cluster_features, clustering_algo, n_clusters, dbscan_eps
):
    """Full pipeline behind the Analyze button: load, frame, featurize, compare, cluster, plot.

    Returns (comparison plot, comparison table, cluster plot, cluster table,
    spectral-difference heatmap, overlay plot).
    """
    if not near_file or not far_file:
        raise gr.Error("Upload both audio files.")
    try:
        y_near, sr_near = librosa.load(near_file.name, sr=None)
        y_far, sr_far = librosa.load(far_file.name, sr=None)
    except Exception as e:
        raise gr.Error(f"Error loading audio: {str(e)}")
    # Analyze both signals at the near-field sample rate.
    sr = sr_near
    if sr_near != sr_far:
        y_far = librosa.resample(y_far, orig_sr=sr_far, target_sr=sr_near)

    frames_near, _ = segment_audio(y_near, sr, frame_length_ms, hop_length_ms, window_type)
    frames_far, _ = segment_audio(y_far, sr, frame_length_ms, hop_length_ms, window_type)
    near_feats = extract_features_with_spectrum(frames_near, sr)
    far_feats = extract_features_with_spectrum(frames_far, sr)
    comparison_df = compare_frames_enhanced(near_feats, far_feats, comparison_metrics)
    near_df = pd.DataFrame(near_feats).drop(columns=["spectrum"], errors="ignore")
    clustered_df = cluster_frames_custom(near_df, cluster_features, clustering_algo, n_clusters, dbscan_eps)

    # Plot the first selected comparison metric over time.
    metric_cols = [c for c in comparison_df.columns if c != "frame_index"]
    if metric_cols:
        plot_comparison = px.line(comparison_df, x="frame_index", y=metric_cols[0],
                                  title=f"{metric_cols[0].replace('_',' ').title()} Over Time")
    else:
        plot_comparison = px.line()
    if len(cluster_features) >= 2 and len(clustered_df) > 0:
        x_feat, y_feat = cluster_features[0], cluster_features[1]
        plot_scatter = px.scatter(clustered_df, x=x_feat, y=y_feat, color="cluster",
                                  title=f"Clustering: {x_feat} vs {y_feat}")
    else:
        plot_scatter = px.scatter(title="Select β‰₯2 features for clustering")
    spec_heatmap = plot_spectral_difference(near_feats, far_feats, frame_idx=0)
    overlay_fig = plot_cluster_overlay(clustered_df, cluster_features[0], "combined_quality")
    return plot_comparison, comparison_df, plot_scatter, clustered_df, spec_heatmap, overlay_fig
251
+
252
def export_results(comparison_df, clustered_df):
    """Write both result tables as CSVs into a fresh temp directory; return the two paths."""
    out_dir = tempfile.mkdtemp()
    paths = [
        os.path.join(out_dir, "frame_comparisons.csv"),
        os.path.join(out_dir, "clustered_frames.csv"),
    ]
    comparison_df.to_csv(paths[0], index=False)
    clustered_df.to_csv(paths[1], index=False)
    return paths
259
+
260
# ----------------------------
# Gradio UI
# ----------------------------
# Every scalar feature produced by extract_features_with_spectrum, offered as
# candidate clustering dimensions.
dummy_features = (
    ["rms", "spectral_centroid", "zcr"]
    + [f"mfcc_{i}" for i in range(1, 14)]
    + ["low_freq_energy", "mid_freq_energy", "high_freq_energy"]
)

with gr.Blocks(title="Advanced Near vs Far Field Analyzer") as demo:
    gr.Markdown("# πŸŽ™οΈ Advanced Near vs Far Field Speech Analyzer")

    # File inputs.
    with gr.Row():
        near_file = gr.File(label="Near-Field Audio (.wav)", file_types=[".wav"])
        far_file = gr.File(label="Far-Field Audio (.wav)")

    # Framing parameters.
    with gr.Accordion("βš™οΈ Frame Settings", open=True):
        frame_length_ms = gr.Slider(10, 500, value=50, step=1, label="Frame Length (ms)")
        hop_length_ms = gr.Slider(1, 250, value=25, step=1, label="Hop Length (ms)")
        window_type = gr.Dropdown(["hann", "hamming", "rectangular"], value="hann", label="Window Type")

    # Optional comparison metrics.
    with gr.Accordion("πŸ“Š Comparison Metrics", open=True):
        comparison_metrics = gr.CheckboxGroup(
            choices=["Euclidean Distance", "Cosine Similarity", "High-Freq Loss Ratio"],
            value=["Cosine Similarity", "High-Freq Loss Ratio"],
            label="Select Metrics",
        )

    # Clustering controls.
    with gr.Accordion("🧩 Clustering Configuration", open=True):
        cluster_features = gr.CheckboxGroup(
            choices=dummy_features,
            value=["rms", "spectral_centroid", "high_freq_energy"],
            label="Features for Clustering",
        )
        clustering_algo = gr.Radio(["KMeans", "Agglomerative", "DBSCAN"], value="KMeans", label="Clustering Algorithm")
        n_clusters = gr.Slider(2, 20, value=5, step=1, label="Clusters (for KMeans/Agglomerative)")
        dbscan_eps = gr.Slider(0.1, 2.0, value=0.5, step=0.1, label="DBSCAN eps")

    btn = gr.Button("πŸš€ Analyze")

    # Result tabs.
    with gr.Tabs():
        with gr.Tab("πŸ“ˆ Frame Comparison"):
            comp_plot = gr.Plot()
            comp_table = gr.Dataframe()
        with gr.Tab("🧩 Clustering"):
            cluster_plot = gr.Plot()
            cluster_table = gr.Dataframe()
        with gr.Tab("πŸ” Spectral Analysis"):
            spec_heatmap = gr.Plot(label="Spectral Difference (Near - Far)")
        with gr.Tab("🧭 Metric Overlay"):
            overlay_plot = gr.Plot(label="Metric Overlay")

    with gr.Tab("πŸ“€ Export"):
        export_btn = gr.Button("πŸ’Ύ Download CSVs")
        export_files = gr.Files()

    # Wire the callbacks.
    btn.click(
        fn=analyze_audio_pair,
        inputs=[near_file, far_file, frame_length_ms, hop_length_ms, window_type,
                comparison_metrics, cluster_features, clustering_algo, n_clusters, dbscan_eps],
        outputs=[comp_plot, comp_table, cluster_plot, cluster_table, spec_heatmap, overlay_plot],
    )
    export_btn.click(fn=export_results, inputs=[comp_table, cluster_table], outputs=export_files)


if __name__ == "__main__":
    demo.launch()